Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/vfs_inode.c | 35
-rw-r--r--  fs/Kconfig | 2
-rw-r--r--  fs/adfs/dir.c | 24
-rw-r--r--  fs/affs/namei.c | 15
-rw-r--r--  fs/affs/super.c | 1
-rw-r--r--  fs/afs/addr_list.c | 25
-rw-r--r--  fs/afs/callback.c | 84
-rw-r--r--  fs/afs/cmservice.c | 67
-rw-r--r--  fs/afs/dir.c | 54
-rw-r--r--  fs/afs/file.c | 2
-rw-r--r--  fs/afs/flock.c | 6
-rw-r--r--  fs/afs/fsclient.c | 31
-rw-r--r--  fs/afs/inode.c | 19
-rw-r--r--  fs/afs/internal.h | 25
-rw-r--r--  fs/afs/proc.c | 134
-rw-r--r--  fs/afs/rotate.c | 20
-rw-r--r--  fs/afs/rxrpc.c | 18
-rw-r--r--  fs/afs/security.c | 17
-rw-r--r--  fs/afs/server.c | 21
-rw-r--r--  fs/afs/server_list.c | 7
-rw-r--r--  fs/afs/super.c | 4
-rw-r--r--  fs/afs/vlclient.c | 19
-rw-r--r--  fs/afs/write.c | 2
-rw-r--r--  fs/aio.c | 736
-rw-r--r--  fs/befs/linuxvfs.c | 17
-rw-r--r--  fs/bfs/dir.c | 43
-rw-r--r--  fs/block_dev.c | 24
-rw-r--r--  fs/btrfs/btrfs_inode.h | 22
-rw-r--r--  fs/btrfs/compression.c | 7
-rw-r--r--  fs/btrfs/compression.h | 2
-rw-r--r--  fs/btrfs/ctree.c | 145
-rw-r--r--  fs/btrfs/ctree.h | 78
-rw-r--r--  fs/btrfs/delayed-inode.c | 9
-rw-r--r--  fs/btrfs/delayed-ref.c | 275
-rw-r--r--  fs/btrfs/delayed-ref.h | 5
-rw-r--r--  fs/btrfs/dev-replace.c | 150
-rw-r--r--  fs/btrfs/disk-io.c | 417
-rw-r--r--  fs/btrfs/extent-tree.c | 260
-rw-r--r--  fs/btrfs/extent_io.c | 87
-rw-r--r--  fs/btrfs/extent_io.h | 20
-rw-r--r--  fs/btrfs/extent_map.c | 6
-rw-r--r--  fs/btrfs/extent_map.h | 3
-rw-r--r--  fs/btrfs/free-space-cache.c | 6
-rw-r--r--  fs/btrfs/free-space-tree.c | 192
-rw-r--r--  fs/btrfs/free-space-tree.h | 8
-rw-r--r--  fs/btrfs/inode.c | 1377
-rw-r--r--  fs/btrfs/ioctl.c | 1132
-rw-r--r--  fs/btrfs/locking.c | 34
-rw-r--r--  fs/btrfs/lzo.c | 76
-rw-r--r--  fs/btrfs/ordered-data.c | 14
-rw-r--r--  fs/btrfs/print-tree.c | 21
-rw-r--r--  fs/btrfs/props.c | 12
-rw-r--r--  fs/btrfs/qgroup.c | 69
-rw-r--r--  fs/btrfs/raid56.c | 38
-rw-r--r--  fs/btrfs/relocation.c | 10
-rw-r--r--  fs/btrfs/scrub.c | 1
-rw-r--r--  fs/btrfs/send.c | 50
-rw-r--r--  fs/btrfs/super.c | 7
-rw-r--r--  fs/btrfs/sysfs.c | 52
-rw-r--r--  fs/btrfs/sysfs.h | 4
-rw-r--r--  fs/btrfs/tests/btrfs-tests.c | 4
-rw-r--r--  fs/btrfs/tests/btrfs-tests.h | 6
-rw-r--r--  fs/btrfs/tests/extent-buffer-tests.c | 56
-rw-r--r--  fs/btrfs/tests/extent-io-tests.c | 75
-rw-r--r--  fs/btrfs/tests/extent-map-tests.c | 90
-rw-r--r--  fs/btrfs/tests/free-space-tests.c | 177
-rw-r--r--  fs/btrfs/tests/free-space-tree-tests.c | 129
-rw-r--r--  fs/btrfs/tests/inode-tests.c | 312
-rw-r--r--  fs/btrfs/tests/qgroup-tests.c | 100
-rw-r--r--  fs/btrfs/transaction.c | 15
-rw-r--r--  fs/btrfs/transaction.h | 1
-rw-r--r--  fs/btrfs/tree-log.c | 172
-rw-r--r--  fs/btrfs/uuid-tree.c | 10
-rw-r--r--  fs/btrfs/volumes.c | 515
-rw-r--r--  fs/btrfs/volumes.h | 24
-rw-r--r--  fs/cachefiles/namei.c | 10
-rw-r--r--  fs/cachefiles/proc.c | 19
-rw-r--r--  fs/ceph/file.c | 205
-rw-r--r--  fs/cifs/Kconfig | 2
-rw-r--r--  fs/cifs/Makefile | 7
-rw-r--r--  fs/cifs/cifs_debug.c | 58
-rw-r--r--  fs/cifs/cifs_debug.h | 2
-rw-r--r--  fs/cifs/cifs_fs_sb.h | 1
-rw-r--r--  fs/cifs/cifsfs.c | 61
-rw-r--r--  fs/cifs/cifsfs.h | 2
-rw-r--r--  fs/cifs/cifsglob.h | 47
-rw-r--r--  fs/cifs/cifsproto.h | 16
-rw-r--r--  fs/cifs/cifssmb.c | 23
-rw-r--r--  fs/cifs/connect.c | 101
-rw-r--r--  fs/cifs/dir.c | 40
-rw-r--r--  fs/cifs/file.c | 75
-rw-r--r--  fs/cifs/inode.c | 13
-rw-r--r--  fs/cifs/misc.c | 7
-rw-r--r--  fs/cifs/netmisc.c | 2
-rw-r--r--  fs/cifs/readdir.c | 6
-rw-r--r--  fs/cifs/smb2glob.h | 5
-rw-r--r--  fs/cifs/smb2inode.c | 43
-rw-r--r--  fs/cifs/smb2maperror.c | 11
-rw-r--r--  fs/cifs/smb2misc.c | 126
-rw-r--r--  fs/cifs/smb2ops.c | 320
-rw-r--r--  fs/cifs/smb2pdu.c | 413
-rw-r--r--  fs/cifs/smb2pdu.h | 84
-rw-r--r--  fs/cifs/smb2proto.h | 9
-rw-r--r--  fs/cifs/smb2transport.c | 10
-rw-r--r--  fs/cifs/trace.c | 18
-rw-r--r--  fs/cifs/trace.h | 429
-rw-r--r--  fs/cifs/transport.c | 4
-rw-r--r--  fs/cramfs/inode.c | 7
-rw-r--r--  fs/dax.c | 2
-rw-r--r--  fs/dcache.c | 227
-rw-r--r--  fs/direct-io.c | 4
-rw-r--r--  fs/dlm/lowcomms.c | 16
-rw-r--r--  fs/ecryptfs/inode.c | 3
-rw-r--r--  fs/eventfd.c | 15
-rw-r--r--  fs/eventpoll.c | 5
-rw-r--r--  fs/exofs/ore.c | 10
-rw-r--r--  fs/exofs/super.c | 2
-rw-r--r--  fs/ext2/inode.c | 10
-rw-r--r--  fs/ext2/namei.c | 6
-rw-r--r--  fs/ext4/ext4.h | 2
-rw-r--r--  fs/ext4/mballoc.c | 29
-rw-r--r--  fs/ext4/namei.c | 6
-rw-r--r--  fs/ext4/sysfs.c | 49
-rw-r--r--  fs/f2fs/namei.c | 12
-rw-r--r--  fs/f2fs/sysfs.c | 29
-rw-r--r--  fs/fat/namei_msdos.c | 4
-rw-r--r--  fs/fat/namei_vfat.c | 13
-rw-r--r--  fs/fcntl.c | 15
-rw-r--r--  fs/filesystems.c | 14
-rw-r--r--  fs/freevxfs/vxfs_lookup.c | 8
-rw-r--r--  fs/fs-writeback.c | 2
-rw-r--r--  fs/fscache/histogram.c | 17
-rw-r--r--  fs/fscache/internal.h | 5
-rw-r--r--  fs/fscache/proc.c | 8
-rw-r--r--  fs/fscache/stats.c | 17
-rw-r--r--  fs/gfs2/aops.c | 69
-rw-r--r--  fs/gfs2/bmap.c | 428
-rw-r--r--  fs/gfs2/bmap.h | 6
-rw-r--r--  fs/gfs2/file.c | 6
-rw-r--r--  fs/gfs2/incore.h | 3
-rw-r--r--  fs/gfs2/inode.c | 4
-rw-r--r--  fs/gfs2/log.h | 7
-rw-r--r--  fs/gfs2/ops_fstype.c | 19
-rw-r--r--  fs/gfs2/quota.c | 5
-rw-r--r--  fs/gfs2/rgrp.c | 2
-rw-r--r--  fs/gfs2/trans.c | 27
-rw-r--r--  fs/hfs/dir.c | 20
-rw-r--r--  fs/hfs/inode.c | 4
-rw-r--r--  fs/hfsplus/dir.c | 3
-rw-r--r--  fs/hfsplus/super.c | 1
-rw-r--r--  fs/inode.c | 1
-rw-r--r--  fs/internal.h | 1
-rw-r--r--  fs/jffs2/dir.c | 12
-rw-r--r--  fs/jfs/jfs_debug.c | 43
-rw-r--r--  fs/jfs/jfs_debug.h | 10
-rw-r--r--  fs/jfs/jfs_logmgr.c | 14
-rw-r--r--  fs/jfs/jfs_metapage.c | 14
-rw-r--r--  fs/jfs/jfs_txnmgr.c | 28
-rw-r--r--  fs/jfs/jfs_xtree.c | 14
-rw-r--r--  fs/jfs/namei.c | 12
-rw-r--r--  fs/kernfs/mount.c | 1
-rw-r--r--  fs/locks.c | 16
-rw-r--r--  fs/minix/namei.c | 8
-rw-r--r--  fs/namei.c | 19
-rw-r--r--  fs/nfs/client.c | 43
-rw-r--r--  fs/nfsd/blocklayout.c | 2
-rw-r--r--  fs/nfsd/vfs.c | 22
-rw-r--r--  fs/nilfs2/namei.c | 6
-rw-r--r--  fs/ocfs2/cluster/heartbeat.c | 11
-rw-r--r--  fs/ocfs2/refcounttree.c | 14
-rw-r--r--  fs/omfs/dir.c | 7
-rw-r--r--  fs/open.c | 44
-rw-r--r--  fs/openpromfs/inode.c | 3
-rw-r--r--  fs/orangefs/namei.c | 73
-rw-r--r--  fs/pipe.c | 22
-rw-r--r--  fs/proc/array.c | 48
-rw-r--r--  fs/proc/base.c | 162
-rw-r--r--  fs/proc/cmdline.c | 14
-rw-r--r--  fs/proc/consoles.c | 14
-rw-r--r--  fs/proc/devices.c | 14
-rw-r--r--  fs/proc/fd.c | 138
-rw-r--r--  fs/proc/generic.c | 151
-rw-r--r--  fs/proc/internal.h | 17
-rw-r--r--  fs/proc/interrupts.c | 14
-rw-r--r--  fs/proc/kcore.c | 23
-rw-r--r--  fs/proc/loadavg.c | 14
-rw-r--r--  fs/proc/meminfo.c | 14
-rw-r--r--  fs/proc/namespaces.c | 24
-rw-r--r--  fs/proc/nommu.c | 14
-rw-r--r--  fs/proc/proc_net.c | 104
-rw-r--r--  fs/proc/proc_sysctl.c | 15
-rw-r--r--  fs/proc/proc_tty.c | 22
-rw-r--r--  fs/proc/self.c | 4
-rw-r--r--  fs/proc/softirqs.c | 14
-rw-r--r--  fs/proc/task_mmu.c | 4
-rw-r--r--  fs/proc/thread_self.c | 4
-rw-r--r--  fs/proc/uptime.c | 14
-rw-r--r--  fs/proc/version.c | 14
-rw-r--r--  fs/qnx4/namei.c | 8
-rw-r--r--  fs/qnx6/namei.c | 8
-rw-r--r--  fs/read_write.c | 6
-rw-r--r--  fs/reiserfs/namei.c | 12
-rw-r--r--  fs/reiserfs/procfs.c | 16
-rw-r--r--  fs/romfs/super.c | 36
-rw-r--r--  fs/select.c | 85
-rw-r--r--  fs/seq_file.c | 5
-rw-r--r--  fs/super.c | 32
-rw-r--r--  fs/sysfs/mount.c | 6
-rw-r--r--  fs/sysv/namei.c | 9
-rw-r--r--  fs/timerfd.c | 22
-rw-r--r--  fs/ubifs/dir.c | 43
-rw-r--r--  fs/udf/namei.c | 6
-rw-r--r--  fs/ufs/namei.c | 6
-rw-r--r--  fs/xattr.c | 4
-rw-r--r--  fs/xfs/libxfs/xfs_attr.c | 9
-rw-r--r--  fs/xfs/libxfs/xfs_bmap.c | 4
-rw-r--r--  fs/xfs/libxfs/xfs_inode_buf.c | 21
-rw-r--r--  fs/xfs/xfs_aops.c | 2
-rw-r--r--  fs/xfs/xfs_aops.h | 2
-rw-r--r--  fs/xfs/xfs_file.c | 24
-rw-r--r--  fs/xfs/xfs_iops.c | 16
-rw-r--r--  fs/xfs/xfs_stats.c | 33
-rw-r--r--  fs/xfs/xfs_super.c | 11
223 files changed, 7120 insertions, 5639 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 9ee534159cc6..42e102e2e74a 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -823,28 +823,21 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
if (IS_ERR(dfid))
return ERR_CAST(dfid);
- name = dentry->d_name.name;
- fid = p9_client_walk(dfid, 1, &name, 1);
- if (IS_ERR(fid)) {
- if (fid == ERR_PTR(-ENOENT)) {
- d_add(dentry, NULL);
- return NULL;
- }
- return ERR_CAST(fid);
- }
/*
* Make sure we don't use a wrong inode due to parallel
* unlink. For cached mode create calls request for new
* inode. But with cache disabled, lookup should do this.
*/
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
+ name = dentry->d_name.name;
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (fid == ERR_PTR(-ENOENT))
+ inode = NULL;
+ else if (IS_ERR(fid))
+ inode = ERR_CAST(fid);
+ else if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
else
inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
- if (IS_ERR(inode)) {
- p9_client_clunk(fid);
- return ERR_CAST(inode);
- }
/*
* If we had a rename on the server and a parallel lookup
* for the new name, then make sure we instantiate with
@@ -853,12 +846,14 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
* k/b.
*/
res = d_splice_alias(inode, dentry);
- if (!res)
- v9fs_fid_add(dentry, fid);
- else if (!IS_ERR(res))
- v9fs_fid_add(res, fid);
- else
- p9_client_clunk(fid);
+ if (!IS_ERR(fid)) {
+ if (!res)
+ v9fs_fid_add(dentry, fid);
+ else if (!IS_ERR(res))
+ v9fs_fid_add(res, fid);
+ else
+ p9_client_clunk(fid);
+ }
return res;
}
diff --git a/fs/Kconfig b/fs/Kconfig
index bc821a86d965..ac4ac908f001 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -196,7 +196,7 @@ config HUGETLBFS
help
hugetlbfs is a filesystem backing for HugeTLB pages, based on
ramfs. For architectures that support it, say Y here and read
- <file:Documentation/vm/hugetlbpage.txt> for details.
+ <file:Documentation/admin-guide/mm/hugetlbpage.rst> for details.
If unsure, say N.
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 29444c83da48..e18eff854e1a 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -146,20 +146,6 @@ adfs_dir_lookup_byname(struct inode *inode, const struct qstr *name, struct obje
obj->parent_id = inode->i_ino;
- /*
- * '.' is handled by reserved_lookup() in fs/namei.c
- */
- if (name->len == 2 && name->name[0] == '.' && name->name[1] == '.') {
- /*
- * Currently unable to fill in the rest of 'obj',
- * but this is better than nothing. We need to
- * ascend one level to find it's parent.
- */
- obj->name_len = 0;
- obj->file_id = obj->parent_id;
- goto free_out;
- }
-
read_lock(&adfs_dir_lock);
ret = ops->setpos(&dir, 0);
@@ -266,17 +252,17 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj);
if (error == 0) {
- error = -EACCES;
/*
* This only returns NULL if get_empty_inode
* fails.
*/
inode = adfs_iget(dir->i_sb, &obj);
- if (inode)
- error = 0;
+ if (!inode)
+ inode = ERR_PTR(-EACCES);
+ } else if (error != -ENOENT) {
+ inode = ERR_PTR(error);
}
- d_add(dentry, inode);
- return ERR_PTR(error);
+ return d_splice_alias(inode, dentry);
}
/*
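Most of the ->lookup() conversions in this pull follow the same shape: instead of d_add(dentry, inode) followed by a NULL return (with separate error paths), the inode, which may be NULL, an ERR_PTR or a live inode, is handed straight to d_splice_alias(), which copes with all three cases and yields the dentry (or error) the caller should return. A minimal sketch of the pattern; foo_lookup() and foo_iget() are hypothetical stand-ins, not helpers from the patch:

	static struct dentry *foo_lookup(struct inode *dir, struct dentry *dentry,
					 unsigned int flags)
	{
		struct inode *inode = foo_iget(dir->i_sb, dentry);	/* stand-in */

		/*
		 * d_splice_alias() copes with all three outcomes:
		 *   inode == NULL  -> negative dentry (old d_add(dentry, NULL))
		 *   IS_ERR(inode)  -> error returned as an ERR_PTR dentry
		 *   live inode     -> instantiated, or an alias for directories
		 */
		return d_splice_alias(inode, dentry);
	}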
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index d8aa0ae3d037..41c5749f4db7 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -201,14 +201,16 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
struct super_block *sb = dir->i_sb;
struct buffer_head *bh;
struct inode *inode = NULL;
+ struct dentry *res;
pr_debug("%s(\"%pd\")\n", __func__, dentry);
affs_lock_dir(dir);
bh = affs_find_entry(dir, dentry);
- affs_unlock_dir(dir);
- if (IS_ERR(bh))
+ if (IS_ERR(bh)) {
+ affs_unlock_dir(dir);
return ERR_CAST(bh);
+ }
if (bh) {
u32 ino = bh->b_blocknr;
@@ -222,11 +224,12 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
}
affs_brelse(bh);
inode = affs_iget(sb, ino);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
}
- d_add(dentry, inode);
- return NULL;
+ res = d_splice_alias(inode, dentry);
+ if (!IS_ERR_OR_NULL(res))
+ res->d_fsdata = dentry->d_fsdata;
+ affs_unlock_dir(dir);
+ return res;
}
int
diff --git a/fs/affs/super.c b/fs/affs/super.c
index e602619aed9d..d1ad11a8a4a5 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -241,6 +241,7 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
affs_set_opt(*mount_opts, SF_NO_TRUNCATE);
break;
case Opt_prefix:
+ kfree(*prefix);
*prefix = match_strdup(&args[0]);
if (!*prefix)
return 0;
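The one-line affs/super.c fix is the usual cure for a repeated-mount-option leak: match_strdup() allocates on every pass, so a second occurrence of the option would leak the first copy. Because kfree(NULL) is a no-op, freeing unconditionally before the strdup is also safe on the first pass. The shape of the pattern, with Opt_name as a hypothetical token:

	case Opt_name:
		kfree(*name);	/* no-op on the first pass; frees the old
				 * copy when the option is given twice */
		*name = match_strdup(&args[0]);
		if (!*name)
			return 0;
		break;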
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index 3bedfed608a2..7587fb665ff1 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -121,7 +121,7 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
p = text;
do {
struct sockaddr_rxrpc *srx = &alist->addrs[alist->nr_addrs];
- char tdelim = delim;
+ const char *q, *stop;
if (*p == delim) {
p++;
@@ -130,28 +130,33 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
if (*p == '[') {
p++;
- tdelim = ']';
+ q = memchr(p, ']', end - p);
+ } else {
+ for (q = p; q < end; q++)
+ if (*q == '+' || *q == delim)
+ break;
}
- if (in4_pton(p, end - p,
+ if (in4_pton(p, q - p,
(u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
- tdelim, &p)) {
+ -1, &stop)) {
srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
- } else if (in6_pton(p, end - p,
+ } else if (in6_pton(p, q - p,
srx->transport.sin6.sin6_addr.s6_addr,
- tdelim, &p)) {
+ -1, &stop)) {
/* Nothing to do */
} else {
goto bad_address;
}
- if (tdelim == ']') {
- if (p == end || *p != ']')
- goto bad_address;
+ if (stop != q)
+ goto bad_address;
+
+ p = q;
+ if (q < end && *q == ']')
p++;
- }
if (p < end) {
if (*p == '+') {
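The addr_list.c rewrite stops asking in4_pton()/in6_pton() to hunt for the terminating delimiter. It finds the end of the token up front (the closing ']' for a bracketed IPv6 address, otherwise the next '+' or list delimiter), parses exactly that span with delim == -1, and rejects the token unless the parser consumed all of it. A reduced sketch of that check, using the same p/q/stop roles as the patch; afs_parse_one_addr() is a hypothetical extraction of the loop body, not a helper the patch adds:

	/* Sketch: parse [p, q) as one address; reject partial consumption. */
	static bool afs_parse_one_addr(const char *p, const char *q,
				       struct sockaddr_rxrpc *srx)
	{
		const char *stop;

		if (in4_pton(p, q - p,
			     (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
			     -1, &stop)) {
			/* Store as an IPv4-mapped IPv6 address. */
			srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
			srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
			srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
		} else if (!in6_pton(p, q - p,
				     srx->transport.sin6.sin6_addr.s6_addr,
				     -1, &stop)) {
			return false;
		}
		return stop == q;	/* the whole token must be consumed */
	}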
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index abd9a84f4e88..571437dcb252 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -23,36 +23,55 @@
/*
* Set up an interest-in-callbacks record for a volume on a server and
* register it with the server.
- * - Called with volume->server_sem held.
+ * - Called with vnode->io_lock held.
*/
int afs_register_server_cb_interest(struct afs_vnode *vnode,
- struct afs_server_entry *entry)
+ struct afs_server_list *slist,
+ unsigned int index)
{
- struct afs_cb_interest *cbi = entry->cb_interest, *vcbi, *new, *x;
+ struct afs_server_entry *entry = &slist->servers[index];
+ struct afs_cb_interest *cbi, *vcbi, *new, *old;
struct afs_server *server = entry->server;
again:
+ if (vnode->cb_interest &&
+ likely(vnode->cb_interest == entry->cb_interest))
+ return 0;
+
+ read_lock(&slist->lock);
+ cbi = afs_get_cb_interest(entry->cb_interest);
+ read_unlock(&slist->lock);
+
vcbi = vnode->cb_interest;
if (vcbi) {
- if (vcbi == cbi)
+ if (vcbi == cbi) {
+ afs_put_cb_interest(afs_v2net(vnode), cbi);
return 0;
+ }
+ /* Use a new interest in the server list for the same server
+ * rather than an old one that's still attached to a vnode.
+ */
if (cbi && vcbi->server == cbi->server) {
write_seqlock(&vnode->cb_lock);
- vnode->cb_interest = afs_get_cb_interest(cbi);
+ old = vnode->cb_interest;
+ vnode->cb_interest = cbi;
write_sequnlock(&vnode->cb_lock);
- afs_put_cb_interest(afs_v2net(vnode), cbi);
+ afs_put_cb_interest(afs_v2net(vnode), old);
return 0;
}
+ /* Re-use the one attached to the vnode. */
if (!cbi && vcbi->server == server) {
- afs_get_cb_interest(vcbi);
- x = cmpxchg(&entry->cb_interest, cbi, vcbi);
- if (x != cbi) {
- cbi = x;
- afs_put_cb_interest(afs_v2net(vnode), vcbi);
+ write_lock(&slist->lock);
+ if (entry->cb_interest) {
+ write_unlock(&slist->lock);
+ afs_put_cb_interest(afs_v2net(vnode), cbi);
goto again;
}
+
+ entry->cb_interest = cbi;
+ write_unlock(&slist->lock);
return 0;
}
}
@@ -72,13 +91,16 @@ again:
list_add_tail(&new->cb_link, &server->cb_interests);
write_unlock(&server->cb_break_lock);
- x = cmpxchg(&entry->cb_interest, cbi, new);
- if (x == cbi) {
+ write_lock(&slist->lock);
+ if (!entry->cb_interest) {
+ entry->cb_interest = afs_get_cb_interest(new);
cbi = new;
+ new = NULL;
} else {
- cbi = x;
- afs_put_cb_interest(afs_v2net(vnode), new);
+ cbi = afs_get_cb_interest(entry->cb_interest);
}
+ write_unlock(&slist->lock);
+ afs_put_cb_interest(afs_v2net(vnode), new);
}
ASSERT(cbi);
@@ -88,11 +110,14 @@ again:
*/
write_seqlock(&vnode->cb_lock);
- vnode->cb_interest = afs_get_cb_interest(cbi);
+ old = vnode->cb_interest;
+ vnode->cb_interest = cbi;
vnode->cb_s_break = cbi->server->cb_s_break;
+ vnode->cb_v_break = vnode->volume->cb_v_break;
clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
write_sequnlock(&vnode->cb_lock);
+ afs_put_cb_interest(afs_v2net(vnode), old);
return 0;
}
@@ -171,13 +196,24 @@ static void afs_break_one_callback(struct afs_server *server,
if (cbi->vid != fid->vid)
continue;
- data.volume = NULL;
- data.fid = *fid;
- inode = ilookup5_nowait(cbi->sb, fid->vnode, afs_iget5_test, &data);
- if (inode) {
- vnode = AFS_FS_I(inode);
- afs_break_callback(vnode);
- iput(inode);
+ if (fid->vnode == 0 && fid->unique == 0) {
+ /* The callback break applies to an entire volume. */
+ struct afs_super_info *as = AFS_FS_S(cbi->sb);
+ struct afs_volume *volume = as->volume;
+
+ write_lock(&volume->cb_break_lock);
+ volume->cb_v_break++;
+ write_unlock(&volume->cb_break_lock);
+ } else {
+ data.volume = NULL;
+ data.fid = *fid;
+ inode = ilookup5_nowait(cbi->sb, fid->vnode,
+ afs_iget5_test, &data);
+ if (inode) {
+ vnode = AFS_FS_I(inode);
+ afs_break_callback(vnode);
+ iput(inode);
+ }
}
}
@@ -195,6 +231,8 @@ void afs_break_callbacks(struct afs_server *server, size_t count,
ASSERT(server != NULL);
ASSERTCMP(count, <=, AFSCBMAX);
+ /* TODO: Sort the callback break list by volume ID */
+
for (; count > 0; callbacks++, count--) {
_debug("- Fid { vl=%08x n=%u u=%u } CB { v=%u x=%u t=%u }",
callbacks->fid.vid,
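The new branch in afs_break_one_callback() treats a FID of { vnode 0, unique 0 } as a break against the entire volume: rather than walking every cached inode, the volume keeps a cb_v_break counter and each vnode caches the value it last recorded, so later validation is just a counter comparison. Roughly, under the names this series introduces (both helpers below are sketches, not code from the patch):

	/* Volume-wide break: one counter bump, no per-inode walk. */
	static void afs_break_whole_volume(struct afs_volume *volume)
	{
		write_lock(&volume->cb_break_lock);
		volume->cb_v_break++;
		write_unlock(&volume->cb_break_lock);
	}

	/* A vnode's callback promise holds only while the sum of its own,
	 * its server's and its volume's break counters is unchanged
	 * (cf. afs_calc_vnode_cb_break() added to internal.h below). */
	static bool afs_cb_promise_valid(const struct afs_vnode *vnode,
					 unsigned int recorded)
	{
		return recorded == vnode->cb_break + vnode->cb_s_break +
				   vnode->cb_v_break;
	}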
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 357de908df3a..c332c95a6940 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -133,21 +133,10 @@ bool afs_cm_incoming_call(struct afs_call *call)
}
/*
- * clean up a cache manager call
+ * Clean up a cache manager call.
*/
static void afs_cm_destructor(struct afs_call *call)
{
- _enter("");
-
- /* Break the callbacks here so that we do it after the final ACK is
- * received. The step number here must match the final number in
- * afs_deliver_cb_callback().
- */
- if (call->unmarshall == 5) {
- ASSERT(call->cm_server && call->count && call->request);
- afs_break_callbacks(call->cm_server, call->count, call->request);
- }
-
kfree(call->buffer);
call->buffer = NULL;
}
@@ -161,14 +150,14 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
_enter("");
- /* be sure to send the reply *before* attempting to spam the AFS server
- * with FSFetchStatus requests on the vnodes with broken callbacks lest
- * the AFS server get into a vicious cycle of trying to break further
- * callbacks because it hadn't received completion of the CBCallBack op
- * yet */
- afs_send_empty_reply(call);
+ /* We need to break the callbacks before sending the reply as the
+ * server holds up change visibility till it receives our reply so as
+ * to maintain cache coherency.
+ */
+ if (call->cm_server)
+ afs_break_callbacks(call->cm_server, call->count, call->request);
- afs_break_callbacks(call->cm_server, call->count, call->request);
+ afs_send_empty_reply(call);
afs_put_call(call);
_leave("");
}
@@ -180,7 +169,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
{
struct afs_callback_break *cb;
struct sockaddr_rxrpc srx;
- struct afs_server *server;
__be32 *bp;
int ret, loop;
@@ -267,15 +255,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
call->offset = 0;
call->unmarshall++;
-
- /* Record that the message was unmarshalled successfully so
- * that the call destructor can know do the callback breaking
- * work, even if the final ACK isn't received.
- *
- * If the step number changes, then afs_cm_destructor() must be
- * updated also.
- */
- call->unmarshall++;
case 5:
break;
}
@@ -286,10 +265,9 @@ static int afs_deliver_cb_callback(struct afs_call *call)
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
- server = afs_find_server(call->net, &srx);
- if (!server)
- return -ENOTCONN;
- call->cm_server = server;
+ call->cm_server = afs_find_server(call->net, &srx);
+ if (!call->cm_server)
+ trace_afs_cm_no_server(call, &srx);
return afs_queue_call_work(call);
}
@@ -303,7 +281,8 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
_enter("{%p}", call->cm_server);
- afs_init_callback_state(call->cm_server);
+ if (call->cm_server)
+ afs_init_callback_state(call->cm_server);
afs_send_empty_reply(call);
afs_put_call(call);
_leave("");
@@ -315,7 +294,6 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
{
struct sockaddr_rxrpc srx;
- struct afs_server *server;
int ret;
_enter("");
@@ -328,10 +306,9 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
- server = afs_find_server(call->net, &srx);
- if (!server)
- return -ENOTCONN;
- call->cm_server = server;
+ call->cm_server = afs_find_server(call->net, &srx);
+ if (!call->cm_server)
+ trace_afs_cm_no_server(call, &srx);
return afs_queue_call_work(call);
}
@@ -341,8 +318,6 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
*/
static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
{
- struct sockaddr_rxrpc srx;
- struct afs_server *server;
struct afs_uuid *r;
unsigned loop;
__be32 *b;
@@ -398,11 +373,11 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
- rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
- server = afs_find_server(call->net, &srx);
- if (!server)
- return -ENOTCONN;
- call->cm_server = server;
+ rcu_read_lock();
+ call->cm_server = afs_find_server_by_uuid(call->net, call->request);
+ rcu_read_unlock();
+ if (!call->cm_server)
+ trace_afs_cm_no_server_u(call, call->request);
return afs_queue_call_work(call);
}
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 5889f70d4d27..7d623008157f 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -180,6 +180,7 @@ static int afs_dir_open(struct inode *inode, struct file *file)
* get reclaimed during the iteration.
*/
static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
+ __acquires(&dvnode->validate_lock)
{
struct afs_read *req;
loff_t i_size;
@@ -261,18 +262,21 @@ retry:
/* If we're going to reload, we need to lock all the pages to prevent
* races.
*/
- if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
- ret = -ERESTARTSYS;
- for (i = 0; i < req->nr_pages; i++)
- if (lock_page_killable(req->pages[i]) < 0)
- goto error_unlock;
+ ret = -ERESTARTSYS;
+ if (down_read_killable(&dvnode->validate_lock) < 0)
+ goto error;
- if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- goto success;
+ if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+ goto success;
+
+ up_read(&dvnode->validate_lock);
+ if (down_write_killable(&dvnode->validate_lock) < 0)
+ goto error;
+ if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
ret = afs_fetch_data(dvnode, key, req);
if (ret < 0)
- goto error_unlock_all;
+ goto error_unlock;
task_io_account_read(PAGE_SIZE * req->nr_pages);
@@ -284,33 +288,26 @@ retry:
for (i = 0; i < req->nr_pages; i++)
if (!afs_dir_check_page(dvnode, req->pages[i],
req->actual_len))
- goto error_unlock_all;
+ goto error_unlock;
// TODO: Trim excess pages
set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags);
}
+ downgrade_write(&dvnode->validate_lock);
success:
- i = req->nr_pages;
- while (i > 0)
- unlock_page(req->pages[--i]);
return req;
-error_unlock_all:
- i = req->nr_pages;
error_unlock:
- while (i > 0)
- unlock_page(req->pages[--i]);
+ up_write(&dvnode->validate_lock);
error:
afs_put_read(req);
_leave(" = %d", ret);
return ERR_PTR(ret);
content_has_grown:
- i = req->nr_pages;
- while (i > 0)
- unlock_page(req->pages[--i]);
+ up_write(&dvnode->validate_lock);
afs_put_read(req);
goto retry;
}
@@ -473,6 +470,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
}
out:
+ up_read(&dvnode->validate_lock);
afs_put_read(req);
_leave(" = %d", ret);
return ret;
@@ -1143,7 +1141,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(dvnode);
afs_fs_create(&fc, dentry->d_name.name, mode, data_version,
&newfid, &newstatus, &newcb);
}
@@ -1213,7 +1211,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(dvnode);
afs_fs_remove(&fc, dentry->d_name.name, true,
data_version);
}
@@ -1316,7 +1314,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(dvnode);
afs_fs_remove(&fc, dentry->d_name.name, false,
data_version);
}
@@ -1373,7 +1371,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(dvnode);
afs_fs_create(&fc, dentry->d_name.name, mode, data_version,
&newfid, &newstatus, &newcb);
}
@@ -1443,8 +1441,8 @@ static int afs_link(struct dentry *from, struct inode *dir,
}
while (afs_select_fileserver(&fc)) {
- fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
- fc.cb_break_2 = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(dvnode);
+ fc.cb_break_2 = afs_calc_vnode_cb_break(vnode);
afs_fs_link(&fc, vnode, dentry->d_name.name, data_version);
}
@@ -1512,7 +1510,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(dvnode);
afs_fs_symlink(&fc, dentry->d_name.name,
content, data_version,
&newfid, &newstatus);
@@ -1588,8 +1586,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
}
while (afs_select_fileserver(&fc)) {
- fc.cb_break = orig_dvnode->cb_break + orig_dvnode->cb_s_break;
- fc.cb_break_2 = new_dvnode->cb_break + new_dvnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(orig_dvnode);
+ fc.cb_break_2 = afs_calc_vnode_cb_break(new_dvnode);
afs_fs_rename(&fc, old_dentry->d_name.name,
new_dvnode, new_dentry->d_name.name,
orig_data_version, new_data_version);
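The afs_read_dir() change above trades per-page lock_page_killable() calls for a single validate_lock rw_semaphore, using the classic read / drop / write / recheck / downgrade dance: rwsems cannot be upgraded in place, so the read lock is dropped before taking the write lock, and the VALID bit is retested because another task may have refilled the directory in the unlocked window. The skeleton of that pattern, with FOO_VALID and fetch_dir() as stand-ins:

	static int foo_validate(struct rw_semaphore *sem, unsigned long *flags)
	{
		if (down_read_killable(sem) < 0)
			return -ERESTARTSYS;
		if (test_bit(FOO_VALID, flags))
			return 0;			/* read lock held */

		up_read(sem);				/* can't upgrade in place */
		if (down_write_killable(sem) < 0)
			return -ERESTARTSYS;

		if (!test_bit(FOO_VALID, flags)) {	/* recheck after the gap */
			int ret = fetch_dir();		/* hypothetical refill */
			if (ret < 0) {
				up_write(sem);
				return ret;
			}
			set_bit(FOO_VALID, flags);
		}
		downgrade_write(sem);			/* exit as a reader */
		return 0;
	}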
diff --git a/fs/afs/file.c b/fs/afs/file.c
index c24c08016dd9..7d4f26198573 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -238,7 +238,7 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, vnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_fetch_data(&fc, desc);
}
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 7a0e017070ec..dc62d15a964b 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -86,7 +86,7 @@ static int afs_set_lock(struct afs_vnode *vnode, struct key *key,
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, vnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_set_lock(&fc, type);
}
@@ -117,7 +117,7 @@ static int afs_extend_lock(struct afs_vnode *vnode, struct key *key)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, vnode, key)) {
while (afs_select_current_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_extend_lock(&fc);
}
@@ -148,7 +148,7 @@ static int afs_release_lock(struct afs_vnode *vnode, struct key *key)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, vnode, key)) {
while (afs_select_current_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_release_lock(&fc);
}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index efacdb7c1dee..b273e1d60478 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -134,6 +134,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
struct afs_read *read_req)
{
const struct afs_xdr_AFSFetchStatus *xdr = (const void *)*_bp;
+ bool inline_error = (call->operation_ID == afs_FS_InlineBulkStatus);
u64 data_version, size;
u32 type, abort_code;
u8 flags = 0;
@@ -142,13 +143,32 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
if (vnode)
write_seqlock(&vnode->cb_lock);
+ abort_code = ntohl(xdr->abort_code);
+
if (xdr->if_version != htonl(AFS_FSTATUS_VERSION)) {
+ if (xdr->if_version == htonl(0) &&
+ abort_code != 0 &&
+ inline_error) {
+ /* The OpenAFS fileserver has a bug in FS.InlineBulkStatus
+ * whereby it doesn't set the interface version in the error
+ * case.
+ */
+ status->abort_code = abort_code;
+ ret = 0;
+ goto out;
+ }
+
pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version));
goto bad;
}
+ if (abort_code != 0 && inline_error) {
+ status->abort_code = abort_code;
+ ret = 0;
+ goto out;
+ }
+
type = ntohl(xdr->type);
- abort_code = ntohl(xdr->abort_code);
switch (type) {
case AFS_FTYPE_FILE:
case AFS_FTYPE_DIR:
@@ -165,13 +185,6 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
}
status->type = type;
break;
- case AFS_FTYPE_INVALID:
- if (abort_code != 0) {
- status->abort_code = abort_code;
- ret = 0;
- goto out;
- }
- /* Fall through */
default:
goto bad;
}
@@ -248,7 +261,7 @@ static void xdr_decode_AFSCallBack(struct afs_call *call,
write_seqlock(&vnode->cb_lock);
- if (call->cb_break == (vnode->cb_break + cbi->server->cb_s_break)) {
+ if (call->cb_break == afs_cb_break_sum(vnode, cbi)) {
vnode->cb_version = ntohl(*bp++);
cb_expiry = ntohl(*bp++);
vnode->cb_type = ntohl(*bp++);
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 06194cfe9724..479b7fdda124 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -108,7 +108,7 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, vnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_fetch_file_status(&fc, NULL, new_inode);
}
@@ -393,15 +393,18 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
read_seqlock_excl(&vnode->cb_lock);
if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
- if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break) {
+ if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break ||
+ vnode->cb_v_break != vnode->volume->cb_v_break) {
vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
+ vnode->cb_v_break = vnode->volume->cb_v_break;
+ valid = false;
} else if (vnode->status.type == AFS_FTYPE_DIR &&
test_bit(AFS_VNODE_DIR_VALID, &vnode->flags) &&
vnode->cb_expires_at - 10 > now) {
- valid = true;
+ valid = true;
} else if (!test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
vnode->cb_expires_at - 10 > now) {
- valid = true;
+ valid = true;
}
} else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
valid = true;
@@ -415,7 +418,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
if (valid)
goto valid;
- mutex_lock(&vnode->validate_lock);
+ down_write(&vnode->validate_lock);
/* if the promise has expired, we need to check the server again to get
* a new promise - note that if the (parent) directory's metadata was
@@ -444,13 +447,13 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
* different */
if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
afs_zap_data(vnode);
- mutex_unlock(&vnode->validate_lock);
+ up_write(&vnode->validate_lock);
valid:
_leave(" = 0");
return 0;
error_unlock:
- mutex_unlock(&vnode->validate_lock);
+ up_write(&vnode->validate_lock);
_leave(" = %d", ret);
return ret;
}
@@ -574,7 +577,7 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, vnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_setattr(&fc, attr);
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index f8086ec95e24..e3f8a46663db 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -396,6 +396,7 @@ struct afs_server {
#define AFS_SERVER_FL_PROBED 5 /* The fileserver has been probed */
#define AFS_SERVER_FL_PROBING 6 /* Fileserver is being probed */
#define AFS_SERVER_FL_NO_IBULK 7 /* Fileserver doesn't support FS.InlineBulkStatus */
+#define AFS_SERVER_FL_MAY_HAVE_CB 8 /* May have callbacks on this fileserver */
atomic_t usage;
u32 addr_version; /* Address list version */
@@ -433,6 +434,7 @@ struct afs_server_list {
unsigned short index; /* Server currently in use */
unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */
unsigned int seq; /* Set to ->servers_seq when installed */
+ rwlock_t lock;
struct afs_server_entry servers[];
};
@@ -459,6 +461,9 @@ struct afs_volume {
rwlock_t servers_lock; /* Lock for ->servers */
unsigned int servers_seq; /* Incremented each time ->servers changes */
+ unsigned cb_v_break; /* Break-everything counter. */
+ rwlock_t cb_break_lock;
+
afs_voltype_t type; /* type of volume */
short error;
char type_force; /* force volume type (suppress R/O -> R/W) */
@@ -494,7 +499,7 @@ struct afs_vnode {
#endif
struct afs_permits __rcu *permit_cache; /* cache of permits so far obtained */
struct mutex io_lock; /* Lock for serialising I/O on this mutex */
- struct mutex validate_lock; /* lock for validating this vnode */
+ struct rw_semaphore validate_lock; /* lock for validating this vnode */
spinlock_t wb_lock; /* lock for wb_keys */
spinlock_t lock; /* waitqueue/flags lock */
unsigned long flags;
@@ -519,6 +524,7 @@ struct afs_vnode {
/* outstanding callback notification on this file */
struct afs_cb_interest *cb_interest; /* Server on which this resides */
unsigned int cb_s_break; /* Mass break counter on ->server */
+ unsigned int cb_v_break; /* Mass break counter on ->volume */
unsigned int cb_break; /* Break counter on vnode */
seqlock_t cb_lock; /* Lock for ->cb_interest, ->status, ->cb_*break */
@@ -648,16 +654,29 @@ extern void afs_init_callback_state(struct afs_server *);
extern void afs_break_callback(struct afs_vnode *);
extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break*);
-extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_server_entry *);
+extern int afs_register_server_cb_interest(struct afs_vnode *,
+ struct afs_server_list *, unsigned int);
extern void afs_put_cb_interest(struct afs_net *, struct afs_cb_interest *);
extern void afs_clear_callback_interests(struct afs_net *, struct afs_server_list *);
static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest *cbi)
{
- refcount_inc(&cbi->usage);
+ if (cbi)
+ refcount_inc(&cbi->usage);
return cbi;
}
+static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode)
+{
+ return vnode->cb_break + vnode->cb_s_break + vnode->cb_v_break;
+}
+
+static inline unsigned int afs_cb_break_sum(struct afs_vnode *vnode,
+ struct afs_cb_interest *cbi)
+{
+ return vnode->cb_break + cbi->server->cb_s_break + vnode->volume->cb_v_break;
+}
+
/*
* cell.c
*/
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 839a22280606..3aad32762989 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -62,7 +62,6 @@ static const struct file_operations afs_proc_rootcell_fops = {
.llseek = no_llseek,
};
-static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file);
static void *afs_proc_cell_volumes_start(struct seq_file *p, loff_t *pos);
static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
loff_t *pos);
@@ -76,15 +75,6 @@ static const struct seq_operations afs_proc_cell_volumes_ops = {
.show = afs_proc_cell_volumes_show,
};
-static const struct file_operations afs_proc_cell_volumes_fops = {
- .open = afs_proc_cell_volumes_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static int afs_proc_cell_vlservers_open(struct inode *inode,
- struct file *file);
static void *afs_proc_cell_vlservers_start(struct seq_file *p, loff_t *pos);
static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
loff_t *pos);
@@ -98,14 +88,6 @@ static const struct seq_operations afs_proc_cell_vlservers_ops = {
.show = afs_proc_cell_vlservers_show,
};
-static const struct file_operations afs_proc_cell_vlservers_fops = {
- .open = afs_proc_cell_vlservers_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static int afs_proc_servers_open(struct inode *inode, struct file *file);
static void *afs_proc_servers_start(struct seq_file *p, loff_t *pos);
static void *afs_proc_servers_next(struct seq_file *p, void *v,
loff_t *pos);
@@ -119,13 +101,6 @@ static const struct seq_operations afs_proc_servers_ops = {
.show = afs_proc_servers_show,
};
-static const struct file_operations afs_proc_servers_fops = {
- .open = afs_proc_servers_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int afs_proc_sysname_open(struct inode *inode, struct file *file);
static int afs_proc_sysname_release(struct inode *inode, struct file *file);
static void *afs_proc_sysname_start(struct seq_file *p, loff_t *pos);
@@ -152,7 +127,7 @@ static const struct file_operations afs_proc_sysname_fops = {
.write = afs_proc_sysname_write,
};
-static const struct file_operations afs_proc_stats_fops;
+static int afs_proc_stats_show(struct seq_file *m, void *v);
/*
* initialise the /proc/fs/afs/ directory
@@ -167,8 +142,8 @@ int afs_proc_init(struct afs_net *net)
if (!proc_create("cells", 0644, net->proc_afs, &afs_proc_cells_fops) ||
!proc_create("rootcell", 0644, net->proc_afs, &afs_proc_rootcell_fops) ||
- !proc_create("servers", 0644, net->proc_afs, &afs_proc_servers_fops) ||
- !proc_create("stats", 0644, net->proc_afs, &afs_proc_stats_fops) ||
+ !proc_create_seq("servers", 0644, net->proc_afs, &afs_proc_servers_ops) ||
+ !proc_create_single("stats", 0644, net->proc_afs, afs_proc_stats_show) ||
!proc_create("sysname", 0644, net->proc_afs, &afs_proc_sysname_fops))
goto error_tree;
@@ -196,16 +171,7 @@ void afs_proc_cleanup(struct afs_net *net)
*/
static int afs_proc_cells_open(struct inode *inode, struct file *file)
{
- struct seq_file *m;
- int ret;
-
- ret = seq_open(file, &afs_proc_cells_ops);
- if (ret < 0)
- return ret;
-
- m = file->private_data;
- m->private = PDE_DATA(inode);
- return 0;
+ return seq_open(file, &afs_proc_cells_ops);
}
/*
@@ -430,10 +396,11 @@ int afs_proc_cell_setup(struct afs_net *net, struct afs_cell *cell)
if (!dir)
goto error_dir;
- if (!proc_create_data("vlservers", 0, dir,
- &afs_proc_cell_vlservers_fops, cell) ||
- !proc_create_data("volumes", 0, dir,
- &afs_proc_cell_volumes_fops, cell))
+ if (!proc_create_seq_data("vlservers", 0, dir,
+ &afs_proc_cell_vlservers_ops, cell))
+ goto error_tree;
+ if (!proc_create_seq_data("volumes", 0, dir, &afs_proc_cell_volumes_ops,
+ cell))
goto error_tree;
_leave(" = 0");
@@ -459,36 +426,13 @@ void afs_proc_cell_remove(struct afs_net *net, struct afs_cell *cell)
}
/*
- * open "/proc/fs/afs/<cell>/volumes" which provides a summary of extant cells
- */
-static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file)
-{
- struct afs_cell *cell;
- struct seq_file *m;
- int ret;
-
- cell = PDE_DATA(inode);
- if (!cell)
- return -ENOENT;
-
- ret = seq_open(file, &afs_proc_cell_volumes_ops);
- if (ret < 0)
- return ret;
-
- m = file->private_data;
- m->private = cell;
-
- return 0;
-}
-
-/*
* set up the iterator to start reading from the cells list and return the
* first item
*/
static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
__acquires(cell->proc_lock)
{
- struct afs_cell *cell = m->private;
+ struct afs_cell *cell = PDE_DATA(file_inode(m->file));
_enter("cell=%p pos=%Ld", cell, *_pos);
@@ -502,7 +446,7 @@ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
loff_t *_pos)
{
- struct afs_cell *cell = p->private;
+ struct afs_cell *cell = PDE_DATA(file_inode(p->file));
_enter("cell=%p pos=%Ld", cell, *_pos);
return seq_list_next(v, &cell->proc_volumes, _pos);
@@ -514,7 +458,7 @@ static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v)
__releases(cell->proc_lock)
{
- struct afs_cell *cell = p->private;
+ struct afs_cell *cell = PDE_DATA(file_inode(p->file));
read_unlock(&cell->proc_lock);
}
@@ -530,7 +474,7 @@ static const char afs_vol_types[3][3] = {
*/
static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
{
- struct afs_cell *cell = m->private;
+ struct afs_cell *cell = PDE_DATA(file_inode(m->file));
struct afs_volume *vol = list_entry(v, struct afs_volume, proc_link);
/* Display header on line 1 */
@@ -547,30 +491,6 @@ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
}
/*
- * open "/proc/fs/afs/<cell>/vlservers" which provides a list of volume
- * location server
- */
-static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
-{
- struct afs_cell *cell;
- struct seq_file *m;
- int ret;
-
- cell = PDE_DATA(inode);
- if (!cell)
- return -ENOENT;
-
- ret = seq_open(file, &afs_proc_cell_vlservers_ops);
- if (ret<0)
- return ret;
-
- m = file->private_data;
- m->private = cell;
-
- return 0;
-}
-
-/*
* set up the iterator to start reading from the cells list and return the
* first item
*/
@@ -578,7 +498,7 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
__acquires(rcu)
{
struct afs_addr_list *alist;
- struct afs_cell *cell = m->private;
+ struct afs_cell *cell = PDE_DATA(file_inode(m->file));
loff_t pos = *_pos;
rcu_read_lock();
@@ -603,7 +523,7 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
loff_t *_pos)
{
struct afs_addr_list *alist;
- struct afs_cell *cell = p->private;
+ struct afs_cell *cell = PDE_DATA(file_inode(p->file));
loff_t pos;
alist = rcu_dereference(cell->vl_addrs);
@@ -644,15 +564,6 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
}
/*
- * open "/proc/fs/afs/servers" which provides a summary of active
- * servers
- */
-static int afs_proc_servers_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &afs_proc_servers_ops);
-}
-
-/*
* Set up the iterator to start reading from the server list and return the
* first item.
*/
@@ -931,18 +842,3 @@ static int afs_proc_stats_show(struct seq_file *m, void *v)
atomic_long_read(&net->n_store_bytes));
return 0;
}
-
-/*
- * Open "/proc/fs/afs/stats" to allow reading of the stat counters.
- */
-static int afs_proc_stats_open(struct inode *inode, struct file *file)
-{
- return single_open(file, afs_proc_stats_show, NULL);
-}
-
-static const struct file_operations afs_proc_stats_fops = {
- .open = afs_proc_stats_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
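Nearly all of the proc.c delta is the 4.18-era procfs API conversion: proc_create_seq() and proc_create_single() let procfs supply the open/read/llseek/release boilerplate, and per-entry private data is fetched with PDE_DATA(file_inode(m->file)) instead of being stashed in seq_file->private at open time. The before/after, reduced to one entry (the foo_* names are placeholders):

	/* Before: an open handler plus a file_operations per entry. */
	static int foo_open(struct inode *inode, struct file *file)
	{
		return seq_open(file, &foo_seq_ops);
	}
	static const struct file_operations foo_fops = {
		.open		= foo_open,
		.read		= seq_read,
		.llseek		= seq_lseek,
		.release	= seq_release,
	};
	proc_create("foo", 0644, parent, &foo_fops);

	/* After: procfs builds the file_operations itself. */
	proc_create_seq("foo", 0644, parent, &foo_seq_ops);
	proc_create_single("bar", 0644, parent, foo_show);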
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index ac0feac9d746..e065bc0768e6 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -179,7 +179,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
*/
if (fc->flags & AFS_FS_CURSOR_VNOVOL) {
fc->ac.error = -EREMOTEIO;
- goto failed;
+ goto next_server;
}
write_lock(&vnode->volume->servers_lock);
@@ -201,7 +201,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
*/
if (vnode->volume->servers == fc->server_list) {
fc->ac.error = -EREMOTEIO;
- goto failed;
+ goto next_server;
}
/* Try again */
@@ -350,8 +350,8 @@ use_server:
* break request before we've finished decoding the reply and
* installing the vnode.
*/
- fc->ac.error = afs_register_server_cb_interest(
- vnode, &fc->server_list->servers[fc->index]);
+ fc->ac.error = afs_register_server_cb_interest(vnode, fc->server_list,
+ fc->index);
if (fc->ac.error < 0)
goto failed;
@@ -369,8 +369,16 @@ use_server:
if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) {
fc->ac.alist = afs_get_addrlist(alist);
- if (!afs_probe_fileserver(fc))
- goto failed;
+ if (!afs_probe_fileserver(fc)) {
+ switch (fc->ac.error) {
+ case -ENOMEM:
+ case -ERESTARTSYS:
+ case -EINTR:
+ goto failed;
+ default:
+ goto next_server;
+ }
+ }
}
if (!fc->ac.alist)
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 5c6263972ec9..08735948f15d 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -41,6 +41,7 @@ int afs_open_socket(struct afs_net *net)
{
struct sockaddr_rxrpc srx;
struct socket *socket;
+ unsigned int min_level;
int ret;
_enter("");
@@ -60,6 +61,12 @@ int afs_open_socket(struct afs_net *net)
srx.transport.sin6.sin6_family = AF_INET6;
srx.transport.sin6.sin6_port = htons(AFS_CM_PORT);
+ min_level = RXRPC_SECURITY_ENCRYPT;
+ ret = kernel_setsockopt(socket, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL,
+ (void *)&min_level, sizeof(min_level));
+ if (ret < 0)
+ goto error_2;
+
ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
if (ret == -EADDRINUSE) {
srx.transport.sin6.sin6_port = 0;
@@ -482,8 +489,12 @@ static void afs_deliver_to_call(struct afs_call *call)
state = READ_ONCE(call->state);
switch (ret) {
case 0:
- if (state == AFS_CALL_CL_PROC_REPLY)
+ if (state == AFS_CALL_CL_PROC_REPLY) {
+ if (call->cbi)
+ set_bit(AFS_SERVER_FL_MAY_HAVE_CB,
+ &call->cbi->server->flags);
goto call_complete;
+ }
ASSERTCMP(state, >, AFS_CALL_CL_PROC_REPLY);
goto done;
case -EINPROGRESS:
@@ -493,11 +504,6 @@ static void afs_deliver_to_call(struct afs_call *call)
case -ECONNABORTED:
ASSERTCMP(state, ==, AFS_CALL_COMPLETE);
goto done;
- case -ENOTCONN:
- abort_code = RX_CALL_DEAD;
- rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
- abort_code, ret, "KNC");
- goto local_abort;
case -ENOTSUPP:
abort_code = RXGEN_OPCODE;
rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
diff --git a/fs/afs/security.c b/fs/afs/security.c
index cea2fff313dc..81dfedb7879f 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -147,8 +147,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
break;
}
- if (cb_break != (vnode->cb_break +
- vnode->cb_interest->server->cb_s_break)) {
+ if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest)) {
changed = true;
break;
}
@@ -178,7 +177,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
}
}
- if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break))
+ if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest))
goto someone_else_changed_it;
/* We need a ref on any permits list we want to copy as we'll have to
@@ -257,7 +256,7 @@ found:
spin_lock(&vnode->lock);
zap = rcu_access_pointer(vnode->permit_cache);
- if (cb_break == (vnode->cb_break + vnode->cb_interest->server->cb_s_break) &&
+ if (cb_break == afs_cb_break_sum(vnode, vnode->cb_interest) &&
zap == permits)
rcu_assign_pointer(vnode->permit_cache, replacement);
else
@@ -373,18 +372,14 @@ int afs_permission(struct inode *inode, int mask)
mask, access, S_ISDIR(inode->i_mode) ? "dir" : "file");
if (S_ISDIR(inode->i_mode)) {
- if (mask & MAY_EXEC) {
+ if (mask & (MAY_EXEC | MAY_READ | MAY_CHDIR)) {
if (!(access & AFS_ACE_LOOKUP))
goto permission_denied;
- } else if (mask & MAY_READ) {
- if (!(access & AFS_ACE_LOOKUP))
- goto permission_denied;
- } else if (mask & MAY_WRITE) {
+ }
+ if (mask & MAY_WRITE) {
if (!(access & (AFS_ACE_DELETE | /* rmdir, unlink, rename from */
AFS_ACE_INSERT))) /* create, mkdir, symlink, rename to */
goto permission_denied;
- } else {
- BUG();
}
} else {
if (!(access & AFS_ACE_LOOKUP))
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 629c74986cff..3af4625e2f8c 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -67,12 +67,6 @@ struct afs_server *afs_find_server(struct afs_net *net,
sizeof(struct in6_addr));
if (diff == 0)
goto found;
- if (diff < 0) {
- // TODO: Sort the list
- //if (i == alist->nr_ipv4)
- // goto not_found;
- break;
- }
}
}
} else {
@@ -87,17 +81,10 @@ struct afs_server *afs_find_server(struct afs_net *net,
(u32 __force)b->sin6_addr.s6_addr32[3]);
if (diff == 0)
goto found;
- if (diff < 0) {
- // TODO: Sort the list
- //if (i == 0)
- // goto not_found;
- break;
- }
}
}
}
- //not_found:
server = NULL;
found:
if (server && !atomic_inc_not_zero(&server->usage))
@@ -395,14 +382,16 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
struct afs_addr_cursor ac = {
.alist = alist,
- .addr = &alist->addrs[0],
.start = alist->index,
- .index = alist->index,
+ .index = 0,
+ .addr = &alist->addrs[alist->index],
.error = 0,
};
_enter("%p", server);
- afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
+ if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
+ afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
+
call_rcu(&server->rcu, afs_server_rcu);
afs_dec_servers_outstanding(net);
}
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index 0f8dc4c8f07c..8a5760aa5832 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -49,6 +49,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
goto error;
refcount_set(&slist->usage, 1);
+ rwlock_init(&slist->lock);
/* Make sure a record exists for each server in the list. */
for (i = 0; i < vldb->nr_servers; i++) {
@@ -64,9 +65,11 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
goto error_2;
}
- /* Insertion-sort by server pointer */
+ /* Insertion-sort by UUID */
for (j = 0; j < slist->nr_servers; j++)
- if (slist->servers[j].server >= server)
+ if (memcmp(&slist->servers[j].server->uuid,
+ &server->uuid,
+ sizeof(server->uuid)) >= 0)
break;
if (j < slist->nr_servers) {
if (slist->servers[j].server == server) {
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 65081ec3c36e..9e5d7966621c 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -590,7 +590,7 @@ static void afs_i_init_once(void *_vnode)
memset(vnode, 0, sizeof(*vnode));
inode_init_once(&vnode->vfs_inode);
mutex_init(&vnode->io_lock);
- mutex_init(&vnode->validate_lock);
+ init_rwsem(&vnode->validate_lock);
spin_lock_init(&vnode->wb_lock);
spin_lock_init(&vnode->lock);
INIT_LIST_HEAD(&vnode->wb_keys);
@@ -688,7 +688,7 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
if (afs_begin_vnode_operation(&fc, vnode, key)) {
fc.flags |= AFS_FS_CURSOR_NO_VSLEEP;
while (afs_select_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_get_volume_status(&fc, &vs);
}
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 1ed7e2fd2f35..c3b740813fc7 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -23,7 +23,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
struct afs_uvldbentry__xdr *uvldb;
struct afs_vldb_entry *entry;
bool new_only = false;
- u32 tmp, nr_servers;
+ u32 tmp, nr_servers, vlflags;
int i, ret;
_enter("");
@@ -55,6 +55,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
new_only = true;
}
+ vlflags = ntohl(uvldb->flags);
for (i = 0; i < nr_servers; i++) {
struct afs_uuid__xdr *xdr;
struct afs_uuid *uuid;
@@ -64,12 +65,13 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
if (tmp & AFS_VLSF_DONTUSE ||
(new_only && !(tmp & AFS_VLSF_NEWREPSITE)))
continue;
- if (tmp & AFS_VLSF_RWVOL)
+ if (tmp & AFS_VLSF_RWVOL) {
entry->fs_mask[i] |= AFS_VOL_VTM_RW;
+ if (vlflags & AFS_VLF_BACKEXISTS)
+ entry->fs_mask[i] |= AFS_VOL_VTM_BAK;
+ }
if (tmp & AFS_VLSF_ROVOL)
entry->fs_mask[i] |= AFS_VOL_VTM_RO;
- if (tmp & AFS_VLSF_BACKVOL)
- entry->fs_mask[i] |= AFS_VOL_VTM_BAK;
if (!entry->fs_mask[i])
continue;
@@ -89,15 +91,14 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
for (i = 0; i < AFS_MAXTYPES; i++)
entry->vid[i] = ntohl(uvldb->volumeId[i]);
- tmp = ntohl(uvldb->flags);
- if (tmp & AFS_VLF_RWEXISTS)
+ if (vlflags & AFS_VLF_RWEXISTS)
__set_bit(AFS_VLDB_HAS_RW, &entry->flags);
- if (tmp & AFS_VLF_ROEXISTS)
+ if (vlflags & AFS_VLF_ROEXISTS)
__set_bit(AFS_VLDB_HAS_RO, &entry->flags);
- if (tmp & AFS_VLF_BACKEXISTS)
+ if (vlflags & AFS_VLF_BACKEXISTS)
__set_bit(AFS_VLDB_HAS_BAK, &entry->flags);
- if (!(tmp & (AFS_VLF_RWEXISTS | AFS_VLF_ROEXISTS | AFS_VLF_BACKEXISTS))) {
+ if (!(vlflags & (AFS_VLF_RWEXISTS | AFS_VLF_ROEXISTS | AFS_VLF_BACKEXISTS))) {
entry->error = -ENOMEDIUM;
__set_bit(AFS_VLDB_QUERY_ERROR, &entry->flags);
}
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c164698dc304..8b39e6ebb40b 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -351,7 +351,7 @@ found_key:
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, vnode, wbk->key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_store_data(&fc, mapping, first, last, offset, to);
}
diff --git a/fs/aio.c b/fs/aio.c
index 88d7927ffbc6..b850e92ee0d5 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -5,6 +5,7 @@
* Implements an efficient asynchronous io interface.
*
* Copyright 2000, 2001, 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright 2018 Christoph Hellwig.
*
* See ../COPYING for licensing terms.
*/
@@ -46,6 +47,8 @@
#include "internal.h"
+#define KIOCB_KEY 0
+
#define AIO_RING_MAGIC 0xa10a10a1
#define AIO_RING_COMPAT_FEATURES 1
#define AIO_RING_INCOMPAT_FEATURES 0
@@ -156,21 +159,29 @@ struct kioctx {
unsigned id;
};
-/*
- * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
- * cancelled or completed (this makes a certain amount of sense because
- * successful cancellation - io_cancel() - does deliver the completion to
- * userspace).
- *
- * And since most things don't implement kiocb cancellation and we'd really like
- * kiocb completion to be lockless when possible, we use ki_cancel to
- * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
- * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
- */
-#define KIOCB_CANCELLED ((void *) (~0ULL))
+struct fsync_iocb {
+ struct work_struct work;
+ struct file *file;
+ bool datasync;
+};
+
+struct poll_iocb {
+ struct file *file;
+ __poll_t events;
+ struct wait_queue_head *head;
+
+ union {
+ struct wait_queue_entry wait;
+ struct work_struct work;
+ };
+};
struct aio_kiocb {
- struct kiocb common;
+ union {
+ struct kiocb rw;
+ struct fsync_iocb fsync;
+ struct poll_iocb poll;
+ };
struct kioctx *ki_ctx;
kiocb_cancel_fn *ki_cancel;
@@ -264,9 +275,6 @@ static int __init aio_setup(void)
kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
-
- pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
-
return 0;
}
__initcall(aio_setup);
@@ -552,42 +560,20 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
{
- struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
+ struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);
struct kioctx *ctx = req->ki_ctx;
unsigned long flags;
- spin_lock_irqsave(&ctx->ctx_lock, flags);
-
- if (!req->ki_list.next)
- list_add(&req->ki_list, &ctx->active_reqs);
+ if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
+ return;
+ spin_lock_irqsave(&ctx->ctx_lock, flags);
+ list_add_tail(&req->ki_list, &ctx->active_reqs);
req->ki_cancel = cancel;
-
spin_unlock_irqrestore(&ctx->ctx_lock, flags);
}
EXPORT_SYMBOL(kiocb_set_cancel_fn);
-static int kiocb_cancel(struct aio_kiocb *kiocb)
-{
- kiocb_cancel_fn *old, *cancel;
-
- /*
- * Don't want to set kiocb->ki_cancel = KIOCB_CANCELLED unless it
- * actually has a cancel function, hence the cmpxchg()
- */
-
- cancel = READ_ONCE(kiocb->ki_cancel);
- do {
- if (!cancel || cancel == KIOCB_CANCELLED)
- return -EINVAL;
-
- old = cancel;
- cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
- } while (cancel != old);
-
- return cancel(&kiocb->common);
-}
-
/*
* free_ioctx() should be RCU delayed to synchronize against the RCU
* protected lookup_ioctx() and also needs process context to call
@@ -634,9 +620,8 @@ static void free_ioctx_users(struct percpu_ref *ref)
while (!list_empty(&ctx->active_reqs)) {
req = list_first_entry(&ctx->active_reqs,
struct aio_kiocb, ki_list);
-
+ req->ki_cancel(&req->rw);
list_del_init(&req->ki_list);
- kiocb_cancel(req);
}
spin_unlock_irq(&ctx->ctx_lock);
@@ -1042,7 +1027,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
goto out_put;
percpu_ref_get(&ctx->reqs);
-
+ INIT_LIST_HEAD(&req->ki_list);
req->ki_ctx = ctx;
return req;
out_put:
@@ -1050,15 +1035,6 @@ out_put:
return NULL;
}
-static void kiocb_free(struct aio_kiocb *req)
-{
- if (req->common.ki_filp)
- fput(req->common.ki_filp);
- if (req->ki_eventfd != NULL)
- eventfd_ctx_put(req->ki_eventfd);
- kmem_cache_free(kiocb_cachep, req);
-}
-
static struct kioctx *lookup_ioctx(unsigned long ctx_id)
{
struct aio_ring __user *ring = (void __user *)ctx_id;
@@ -1078,8 +1054,8 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
ctx = rcu_dereference(table->table[id]);
if (ctx && ctx->user_id == ctx_id) {
- percpu_ref_get(&ctx->users);
- ret = ctx;
+ if (percpu_ref_tryget_live(&ctx->users))
+ ret = ctx;
}
out:
rcu_read_unlock();
@@ -1089,44 +1065,14 @@ out:
/* aio_complete
* Called when the io request on the given iocb is complete.
*/
-static void aio_complete(struct kiocb *kiocb, long res, long res2)
+static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
{
- struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
struct kioctx *ctx = iocb->ki_ctx;
struct aio_ring *ring;
struct io_event *ev_page, *event;
unsigned tail, pos, head;
unsigned long flags;
- if (kiocb->ki_flags & IOCB_WRITE) {
- struct file *file = kiocb->ki_filp;
-
- /*
- * Tell lockdep we inherited freeze protection from submission
- * thread.
- */
- if (S_ISREG(file_inode(file)->i_mode))
- __sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE);
- file_end_write(file);
- }
-
- /*
- * Special case handling for sync iocbs:
- * - events go directly into the iocb for fast handling
- * - the sync task with the iocb in its stack holds the single iocb
- * ref, no other paths have a way to get another ref
- * - the sync task helpfully left a reference to itself in the iocb
- */
- BUG_ON(is_sync_kiocb(kiocb));
-
- if (iocb->ki_list.next) {
- unsigned long flags;
-
- spin_lock_irqsave(&ctx->ctx_lock, flags);
- list_del(&iocb->ki_list);
- spin_unlock_irqrestore(&ctx->ctx_lock, flags);
- }
-
/*
* Add a completion event to the ring buffer. Must be done holding
* ctx->completion_lock to prevent other code from messing with the tail
@@ -1180,11 +1126,12 @@ static void aio_complete(struct kiocb *kiocb, long res, long res2)
* eventfd. The eventfd_signal() function is safe to be called
* from IRQ context.
*/
- if (iocb->ki_eventfd != NULL)
+ if (iocb->ki_eventfd) {
eventfd_signal(iocb->ki_eventfd, 1);
+ eventfd_ctx_put(iocb->ki_eventfd);
+ }
- /* everything turned out well, dispose of the aiocb. */
- kiocb_free(iocb);
+ kmem_cache_free(kiocb_cachep, iocb);
/*
* We have to order our ring_info tail store above and test
@@ -1250,14 +1197,13 @@ static long aio_read_events_ring(struct kioctx *ctx,
if (head == tail)
break;
- avail = min(avail, nr - ret);
- avail = min_t(long, avail, AIO_EVENTS_PER_PAGE -
- ((head + AIO_EVENTS_OFFSET) % AIO_EVENTS_PER_PAGE));
-
pos = head + AIO_EVENTS_OFFSET;
page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
pos %= AIO_EVENTS_PER_PAGE;
+ avail = min(avail, nr - ret);
+ avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);
+
ev = kmap(page);
copy_ret = copy_to_user(event + ret, ev + pos,
sizeof(*ev) * avail);
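The reordering in this hunk computes the in-page position before clamping avail, so a single copy_to_user() never crosses a ring page. A worked sketch of the arithmetic, with illustrative constants (128 events per page, offset 8; the real values derive from the aio_ring layout and are not copied from the kernel headers):

#include <stdio.h>

#define EVENTS_PER_PAGE	128
#define EVENTS_OFFSET	8

int main(void)
{
	long head = 250, tail = 500, nr = 1000, ret = 0;
	long avail = tail - head;		/* 250, assuming no ring wrap */
	long pos = head + EVENTS_OFFSET;	/* 258 */
	long page_idx = pos / EVENTS_PER_PAGE;	/* page 2 of the ring */

	pos %= EVENTS_PER_PAGE;			/* slot 2 inside that page */
	if (avail > nr - ret)			/* clamp to what the caller asked for */
		avail = nr - ret;
	if (avail > EVENTS_PER_PAGE - pos)	/* clamp to this page: 126 slots */
		avail = EVENTS_PER_PAGE - pos;

	printf("copy %ld events from page %ld, offset %ld\n",
	       avail, page_idx, pos);
	return 0;
}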
@@ -1328,10 +1274,6 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
wait_event_interruptible_hrtimeout(ctx->wait,
aio_read_events(ctx, min_nr, nr, event, &ret),
until);
-
- if (!ret && signal_pending(current))
- ret = -EINTR;
-
return ret;
}
@@ -1447,6 +1389,58 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
return -EINVAL;
}
+static void aio_remove_iocb(struct aio_kiocb *iocb)
+{
+ struct kioctx *ctx = iocb->ki_ctx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->ctx_lock, flags);
+ list_del(&iocb->ki_list);
+ spin_unlock_irqrestore(&ctx->ctx_lock, flags);
+}
+
+static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
+{
+ struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, rw);
+
+ if (!list_empty_careful(&iocb->ki_list))
+ aio_remove_iocb(iocb);
+
+ if (kiocb->ki_flags & IOCB_WRITE) {
+ struct inode *inode = file_inode(kiocb->ki_filp);
+
+ /*
+ * Tell lockdep we inherited freeze protection from submission
+ * thread.
+ */
+ if (S_ISREG(inode->i_mode))
+ __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
+ file_end_write(kiocb->ki_filp);
+ }
+
+ fput(kiocb->ki_filp);
+ aio_complete(iocb, res, res2);
+}
+
+static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
+{
+ int ret;
+
+ req->ki_filp = fget(iocb->aio_fildes);
+ if (unlikely(!req->ki_filp))
+ return -EBADF;
+ req->ki_complete = aio_complete_rw;
+ req->ki_pos = iocb->aio_offset;
+ req->ki_flags = iocb_flags(req->ki_filp);
+ if (iocb->aio_flags & IOCB_FLAG_RESFD)
+ req->ki_flags |= IOCB_EVENTFD;
+ req->ki_hint = file_write_hint(req->ki_filp);
+ ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
+ if (unlikely(ret))
+ fput(req->ki_filp);
+ return ret;
+}
+
static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec,
bool vectored, bool compat, struct iov_iter *iter)
{
@@ -1466,11 +1460,11 @@ static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec,
return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter);
}
-static inline ssize_t aio_ret(struct kiocb *req, ssize_t ret)
+static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
{
switch (ret) {
case -EIOCBQUEUED:
- return ret;
+ break;
case -ERESTARTSYS:
case -ERESTARTNOINTR:
case -ERESTARTNOHAND:
@@ -1482,85 +1476,270 @@ static inline ssize_t aio_ret(struct kiocb *req, ssize_t ret)
ret = -EINTR;
/*FALLTHRU*/
default:
- aio_complete(req, ret, 0);
- return 0;
+ aio_complete_rw(req, ret, 0);
}
}
static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored,
bool compat)
{
- struct file *file = req->ki_filp;
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
+ struct file *file;
ssize_t ret;
+ ret = aio_prep_rw(req, iocb);
+ if (ret)
+ return ret;
+ file = req->ki_filp;
+
+ ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_READ)))
- return -EBADF;
+ goto out_fput;
+ ret = -EINVAL;
if (unlikely(!file->f_op->read_iter))
- return -EINVAL;
+ goto out_fput;
ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
if (ret)
- return ret;
+ goto out_fput;
ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
if (!ret)
- ret = aio_ret(req, call_read_iter(file, req, &iter));
+ aio_rw_done(req, call_read_iter(file, req, &iter));
kfree(iovec);
+out_fput:
+ if (unlikely(ret))
+ fput(file);
return ret;
}
static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored,
bool compat)
{
- struct file *file = req->ki_filp;
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
+ struct file *file;
ssize_t ret;
+ ret = aio_prep_rw(req, iocb);
+ if (ret)
+ return ret;
+ file = req->ki_filp;
+
+ ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_WRITE)))
- return -EBADF;
+ goto out_fput;
+ ret = -EINVAL;
if (unlikely(!file->f_op->write_iter))
- return -EINVAL;
+ goto out_fput;
ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
if (ret)
- return ret;
+ goto out_fput;
ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
if (!ret) {
- req->ki_flags |= IOCB_WRITE;
- file_start_write(file);
- ret = aio_ret(req, call_write_iter(file, req, &iter));
/*
- * We release freeze protection in aio_complete(). Fool lockdep
- * by telling it the lock got released so that it doesn't
- * complain about held lock when we return to userspace.
+ * Open-code file_start_write here to grab freeze protection,
+ * which will be released by another thread in
+ * aio_complete_rw(). Fool lockdep by telling it the lock got
+ * released so that it doesn't complain about the held lock when
+ * we return to userspace.
*/
- if (S_ISREG(file_inode(file)->i_mode))
+ if (S_ISREG(file_inode(file)->i_mode)) {
+ __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true);
__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
+ }
+ req->ki_flags |= IOCB_WRITE;
+ aio_rw_done(req, call_write_iter(file, req, &iter));
}
kfree(iovec);
+out_fput:
+ if (unlikely(ret))
+ fput(file);
return ret;
}
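The open-coded file_start_write() above is easiest to follow as a handoff diagram; this is a sketch of the pairing across threads, not additional patch code:

/*
 * submit (aio_write):
 *	__sb_start_write(sb, SB_FREEZE_WRITE, true);	// real freeze acquire
 *	__sb_writers_release(sb, SB_FREEZE_WRITE);	// lockdep-only release
 *	...the write goes asynchronous...
 * complete (aio_complete_rw):
 *	__sb_writers_acquired(sb, SB_FREEZE_WRITE);	// lockdep-only acquire
 *	file_end_write(file);				// real freeze release
 *
 * The freeze protection itself stays held across the handoff; only
 * lockdep's bookkeeping is transferred, so neither thread warns about
 * a lock it did not take or release.
 */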
+static void aio_fsync_work(struct work_struct *work)
+{
+ struct fsync_iocb *req = container_of(work, struct fsync_iocb, work);
+ int ret;
+
+ ret = vfs_fsync(req->file, req->datasync);
+ fput(req->file);
+ aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0);
+}
+
+static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
+{
+ if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes ||
+ iocb->aio_rw_flags))
+ return -EINVAL;
+ req->file = fget(iocb->aio_fildes);
+ if (unlikely(!req->file))
+ return -EBADF;
+ if (unlikely(!req->file->f_op->fsync)) {
+ fput(req->file);
+ return -EINVAL;
+ }
+
+ req->datasync = datasync;
+ INIT_WORK(&req->work, aio_fsync_work);
+ schedule_work(&req->work);
+ return 0;
+}
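aio_fsync() punts to a workqueue because vfs_fsync() sleeps and io_submit() must not. The same punt-and-complete pattern in miniature; a sketch with demo_* names, not real kernel symbols:

#include <linux/fs.h>
#include <linux/workqueue.h>

struct demo_req {
	struct work_struct work;
	struct file *file;
};

static void demo_work_fn(struct work_struct *work)
{
	struct demo_req *req = container_of(work, struct demo_req, work);

	/* Safe to sleep here: workqueues run in process context. */
	vfs_fsync(req->file, 0);
	fput(req->file);
}

static void demo_submit(struct demo_req *req)
{
	INIT_WORK(&req->work, demo_work_fn);
	schedule_work(&req->work);	/* returns immediately */
}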
+
+/* need to use list_del_init so we can check if item was present */
+static inline bool __aio_poll_remove(struct poll_iocb *req)
+{
+ if (list_empty(&req->wait.entry))
+ return false;
+ list_del_init(&req->wait.entry);
+ return true;
+}
+
+static inline void __aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
+{
+ fput(iocb->poll.file);
+ aio_complete(iocb, mangle_poll(mask), 0);
+}
+
+static void aio_poll_work(struct work_struct *work)
+{
+ struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, poll.work);
+
+ if (!list_empty_careful(&iocb->ki_list))
+ aio_remove_iocb(iocb);
+ __aio_poll_complete(iocb, iocb->poll.events);
+}
+
+static int aio_poll_cancel(struct kiocb *iocb)
+{
+ struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
+ struct poll_iocb *req = &aiocb->poll;
+ struct wait_queue_head *head = req->head;
+ bool found = false;
+
+ spin_lock(&head->lock);
+ found = __aio_poll_remove(req);
+ spin_unlock(&head->lock);
+
+ if (found) {
+ req->events = 0;
+ INIT_WORK(&req->work, aio_poll_work);
+ schedule_work(&req->work);
+ }
+ return 0;
+}
+
+static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+ void *key)
+{
+ struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
+ struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
+ struct file *file = req->file;
+ __poll_t mask = key_to_poll(key);
+
+ assert_spin_locked(&req->head->lock);
+
+ /* for instances that support it check for an event match first: */
+ if (mask && !(mask & req->events))
+ return 0;
+
+ mask = file->f_op->poll_mask(file, req->events);
+ if (!mask)
+ return 0;
+
+ __aio_poll_remove(req);
+
+ /*
+ * Try completing without a context switch if we can acquire ctx_lock
+ * without spinning. Otherwise we need to defer to a workqueue to
+ * avoid a deadlock due to the lock order.
+ */
+ if (spin_trylock(&iocb->ki_ctx->ctx_lock)) {
+ list_del_init(&iocb->ki_list);
+ spin_unlock(&iocb->ki_ctx->ctx_lock);
+
+ __aio_poll_complete(iocb, mask);
+ } else {
+ req->events = mask;
+ INIT_WORK(&req->work, aio_poll_work);
+ schedule_work(&req->work);
+ }
+
+ return 1;
+}
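The trylock above is about lock ordering: wakeups run with the waitqueue's head->lock already held, while the submission path in aio_poll() below takes ctx_lock first and then head->lock. A sketch of the inversion the workqueue fallback avoids:

/*
 * submit (aio_poll):       ctx->ctx_lock  ->  head->lock
 * wakeup (aio_poll_wake):  head->lock     ->  ctx->ctx_lock
 *
 * Blocking on ctx_lock here would invert the order the submit path
 * established (a classic ABBA deadlock). spin_trylock() either wins
 * and completes inline, or defers to aio_poll_work(), which runs with
 * no locks held.
 */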
+
+static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
+{
+ struct kioctx *ctx = aiocb->ki_ctx;
+ struct poll_iocb *req = &aiocb->poll;
+ __poll_t mask;
+
+ /* reject any unknown events outside the normal event mask. */
+ if ((u16)iocb->aio_buf != iocb->aio_buf)
+ return -EINVAL;
+ /* reject fields that are not defined for poll */
+ if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)
+ return -EINVAL;
+
+ req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
+ req->file = fget(iocb->aio_fildes);
+ if (unlikely(!req->file))
+ return -EBADF;
+ if (!file_has_poll_mask(req->file))
+ goto out_fail;
+
+ req->head = req->file->f_op->get_poll_head(req->file, req->events);
+ if (!req->head)
+ goto out_fail;
+ if (IS_ERR(req->head)) {
+ mask = EPOLLERR;
+ goto done;
+ }
+
+ init_waitqueue_func_entry(&req->wait, aio_poll_wake);
+ aiocb->ki_cancel = aio_poll_cancel;
+
+ spin_lock_irq(&ctx->ctx_lock);
+ spin_lock(&req->head->lock);
+ mask = req->file->f_op->poll_mask(req->file, req->events);
+ if (!mask) {
+ __add_wait_queue(req->head, &req->wait);
+ list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
+ }
+ spin_unlock(&req->head->lock);
+ spin_unlock_irq(&ctx->ctx_lock);
+done:
+ if (mask)
+ __aio_poll_complete(aiocb, mask);
+ return 0;
+out_fail:
+ fput(req->file);
+ return -EINVAL; /* same as no support for IOCB_CMD_POLL */
+}
+
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
- struct iocb *iocb, bool compat)
+ bool compat)
{
struct aio_kiocb *req;
- struct file *file;
+ struct iocb iocb;
ssize_t ret;
+ if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
+ return -EFAULT;
+
/* enforce forwards compatibility on users */
- if (unlikely(iocb->aio_reserved2)) {
+ if (unlikely(iocb.aio_reserved2)) {
pr_debug("EINVAL: reserve field set\n");
return -EINVAL;
}
/* prevent overflows */
if (unlikely(
- (iocb->aio_buf != (unsigned long)iocb->aio_buf) ||
- (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) ||
- ((ssize_t)iocb->aio_nbytes < 0)
+ (iocb.aio_buf != (unsigned long)iocb.aio_buf) ||
+ (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) ||
+ ((ssize_t)iocb.aio_nbytes < 0)
)) {
pr_debug("EINVAL: overflow check\n");
return -EINVAL;
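The cast-and-compare checks above reject values that would not survive conversion to the kernel's native types. A standalone sketch, using uint32_t to stand in for a 32-bit unsigned long:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t aio_buf = 0x100000000ULL;	/* needs 33 bits */
	uint32_t narrow = (uint32_t)aio_buf;	/* truncating conversion */
	uint64_t nbytes = 0x8000000000000000ULL;

	/* narrow is promoted back to 64 bits for the comparison */
	if (aio_buf != narrow)
		printf("EINVAL: aio_buf would be truncated\n");

	/* the (ssize_t) check: a huge size re-reads as negative */
	if ((int64_t)nbytes < 0)
		printf("EINVAL: aio_nbytes overflows ssize_t\n");
	return 0;
}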
@@ -1570,37 +1749,19 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
if (unlikely(!req))
return -EAGAIN;
- req->common.ki_filp = file = fget(iocb->aio_fildes);
- if (unlikely(!req->common.ki_filp)) {
- ret = -EBADF;
- goto out_put_req;
- }
- req->common.ki_pos = iocb->aio_offset;
- req->common.ki_complete = aio_complete;
- req->common.ki_flags = iocb_flags(req->common.ki_filp);
- req->common.ki_hint = file_write_hint(file);
-
- if (iocb->aio_flags & IOCB_FLAG_RESFD) {
+ if (iocb.aio_flags & IOCB_FLAG_RESFD) {
/*
* If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
* instance of the file* now. The file descriptor must be
* an eventfd() fd, and will be signaled for each completed
* event using the eventfd_signal() function.
*/
- req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd);
+ req->ki_eventfd = eventfd_ctx_fdget((int) iocb.aio_resfd);
if (IS_ERR(req->ki_eventfd)) {
ret = PTR_ERR(req->ki_eventfd);
req->ki_eventfd = NULL;
goto out_put_req;
}
-
- req->common.ki_flags |= IOCB_EVENTFD;
- }
-
- ret = kiocb_set_rw_flags(&req->common, iocb->aio_rw_flags);
- if (unlikely(ret)) {
- pr_debug("EINVAL: aio_rw_flags\n");
- goto out_put_req;
}
ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
@@ -1610,41 +1771,67 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
}
req->ki_user_iocb = user_iocb;
- req->ki_user_data = iocb->aio_data;
+ req->ki_user_data = iocb.aio_data;
- get_file(file);
- switch (iocb->aio_lio_opcode) {
+ switch (iocb.aio_lio_opcode) {
case IOCB_CMD_PREAD:
- ret = aio_read(&req->common, iocb, false, compat);
+ ret = aio_read(&req->rw, &iocb, false, compat);
break;
case IOCB_CMD_PWRITE:
- ret = aio_write(&req->common, iocb, false, compat);
+ ret = aio_write(&req->rw, &iocb, false, compat);
break;
case IOCB_CMD_PREADV:
- ret = aio_read(&req->common, iocb, true, compat);
+ ret = aio_read(&req->rw, &iocb, true, compat);
break;
case IOCB_CMD_PWRITEV:
- ret = aio_write(&req->common, iocb, true, compat);
+ ret = aio_write(&req->rw, &iocb, true, compat);
+ break;
+ case IOCB_CMD_FSYNC:
+ ret = aio_fsync(&req->fsync, &iocb, false);
+ break;
+ case IOCB_CMD_FDSYNC:
+ ret = aio_fsync(&req->fsync, &iocb, true);
+ break;
+ case IOCB_CMD_POLL:
+ ret = aio_poll(req, &iocb);
break;
default:
- pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode);
+ pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode);
ret = -EINVAL;
break;
}
- fput(file);
- if (ret && ret != -EIOCBQUEUED)
+ /*
+ * If ret is 0, we've either done aio_complete() ourselves or arranged
+ * for it to be done asynchronously. Anything non-zero means that we
+ * need to destroy req ourselves.
+ */
+ if (ret)
goto out_put_req;
return 0;
out_put_req:
put_reqs_available(ctx, 1);
percpu_ref_put(&ctx->reqs);
- kiocb_free(req);
+ if (req->ki_eventfd)
+ eventfd_ctx_put(req->ki_eventfd);
+ kmem_cache_free(kiocb_cachep, req);
return ret;
}
-static long do_io_submit(aio_context_t ctx_id, long nr,
- struct iocb __user *__user *iocbpp, bool compat)
+/* sys_io_submit:
+ * Queue the nr iocbs pointed to by iocbpp for processing. Returns
+ * the number of iocbs queued. May return -EINVAL if the aio_context
+ * specified by ctx_id is invalid, if nr is < 0, if the iocb at
+ * *iocbpp[0] is not properly initialized, or if the operation specified
+ * is invalid for the file descriptor in the iocb. May fail with
+ * -EFAULT if any of the data structures point to invalid data. May
+ * fail with -EBADF if the file descriptor specified in the first
+ * iocb is invalid. May fail with -EAGAIN if insufficient resources
+ * are available to queue any iocbs. Will return 0 if nr is 0. Will
+ * fail with -ENOSYS if not implemented.
+ */
+SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
+ struct iocb __user * __user *, iocbpp)
{
struct kioctx *ctx;
long ret = 0;
@@ -1654,39 +1841,25 @@ static long do_io_submit(aio_context_t ctx_id, long nr,
if (unlikely(nr < 0))
return -EINVAL;
- if (unlikely(nr > LONG_MAX/sizeof(*iocbpp)))
- nr = LONG_MAX/sizeof(*iocbpp);
-
- if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp)))))
- return -EFAULT;
-
ctx = lookup_ioctx(ctx_id);
if (unlikely(!ctx)) {
pr_debug("EINVAL: invalid context id\n");
return -EINVAL;
}
- blk_start_plug(&plug);
+ if (nr > ctx->nr_events)
+ nr = ctx->nr_events;
- /*
- * AKPM: should this return a partial result if some of the IOs were
- * successfully submitted?
- */
- for (i=0; i<nr; i++) {
+ blk_start_plug(&plug);
+ for (i = 0; i < nr; i++) {
struct iocb __user *user_iocb;
- struct iocb tmp;
- if (unlikely(__get_user(user_iocb, iocbpp + i))) {
+ if (unlikely(get_user(user_iocb, iocbpp + i))) {
ret = -EFAULT;
break;
}
- if (unlikely(copy_from_user(&tmp, user_iocb, sizeof(tmp)))) {
- ret = -EFAULT;
- break;
- }
-
- ret = io_submit_one(ctx, user_iocb, &tmp, compat);
+ ret = io_submit_one(ctx, user_iocb, false);
if (ret)
break;
}
@@ -1696,59 +1869,44 @@ static long do_io_submit(aio_context_t ctx_id, long nr,
return i ? i : ret;
}
-/* sys_io_submit:
- * Queue the nr iocbs pointed to by iocbpp for processing. Returns
- * the number of iocbs queued. May return -EINVAL if the aio_context
- * specified by ctx_id is invalid, if nr is < 0, if the iocb at
- * *iocbpp[0] is not properly initialized, if the operation specified
- * is invalid for the file descriptor in the iocb. May fail with
- * -EFAULT if any of the data structures point to invalid data. May
- * fail with -EBADF if the file descriptor specified in the first
- * iocb is invalid. May fail with -EAGAIN if insufficient resources
- * are available to queue any iocbs. Will return 0 if nr is 0. Will
- * fail with -ENOSYS if not implemented.
- */
-SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
- struct iocb __user * __user *, iocbpp)
-{
- return do_io_submit(ctx_id, nr, iocbpp, 0);
-}
-
#ifdef CONFIG_COMPAT
-static inline long
-copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64)
+COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
+ int, nr, compat_uptr_t __user *, iocbpp)
{
- compat_uptr_t uptr;
- int i;
+ struct kioctx *ctx;
+ long ret = 0;
+ int i = 0;
+ struct blk_plug plug;
- for (i = 0; i < nr; ++i) {
- if (get_user(uptr, ptr32 + i))
- return -EFAULT;
- if (put_user(compat_ptr(uptr), ptr64 + i))
- return -EFAULT;
+ if (unlikely(nr < 0))
+ return -EINVAL;
+
+ ctx = lookup_ioctx(ctx_id);
+ if (unlikely(!ctx)) {
+ pr_debug("EINVAL: invalid context id\n");
+ return -EINVAL;
}
- return 0;
-}
-#define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *))
+ if (nr > ctx->nr_events)
+ nr = ctx->nr_events;
-COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
- int, nr, u32 __user *, iocb)
-{
- struct iocb __user * __user *iocb64;
- long ret;
+ blk_start_plug(&plug);
+ for (i = 0; i < nr; i++) {
+ compat_uptr_t user_iocb;
- if (unlikely(nr < 0))
- return -EINVAL;
+ if (unlikely(get_user(user_iocb, iocbpp + i))) {
+ ret = -EFAULT;
+ break;
+ }
- if (nr > MAX_AIO_SUBMITS)
- nr = MAX_AIO_SUBMITS;
+ ret = io_submit_one(ctx, compat_ptr(user_iocb), true);
+ if (ret)
+ break;
+ }
+ blk_finish_plug(&plug);
- iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64));
- ret = copy_iocb(nr, iocb, iocb64);
- if (!ret)
- ret = do_io_submit(ctx_id, nr, iocb64, 1);
- return ret;
+ percpu_ref_put(&ctx->users);
+ return i ? i : ret;
}
#endif
@@ -1756,15 +1914,12 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
* Finds a given iocb for cancellation.
*/
static struct aio_kiocb *
-lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
+lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb)
{
struct aio_kiocb *kiocb;
assert_spin_locked(&ctx->ctx_lock);
- if (key != KIOCB_KEY)
- return NULL;
-
/* TODO: use a hash or array, this sucks. */
list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
if (kiocb->ki_user_iocb == iocb)
@@ -1788,25 +1943,24 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
{
struct kioctx *ctx;
struct aio_kiocb *kiocb;
+ int ret = -EINVAL;
u32 key;
- int ret;
- ret = get_user(key, &iocb->aio_key);
- if (unlikely(ret))
+ if (unlikely(get_user(key, &iocb->aio_key)))
return -EFAULT;
+ if (unlikely(key != KIOCB_KEY))
+ return -EINVAL;
ctx = lookup_ioctx(ctx_id);
if (unlikely(!ctx))
return -EINVAL;
spin_lock_irq(&ctx->ctx_lock);
-
- kiocb = lookup_kiocb(ctx, iocb, key);
- if (kiocb)
- ret = kiocb_cancel(kiocb);
- else
- ret = -EINVAL;
-
+ kiocb = lookup_kiocb(ctx, iocb);
+ if (kiocb) {
+ ret = kiocb->ki_cancel(&kiocb->rw);
+ list_del_init(&kiocb->ki_list);
+ }
spin_unlock_irq(&ctx->ctx_lock);
if (!ret) {
@@ -1861,13 +2015,60 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
struct timespec __user *, timeout)
{
struct timespec64 ts;
+ int ret;
+
+ if (timeout && unlikely(get_timespec64(&ts, timeout)))
+ return -EFAULT;
+
+ ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
+ if (!ret && signal_pending(current))
+ ret = -EINTR;
+ return ret;
+}
+
+SYSCALL_DEFINE6(io_pgetevents,
+ aio_context_t, ctx_id,
+ long, min_nr,
+ long, nr,
+ struct io_event __user *, events,
+ struct timespec __user *, timeout,
+ const struct __aio_sigset __user *, usig)
+{
+ struct __aio_sigset ksig = { NULL, };
+ sigset_t ksigmask, sigsaved;
+ struct timespec64 ts;
+ int ret;
+
+ if (timeout && unlikely(get_timespec64(&ts, timeout)))
+ return -EFAULT;
+
+ if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
+ return -EFAULT;
- if (timeout) {
- if (unlikely(get_timespec64(&ts, timeout)))
+ if (ksig.sigmask) {
+ if (ksig.sigsetsize != sizeof(sigset_t))
+ return -EINVAL;
+ if (copy_from_user(&ksigmask, ksig.sigmask, sizeof(ksigmask)))
return -EFAULT;
+ sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+ sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+ }
+
+ ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
+ if (signal_pending(current)) {
+ if (ksig.sigmask) {
+ current->saved_sigmask = sigsaved;
+ set_restore_sigmask();
+ }
+
+ if (!ret)
+ ret = -ERESTARTNOHAND;
+ } else {
+ if (ksig.sigmask)
+ sigprocmask(SIG_SETMASK, &sigsaved, NULL);
}
- return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
+ return ret;
}
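glibc may not wrap the new syscall yet, so a hedged userspace sketch of calling it raw; it assumes __NR_io_pgetevents exists in the installed headers and mirrors the __aio_sigset layout shown above:

#include <linux/aio_abi.h>
#include <signal.h>
#include <stddef.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

struct demo_aio_sigset {		/* mirrors the kernel's __aio_sigset */
	const sigset_t *sigmask;
	size_t sigsetsize;
};

static long demo_io_pgetevents(aio_context_t ctx, long min_nr, long nr,
			       struct io_event *events,
			       struct timespec *timeout,
			       const struct demo_aio_sigset *usig)
{
	/*
	 * Note: the kernel compares sigsetsize against its own sigset_t
	 * (_NSIG / 8 bytes), not glibc's much larger sigset_t, so callers
	 * pass _NSIG / 8 here, as userspace wrappers typically do.
	 */
	return syscall(__NR_io_pgetevents, ctx, min_nr, nr, events,
		       timeout, usig);
}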
#ifdef CONFIG_COMPAT
@@ -1878,13 +2079,64 @@ COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
struct compat_timespec __user *, timeout)
{
struct timespec64 t;
+ int ret;
+
+ if (timeout && compat_get_timespec64(&t, timeout))
+ return -EFAULT;
+
+ ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
+ if (!ret && signal_pending(current))
+ ret = -EINTR;
+ return ret;
+}
+
+struct __compat_aio_sigset {
+ compat_sigset_t __user *sigmask;
+ compat_size_t sigsetsize;
+};
+
+COMPAT_SYSCALL_DEFINE6(io_pgetevents,
+ compat_aio_context_t, ctx_id,
+ compat_long_t, min_nr,
+ compat_long_t, nr,
+ struct io_event __user *, events,
+ struct compat_timespec __user *, timeout,
+ const struct __compat_aio_sigset __user *, usig)
+{
+ struct __compat_aio_sigset ksig = { NULL, };
+ sigset_t ksigmask, sigsaved;
+ struct timespec64 t;
+ int ret;
+
+ if (timeout && compat_get_timespec64(&t, timeout))
+ return -EFAULT;
+
+ if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
+ return -EFAULT;
- if (timeout) {
- if (compat_get_timespec64(&t, timeout))
+ if (ksig.sigmask) {
+ if (ksig.sigsetsize != sizeof(compat_sigset_t))
+ return -EINVAL;
+ if (get_compat_sigset(&ksigmask, ksig.sigmask))
return -EFAULT;
+ sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+ sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+ }
+ ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
+ if (signal_pending(current)) {
+ if (ksig.sigmask) {
+ current->saved_sigmask = sigsaved;
+ set_restore_sigmask();
+ }
+ if (!ret)
+ ret = -ERESTARTNOHAND;
+ } else {
+ if (ksig.sigmask)
+ sigprocmask(SIG_SETMASK, &sigsaved, NULL);
}
- return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
+ return ret;
}
#endif
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index af2832aaeec5..4700b4534439 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -198,23 +198,16 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
if (ret == BEFS_BT_NOT_FOUND) {
befs_debug(sb, "<--- %s %pd not found", __func__, dentry);
- d_add(dentry, NULL);
- return ERR_PTR(-ENOENT);
-
+ inode = NULL;
} else if (ret != BEFS_OK || offset == 0) {
befs_error(sb, "<--- %s Error", __func__);
- return ERR_PTR(-ENODATA);
+ inode = ERR_PTR(-ENODATA);
+ } else {
+ inode = befs_iget(dir->i_sb, (ino_t) offset);
}
-
- inode = befs_iget(dir->i_sb, (ino_t) offset);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
-
- d_add(dentry, inode);
-
befs_debug(sb, "<--- %s", __func__);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
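This lookup conversion, like the bfs one below, leans on d_splice_alias() accepting every inode outcome. A hedged sketch of the resulting canonical shape; demo_find_ino() and demo_iget() are stand-ins for the per-filesystem directory search, not real helpers:

#include <linux/fs.h>

static struct dentry *demo_lookup(struct inode *dir, struct dentry *dentry,
				  unsigned int flags)
{
	struct inode *inode = NULL;
	ino_t ino = demo_find_ino(dir, &dentry->d_name);	/* hypothetical */

	if (ino)
		inode = demo_iget(dir->i_sb, ino);	/* may return ERR_PTR() */

	/*
	 * d_splice_alias() instantiates the dentry for a real inode,
	 * creates a negative dentry for NULL, and converts an ERR_PTR()
	 * inode into the matching error return, so every branch can
	 * funnel through this one exit.
	 */
	return d_splice_alias(inode, dentry);
}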
static int
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index ee832ca5f734..f32f21c3bbc7 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -21,10 +21,9 @@
#define dprintf(x...)
#endif
-static int bfs_add_entry(struct inode *dir, const unsigned char *name,
- int namelen, int ino);
+static int bfs_add_entry(struct inode *dir, const struct qstr *child, int ino);
static struct buffer_head *bfs_find_entry(struct inode *dir,
- const unsigned char *name, int namelen,
+ const struct qstr *child,
struct bfs_dirent **res_dir);
static int bfs_readdir(struct file *f, struct dir_context *ctx)
@@ -111,8 +110,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
mark_inode_dirty(inode);
bfs_dump_imap("create", s);
- err = bfs_add_entry(dir, dentry->d_name.name, dentry->d_name.len,
- inode->i_ino);
+ err = bfs_add_entry(dir, &dentry->d_name, inode->i_ino);
if (err) {
inode_dec_link_count(inode);
mutex_unlock(&info->bfs_lock);
@@ -136,19 +134,14 @@ static struct dentry *bfs_lookup(struct inode *dir, struct dentry *dentry,
return ERR_PTR(-ENAMETOOLONG);
mutex_lock(&info->bfs_lock);
- bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
+ bh = bfs_find_entry(dir, &dentry->d_name, &de);
if (bh) {
unsigned long ino = (unsigned long)le16_to_cpu(de->ino);
brelse(bh);
inode = bfs_iget(dir->i_sb, ino);
- if (IS_ERR(inode)) {
- mutex_unlock(&info->bfs_lock);
- return ERR_CAST(inode);
- }
}
mutex_unlock(&info->bfs_lock);
- d_add(dentry, inode);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
static int bfs_link(struct dentry *old, struct inode *dir,
@@ -159,8 +152,7 @@ static int bfs_link(struct dentry *old, struct inode *dir,
int err;
mutex_lock(&info->bfs_lock);
- err = bfs_add_entry(dir, new->d_name.name, new->d_name.len,
- inode->i_ino);
+ err = bfs_add_entry(dir, &new->d_name, inode->i_ino);
if (err) {
mutex_unlock(&info->bfs_lock);
return err;
@@ -183,7 +175,7 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry)
struct bfs_sb_info *info = BFS_SB(inode->i_sb);
mutex_lock(&info->bfs_lock);
- bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
+ bh = bfs_find_entry(dir, &dentry->d_name, &de);
if (!bh || (le16_to_cpu(de->ino) != inode->i_ino))
goto out_brelse;
@@ -228,27 +220,21 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
info = BFS_SB(old_inode->i_sb);
mutex_lock(&info->bfs_lock);
- old_bh = bfs_find_entry(old_dir,
- old_dentry->d_name.name,
- old_dentry->d_name.len, &old_de);
+ old_bh = bfs_find_entry(old_dir, &old_dentry->d_name, &old_de);
if (!old_bh || (le16_to_cpu(old_de->ino) != old_inode->i_ino))
goto end_rename;
error = -EPERM;
new_inode = d_inode(new_dentry);
- new_bh = bfs_find_entry(new_dir,
- new_dentry->d_name.name,
- new_dentry->d_name.len, &new_de);
+ new_bh = bfs_find_entry(new_dir, &new_dentry->d_name, &new_de);
if (new_bh && !new_inode) {
brelse(new_bh);
new_bh = NULL;
}
if (!new_bh) {
- error = bfs_add_entry(new_dir,
- new_dentry->d_name.name,
- new_dentry->d_name.len,
+ error = bfs_add_entry(new_dir, &new_dentry->d_name,
old_inode->i_ino);
if (error)
goto end_rename;
@@ -278,9 +264,10 @@ const struct inode_operations bfs_dir_inops = {
.rename = bfs_rename,
};
-static int bfs_add_entry(struct inode *dir, const unsigned char *name,
- int namelen, int ino)
+static int bfs_add_entry(struct inode *dir, const struct qstr *child, int ino)
{
+ const unsigned char *name = child->name;
+ int namelen = child->len;
struct buffer_head *bh;
struct bfs_dirent *de;
int block, sblock, eblock, off, pos;
@@ -332,12 +319,14 @@ static inline int bfs_namecmp(int len, const unsigned char *name,
}
static struct buffer_head *bfs_find_entry(struct inode *dir,
- const unsigned char *name, int namelen,
+ const struct qstr *child,
struct bfs_dirent **res_dir)
{
unsigned long block = 0, offset = 0;
struct buffer_head *bh = NULL;
struct bfs_dirent *de;
+ const unsigned char *name = child->name;
+ int namelen = child->len;
*res_dir = NULL;
if (namelen > BFS_NAMELEN)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7ec920e27065..bef6934b6189 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -272,7 +272,7 @@ struct blkdev_dio {
struct bio bio;
};
-static struct bio_set *blkdev_dio_pool __read_mostly;
+static struct bio_set blkdev_dio_pool;
static void blkdev_bio_end_io(struct bio *bio)
{
@@ -334,7 +334,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
(bdev_logical_block_size(bdev) - 1))
return -EINVAL;
- bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, blkdev_dio_pool);
+ bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
bio_get(bio); /* extra ref for the completion handler */
dio = container_of(bio, struct blkdev_dio, bio);
@@ -432,10 +432,7 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
static __init int blkdev_init(void)
{
- blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
- if (!blkdev_dio_pool)
- return -ENOMEM;
- return 0;
+ return bioset_init(&blkdev_dio_pool, 4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
}
module_init(blkdev_init);
@@ -1322,27 +1319,30 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty)
* check_disk_size_change - checks for disk size change and adjusts bdev size.
* @disk: struct gendisk to check
* @bdev: struct bdev to adjust.
+ * @verbose: if %true, log a message about a size change if there is one
*
* This routine checks whether the bdev size matches the disk size and
* adjusts it if it differs. When the bdev size is shrunk, all of its caches
* are freed.
*/
-void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
+void check_disk_size_change(struct gendisk *disk, struct block_device *bdev,
+ bool verbose)
{
loff_t disk_size, bdev_size;
disk_size = (loff_t)get_capacity(disk) << 9;
bdev_size = i_size_read(bdev->bd_inode);
if (disk_size != bdev_size) {
- printk(KERN_INFO
- "%s: detected capacity change from %lld to %lld\n",
- disk->disk_name, bdev_size, disk_size);
+ if (verbose) {
+ printk(KERN_INFO
+ "%s: detected capacity change from %lld to %lld\n",
+ disk->disk_name, bdev_size, disk_size);
+ }
i_size_write(bdev->bd_inode, disk_size);
if (bdev_size > disk_size)
flush_disk(bdev, false);
}
}
-EXPORT_SYMBOL(check_disk_size_change);
/**
* revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
@@ -1364,7 +1364,7 @@ int revalidate_disk(struct gendisk *disk)
return ret;
mutex_lock(&bdev->bd_mutex);
- check_disk_size_change(disk, bdev);
+ check_disk_size_change(disk, bdev, ret == 0);
bdev->bd_invalidated = 0;
mutex_unlock(&bdev->bd_mutex);
bdput(bdev);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 234bae55b85d..7e075343daa5 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -19,17 +19,17 @@
* ordered operations list so that we make sure to flush out any
* new data the application may have written before commit.
*/
-#define BTRFS_INODE_ORDERED_DATA_CLOSE 0
-#define BTRFS_INODE_ORPHAN_META_RESERVED 1
-#define BTRFS_INODE_DUMMY 2
-#define BTRFS_INODE_IN_DEFRAG 3
-#define BTRFS_INODE_HAS_ORPHAN_ITEM 4
-#define BTRFS_INODE_HAS_ASYNC_EXTENT 5
-#define BTRFS_INODE_NEEDS_FULL_SYNC 6
-#define BTRFS_INODE_COPY_EVERYTHING 7
-#define BTRFS_INODE_IN_DELALLOC_LIST 8
-#define BTRFS_INODE_READDIO_NEED_LOCK 9
-#define BTRFS_INODE_HAS_PROPS 10
+enum {
+ BTRFS_INODE_ORDERED_DATA_CLOSE = 0,
+ BTRFS_INODE_DUMMY,
+ BTRFS_INODE_IN_DEFRAG,
+ BTRFS_INODE_HAS_ASYNC_EXTENT,
+ BTRFS_INODE_NEEDS_FULL_SYNC,
+ BTRFS_INODE_COPY_EVERYTHING,
+ BTRFS_INODE_IN_DELALLOC_LIST,
+ BTRFS_INODE_READDIO_NEED_LOCK,
+ BTRFS_INODE_HAS_PROPS,
+};
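The enum keeps these values sequential without hand-maintained numbers; they remain bit indices into the inode's runtime_flags word, e.g. (sketch):

	unsigned long runtime_flags = 0;

	set_bit(BTRFS_INODE_IN_DEFRAG, &runtime_flags);
	if (test_bit(BTRFS_INODE_IN_DEFRAG, &runtime_flags))
		clear_bit(BTRFS_INODE_IN_DEFRAG, &runtime_flags);

Dropping the two orphan flags renumbers the remaining bits, which is safe because runtime_flags is in-memory state and never persisted.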
/* in memory btrfs inode */
struct btrfs_inode {
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 1061575a7d25..d3e447b45bf7 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -990,12 +990,7 @@ static void __free_workspace(int type, struct list_head *workspace,
btrfs_compress_op[idx]->free_workspace(workspace);
atomic_dec(total_ws);
wake:
- /*
- * Make sure counter is updated before we wake up waiters.
- */
- smp_mb();
- if (waitqueue_active(ws_wait))
- wake_up(ws_wait);
+ cond_wake_up(ws_wait);
}
static void free_workspace(int type, struct list_head *ws)
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index cc605f7b23fb..ddda9b80bf20 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -6,6 +6,8 @@
#ifndef BTRFS_COMPRESSION_H
#define BTRFS_COMPRESSION_H
+#include <linux/sizes.h>
+
/*
* We want to make sure that the amount of RAM required to uncompress an
* extent is reasonable, so we limit the total size in RAM of a compressed
* extent to
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 3fd44835b386..4bc326df472e 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2330,7 +2330,7 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
no_skips = 1;
t = path->nodes[i];
- if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
+ if (i >= lowest_unlock && i > skip_level) {
btrfs_tree_unlock_rw(t, path->locks[i]);
path->locks[i] = 0;
if (write_lock_level &&
@@ -2432,14 +2432,11 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
btrfs_unlock_up_safe(p, level + 1);
btrfs_set_path_blocking(p);
- free_extent_buffer(tmp);
if (p->reada != READA_NONE)
reada_for_search(fs_info, p, level, slot, key->objectid);
- btrfs_release_path(p);
-
ret = -EAGAIN;
- tmp = read_tree_block(fs_info, blocknr, 0, parent_level - 1,
+ tmp = read_tree_block(fs_info, blocknr, gen, parent_level - 1,
&first_key);
if (!IS_ERR(tmp)) {
/*
@@ -2448,12 +2445,14 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
* and give up so that our caller doesn't loop forever
* on our EAGAINs.
*/
- if (!btrfs_buffer_uptodate(tmp, 0, 0))
+ if (!extent_buffer_uptodate(tmp))
ret = -EIO;
free_extent_buffer(tmp);
} else {
ret = PTR_ERR(tmp);
}
+
+ btrfs_release_path(p);
return ret;
}
@@ -2599,6 +2598,78 @@ int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
return 0;
}
+static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
+ struct btrfs_path *p,
+ int write_lock_level)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct extent_buffer *b;
+ int root_lock;
+ int level = 0;
+
+ /* We try very hard to do read locks on the root */
+ root_lock = BTRFS_READ_LOCK;
+
+ if (p->search_commit_root) {
+ /* The commit roots are read only so we always do read locks */
+ if (p->need_commit_sem)
+ down_read(&fs_info->commit_root_sem);
+ b = root->commit_root;
+ extent_buffer_get(b);
+ level = btrfs_header_level(b);
+ if (p->need_commit_sem)
+ up_read(&fs_info->commit_root_sem);
+ /*
+ * Ensure that all callers have set skip_locking when
+ * p->search_commit_root = 1.
+ */
+ ASSERT(p->skip_locking == 1);
+
+ goto out;
+ }
+
+ if (p->skip_locking) {
+ b = btrfs_root_node(root);
+ level = btrfs_header_level(b);
+ goto out;
+ }
+
+ /*
+ * If the level is set to maximum, we can skip trying to get the read
+ * lock.
+ */
+ if (write_lock_level < BTRFS_MAX_LEVEL) {
+ /*
+ * We don't know the level of the root node until we actually
+ * have it read locked
+ */
+ b = btrfs_read_lock_root_node(root);
+ level = btrfs_header_level(b);
+ if (level > write_lock_level)
+ goto out;
+
+ /* Whoops, must trade for write lock */
+ btrfs_tree_read_unlock(b);
+ free_extent_buffer(b);
+ }
+
+ b = btrfs_lock_root_node(root);
+ root_lock = BTRFS_WRITE_LOCK;
+
+ /* The level might have changed, check again */
+ level = btrfs_header_level(b);
+
+out:
+ p->nodes[level] = b;
+ if (!p->skip_locking)
+ p->locks[level] = root_lock;
+ /*
+ * Callers are responsible for dropping b's references.
+ */
+ return b;
+}
+
+
/*
* btrfs_search_slot - look for a key in a tree and perform necessary
* modifications to preserve tree invariants.
@@ -2635,7 +2706,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int err;
int level;
int lowest_unlock = 1;
- int root_lock;
/* everything at write_lock_level or lower must be write locked */
int write_lock_level = 0;
u8 lowest_level = 0;
@@ -2673,50 +2743,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
again:
prev_cmp = -1;
- /*
- * we try very hard to do read locks on the root
- */
- root_lock = BTRFS_READ_LOCK;
- level = 0;
- if (p->search_commit_root) {
- /*
- * the commit roots are read only
- * so we always do read locks
- */
- if (p->need_commit_sem)
- down_read(&fs_info->commit_root_sem);
- b = root->commit_root;
- extent_buffer_get(b);
- level = btrfs_header_level(b);
- if (p->need_commit_sem)
- up_read(&fs_info->commit_root_sem);
- if (!p->skip_locking)
- btrfs_tree_read_lock(b);
- } else {
- if (p->skip_locking) {
- b = btrfs_root_node(root);
- level = btrfs_header_level(b);
- } else {
- /* we don't know the level of the root node
- * until we actually have it read locked
- */
- b = btrfs_read_lock_root_node(root);
- level = btrfs_header_level(b);
- if (level <= write_lock_level) {
- /* whoops, must trade for write lock */
- btrfs_tree_read_unlock(b);
- free_extent_buffer(b);
- b = btrfs_lock_root_node(root);
- root_lock = BTRFS_WRITE_LOCK;
-
- /* the level might have changed, check again */
- level = btrfs_header_level(b);
- }
- }
- }
- p->nodes[level] = b;
- if (!p->skip_locking)
- p->locks[level] = root_lock;
+ b = btrfs_search_slot_get_root(root, p, write_lock_level);
while (b) {
level = btrfs_header_level(b);
@@ -5414,12 +5441,24 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
down_read(&fs_info->commit_root_sem);
left_level = btrfs_header_level(left_root->commit_root);
left_root_level = left_level;
- left_path->nodes[left_level] = left_root->commit_root;
+ left_path->nodes[left_level] =
+ btrfs_clone_extent_buffer(left_root->commit_root);
+ if (!left_path->nodes[left_level]) {
+ up_read(&fs_info->commit_root_sem);
+ ret = -ENOMEM;
+ goto out;
+ }
extent_buffer_get(left_path->nodes[left_level]);
right_level = btrfs_header_level(right_root->commit_root);
right_root_level = right_level;
- right_path->nodes[right_level] = right_root->commit_root;
+ right_path->nodes[right_level] =
+ btrfs_clone_extent_buffer(right_root->commit_root);
+ if (!right_path->nodes[right_level]) {
+ up_read(&fs_info->commit_root_sem);
+ ret = -ENOMEM;
+ goto out;
+ }
extent_buffer_get(right_path->nodes[right_level]);
up_read(&fs_info->commit_root_sem);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2771cc56a622..f4bf7874c24a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -739,6 +739,12 @@ struct btrfs_delayed_root;
*/
#define BTRFS_FS_NEED_ASYNC_COMMIT 17
+/*
+ * Indicate that balance has been set up from the ioctl and is in the main
+ * phase. The fs_info::balance_ctl is initialized.
+ */
+#define BTRFS_FS_BALANCE_RUNNING 18
+
struct btrfs_fs_info {
u8 fsid[BTRFS_FSID_SIZE];
u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
@@ -838,7 +844,6 @@ struct btrfs_fs_info {
struct mutex transaction_kthread_mutex;
struct mutex cleaner_mutex;
struct mutex chunk_mutex;
- struct mutex volume_mutex;
/*
* this is taken to make sure we don't set block groups ro after
@@ -1004,7 +1009,6 @@ struct btrfs_fs_info {
/* restriper state */
spinlock_t balance_lock;
struct mutex balance_mutex;
- atomic_t balance_running;
atomic_t balance_pause_req;
atomic_t balance_cancel_req;
struct btrfs_balance_control *balance_ctl;
@@ -1219,9 +1223,6 @@ struct btrfs_root {
spinlock_t log_extents_lock[2];
struct list_head logged_list[2];
- spinlock_t orphan_lock;
- atomic_t orphan_inodes;
- struct btrfs_block_rsv *orphan_block_rsv;
int orphan_cleanup_state;
spinlock_t inode_lock;
@@ -2764,13 +2765,9 @@ void btrfs_delalloc_release_space(struct inode *inode,
void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
u64 len);
void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
-int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
- struct btrfs_inode *inode);
-void btrfs_orphan_release_metadata(struct btrfs_inode *inode);
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv,
- int nitems,
- u64 *qgroup_reserved, bool use_global_rsv);
+ int nitems, bool use_global_rsv);
void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
@@ -2828,7 +2825,7 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
void check_system_chunk(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, const u64 type);
u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
- struct btrfs_fs_info *info, u64 start, u64 end);
+ u64 start, u64 end);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
@@ -3042,11 +3039,9 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
/* uuid-tree.c */
-int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u8 *uuid, u8 type,
+int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
u64 subid);
-int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u8 *uuid, u8 type,
+int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
u64 subid);
int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
int (*check_func)(struct btrfs_fs_info *, u8 *, u8,
@@ -3163,18 +3158,6 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
struct extent_map *em);
/* inode.c */
-struct btrfs_delalloc_work {
- struct inode *inode;
- int delay_iput;
- struct completion completion;
- struct list_head list;
- struct btrfs_work work;
-};
-
-struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
- int delay_iput);
-void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work);
-
struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
struct page *page, size_t pg_offset, u64 start,
u64 len, int create);
@@ -3182,6 +3165,8 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len,
u64 *ram_bytes);
+void __btrfs_del_delalloc_inode(struct btrfs_root *root,
+ struct btrfs_inode *inode);
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index);
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
@@ -3191,10 +3176,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
int btrfs_add_link(struct btrfs_trans_handle *trans,
struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
const char *name, int name_len, int add_backref, u64 index);
-int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *dir, u64 objectid,
- const char *name, int name_len);
+int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry);
int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
int front);
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
@@ -3202,9 +3184,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct inode *inode, u64 new_size,
u32 min_type);
-int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
-int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput,
- int nr);
+int btrfs_start_delalloc_inodes(struct btrfs_root *root);
+int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int nr);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
unsigned int extra_bits,
struct extent_state **cached_state, int dedupe);
@@ -3238,10 +3219,7 @@ int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
int btrfs_orphan_add(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode);
int btrfs_orphan_cleanup(struct btrfs_root *root);
-void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
-void btrfs_invalidate_inodes(struct btrfs_root *root);
void btrfs_add_delayed_iput(struct inode *inode);
void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info);
int btrfs_prealloc_file_range(struct inode *inode, int mode,
@@ -3260,14 +3238,14 @@ void btrfs_test_inode_set_ops(struct inode *inode);
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
int btrfs_ioctl_get_supported_features(void __user *arg);
-void btrfs_update_iflags(struct inode *inode);
+void btrfs_sync_inode_flags_to_i_flags(struct inode *inode);
int btrfs_is_empty_uuid(u8 *uuid);
int btrfs_defrag_file(struct inode *inode, struct file *file,
struct btrfs_ioctl_defrag_range_args *range,
u64 newer_than, unsigned long max_pages);
void btrfs_get_block_group_info(struct list_head *groups_list,
struct btrfs_ioctl_space_info *space);
-void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
+void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_balance_args *bargs);
ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
struct file *dst_file, u64 dst_loff);
@@ -3765,4 +3743,26 @@ static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info)
return 0;
}
+static inline void cond_wake_up(struct wait_queue_head *wq)
+{
+ /*
+ * This implies a full smp_mb() barrier; see the comments for
+ * waitqueue_active() for why.
+ */
+ if (wq_has_sleeper(wq))
+ wake_up(wq);
+}
+
+static inline void cond_wake_up_nomb(struct wait_queue_head *wq)
+{
+ /*
+ * Special case for a conditional wakeup where the barrier required for
+ * waitqueue_active() is implied by some preceding code, e.g. one of the
+ * atomic operations (atomic_dec_return(), ...) or an unlock/lock
+ * sequence.
+ */
+ if (waitqueue_active(wq))
+ wake_up(wq);
+}
+
#endif
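A usage sketch for the two helpers above (demo_* names are illustrative, not btrfs symbols): the plain variant pays for its own barrier through wq_has_sleeper(), while the _nomb variant is only safe when the caller has already ordered its store, as the delayed-inode conversion below does with atomic_dec_return():

#include <linux/atomic.h>
#include <linux/wait.h>

static atomic_t demo_pending;
static wait_queue_head_t demo_wq;

static void demo_finish_one(void)
{
	/* atomic_dec_return() already implies the full barrier */
	if (atomic_dec_return(&demo_pending) == 0)
		cond_wake_up_nomb(&demo_wq);
}

static void demo_mark_done(unsigned long *flags)
{
	set_bit(0, flags);
	/* no implied barrier here; wq_has_sleeper() provides one */
	cond_wake_up(&demo_wq);
}

Waiters pair with either variant through a plain wait_event(demo_wq, ...).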
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index a8d492dbd3e7..fe6caa7e698b 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -460,13 +460,10 @@ static void finish_one_item(struct btrfs_delayed_root *delayed_root)
{
int seq = atomic_inc_return(&delayed_root->items_seq);
- /*
- * atomic_dec_return implies a barrier for waitqueue_active
- */
+ /* atomic_dec_return implies a barrier */
if ((atomic_dec_return(&delayed_root->items) <
- BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) &&
- waitqueue_active(&delayed_root->wait))
- wake_up(&delayed_root->wait);
+ BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0))
+ cond_wake_up_nomb(&delayed_root->wait);
}
static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index e1b0651686f7..03dec673d12a 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -286,10 +286,10 @@ static bool merge_ref(struct btrfs_trans_handle *trans,
}
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_ref_node *ref;
struct rb_node *node;
u64 seq = 0;
@@ -323,9 +323,7 @@ again:
}
}
-int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
- struct btrfs_delayed_ref_root *delayed_refs,
- u64 seq)
+int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
{
struct seq_list *elem;
int ret = 0;
@@ -336,10 +334,9 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct seq_list, list);
if (seq >= elem->seq) {
btrfs_debug(fs_info,
- "holding back delayed_ref %#x.%x, lowest is %#x.%x (%p)",
+ "holding back delayed_ref %#x.%x, lowest is %#x.%x",
(u32)(seq >> 32), (u32)seq,
- (u32)(elem->seq >> 32), (u32)elem->seq,
- delayed_refs);
+ (u32)(elem->seq >> 32), (u32)elem->seq);
ret = 1;
}
}
@@ -529,33 +526,20 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
spin_unlock(&existing->lock);
}
-/*
- * helper function to actually insert a head node into the rbtree.
- * this does all the dirty work in terms of maintaining the correct
- * overall modification count.
- */
-static noinline struct btrfs_delayed_ref_head *
-add_delayed_ref_head(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_head *head_ref,
- struct btrfs_qgroup_extent_record *qrecord,
- u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
- int action, int is_data, int is_system,
- int *qrecord_inserted_ret,
- int *old_ref_mod, int *new_ref_mod)
-
+static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
+ struct btrfs_qgroup_extent_record *qrecord,
+ u64 bytenr, u64 num_bytes, u64 ref_root,
+ u64 reserved, int action, bool is_data,
+ bool is_system)
{
- struct btrfs_delayed_ref_head *existing;
- struct btrfs_delayed_ref_root *delayed_refs;
int count_mod = 1;
int must_insert_reserved = 0;
- int qrecord_inserted = 0;
/* If reserved is provided, it must be a data extent. */
BUG_ON(!is_data && reserved);
/*
- * the head node stores the sum of all the mods, so dropping a ref
+ * The head node stores the sum of all the mods, so dropping a ref
* should drop the sum in the head node by one.
*/
if (action == BTRFS_UPDATE_DELAYED_HEAD)
@@ -564,12 +548,11 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
count_mod = -1;
/*
- * BTRFS_ADD_DELAYED_EXTENT means that we need to update
- * the reserved accounting when the extent is finally added, or
- * if a later modification deletes the delayed ref without ever
- * inserting the extent into the extent allocation tree.
- * ref->must_insert_reserved is the flag used to record
- * that accounting mods are required.
+ * BTRFS_ADD_DELAYED_EXTENT means that we need to update the reserved
+ * accounting when the extent is finally added, or if a later
+ * modification deletes the delayed ref without ever inserting the
+ * extent into the extent allocation tree. ref->must_insert_reserved
+ * is the flag used to record that accounting mods are required.
*
* Once we record must_insert_reserved, switch the action to
* BTRFS_ADD_DELAYED_REF because other special casing is not required.
@@ -579,8 +562,6 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
else
must_insert_reserved = 0;
- delayed_refs = &trans->transaction->delayed_refs;
-
refcount_set(&head_ref->refs, 1);
head_ref->bytenr = bytenr;
head_ref->num_bytes = num_bytes;
@@ -598,7 +579,6 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
spin_lock_init(&head_ref->lock);
mutex_init(&head_ref->mutex);
- /* Record qgroup extent info if provided */
if (qrecord) {
if (ref_root && reserved) {
head_ref->qgroup_ref_root = ref_root;
@@ -608,20 +588,44 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
qrecord->bytenr = bytenr;
qrecord->num_bytes = num_bytes;
qrecord->old_roots = NULL;
+ }
+}
+
+/*
+ * helper function to actually insert a head node into the rbtree.
+ * this does all the dirty work in terms of maintaining the correct
+ * overall modification count.
+ */
+static noinline struct btrfs_delayed_ref_head *
+add_delayed_ref_head(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_head *head_ref,
+ struct btrfs_qgroup_extent_record *qrecord,
+ int action, int *qrecord_inserted_ret,
+ int *old_ref_mod, int *new_ref_mod)
+{
+ struct btrfs_delayed_ref_head *existing;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ int qrecord_inserted = 0;
- if(btrfs_qgroup_trace_extent_nolock(fs_info,
+ delayed_refs = &trans->transaction->delayed_refs;
+
+ /* Record qgroup extent info if provided */
+ if (qrecord) {
+ if (btrfs_qgroup_trace_extent_nolock(trans->fs_info,
delayed_refs, qrecord))
kfree(qrecord);
else
qrecord_inserted = 1;
}
- trace_add_delayed_ref_head(fs_info, head_ref, action);
+ trace_add_delayed_ref_head(trans->fs_info, head_ref, action);
existing = htree_insert(&delayed_refs->href_root,
&head_ref->href_node);
if (existing) {
- WARN_ON(ref_root && reserved && existing->qgroup_ref_root
+ WARN_ON(qrecord && head_ref->qgroup_ref_root
+ && head_ref->qgroup_reserved
+ && existing->qgroup_ref_root
&& existing->qgroup_reserved);
update_existing_head_ref(delayed_refs, existing, head_ref,
old_ref_mod);
@@ -634,8 +638,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
} else {
if (old_ref_mod)
*old_ref_mod = 0;
- if (is_data && count_mod < 0)
- delayed_refs->pending_csums += num_bytes;
+ if (head_ref->is_data && head_ref->ref_mod < 0)
+ delayed_refs->pending_csums += head_ref->num_bytes;
delayed_refs->num_heads++;
delayed_refs->num_heads_ready++;
atomic_inc(&delayed_refs->num_entries);
@@ -645,90 +649,48 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
*qrecord_inserted_ret = qrecord_inserted;
if (new_ref_mod)
*new_ref_mod = head_ref->total_ref_mod;
- return head_ref;
-}
-
-/*
- * helper to insert a delayed tree ref into the rbtree.
- */
-static noinline void
-add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_head *head_ref,
- struct btrfs_delayed_ref_node *ref, u64 bytenr,
- u64 num_bytes, u64 parent, u64 ref_root, int level,
- int action)
-{
- struct btrfs_delayed_tree_ref *full_ref;
- struct btrfs_delayed_ref_root *delayed_refs;
- u64 seq = 0;
- int ret;
-
- if (action == BTRFS_ADD_DELAYED_EXTENT)
- action = BTRFS_ADD_DELAYED_REF;
- if (is_fstree(ref_root))
- seq = atomic64_read(&fs_info->tree_mod_seq);
- delayed_refs = &trans->transaction->delayed_refs;
-
- /* first set the basic ref node struct up */
- refcount_set(&ref->refs, 1);
- ref->bytenr = bytenr;
- ref->num_bytes = num_bytes;
- ref->ref_mod = 1;
- ref->action = action;
- ref->is_head = 0;
- ref->in_tree = 1;
- ref->seq = seq;
- RB_CLEAR_NODE(&ref->ref_node);
- INIT_LIST_HEAD(&ref->add_list);
-
- full_ref = btrfs_delayed_node_to_tree_ref(ref);
- full_ref->parent = parent;
- full_ref->root = ref_root;
- if (parent)
- ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
- else
- ref->type = BTRFS_TREE_BLOCK_REF_KEY;
- full_ref->level = level;
-
- trace_add_delayed_tree_ref(fs_info, ref, full_ref, action);
-
- ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);
-
- /*
- * XXX: memory should be freed at the same level allocated.
- * But bad practice is anywhere... Follow it now. Need cleanup.
- */
- if (ret > 0)
- kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
+ return head_ref;
}
/*
- * helper to insert a delayed data ref into the rbtree.
+ * init_delayed_ref_common - Initialize the structure which represents a
+ * modification to a an extent.
+ *
+ * @fs_info: Internal to the mounted filesystem mount structure.
+ *
+ * @ref: The structure which is going to be initialized.
+ *
+ * @bytenr: The logical address of the extent for which a modification is
+ * going to be recorded.
+ *
+ * @num_bytes: Size of the extent whose modification is being recorded.
+ *
+ * @ref_root: The id of the root where this modification has originated, this
+ * can be either one of the well-known metadata trees or the
+ * subvolume id which references this extent.
+ *
+ * @action: Can be one of BTRFS_ADD_DELAYED_REF/BTRFS_DROP_DELAYED_REF or
+ * BTRFS_ADD_DELAYED_EXTENT
+ *
+ * @ref_type: Holds the type of the extent which is being recorded; it can be
+ * one of BTRFS_SHARED_BLOCK_REF_KEY/BTRFS_TREE_BLOCK_REF_KEY
+ * when recording a metadata extent or BTRFS_SHARED_DATA_REF_KEY/
+ * BTRFS_EXTENT_DATA_REF_KEY when recording a data extent
*/
-static noinline void
-add_delayed_data_ref(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_head *head_ref,
- struct btrfs_delayed_ref_node *ref, u64 bytenr,
- u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
- u64 offset, int action)
+static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_node *ref,
+ u64 bytenr, u64 num_bytes, u64 ref_root,
+ int action, u8 ref_type)
{
- struct btrfs_delayed_data_ref *full_ref;
- struct btrfs_delayed_ref_root *delayed_refs;
u64 seq = 0;
- int ret;
if (action == BTRFS_ADD_DELAYED_EXTENT)
action = BTRFS_ADD_DELAYED_REF;
- delayed_refs = &trans->transaction->delayed_refs;
-
if (is_fstree(ref_root))
seq = atomic64_read(&fs_info->tree_mod_seq);
- /* first set the basic ref node struct up */
refcount_set(&ref->refs, 1);
ref->bytenr = bytenr;
ref->num_bytes = num_bytes;
@@ -737,25 +699,9 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
ref->is_head = 0;
ref->in_tree = 1;
ref->seq = seq;
+ ref->type = ref_type;
RB_CLEAR_NODE(&ref->ref_node);
INIT_LIST_HEAD(&ref->add_list);
-
- full_ref = btrfs_delayed_node_to_data_ref(ref);
- full_ref->parent = parent;
- full_ref->root = ref_root;
- if (parent)
- ref->type = BTRFS_SHARED_DATA_REF_KEY;
- else
- ref->type = BTRFS_EXTENT_DATA_REF_KEY;
-
- full_ref->objectid = owner;
- full_ref->offset = offset;
-
- trace_add_delayed_data_ref(fs_info, ref, full_ref, action);
-
- ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);
- if (ret > 0)
- kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
}
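
Both callers below derive the ref_type argument with the same parent-based rule before calling init_delayed_ref_common(). As a compressed, illustrative restatement (example_ref_type() is not a helper in this patch):

/*
 * Illustrative only -- not part of this patch. A nonzero parent means the
 * extent is shared (referenced via a specific tree block) and gets a
 * SHARED_*_REF_KEY; otherwise the ref is keyed by the owning root.
 */
static u8 example_ref_type(u64 parent, bool is_data)
{
	if (is_data)
		return parent ? BTRFS_SHARED_DATA_REF_KEY :
				BTRFS_EXTENT_DATA_REF_KEY;
	return parent ? BTRFS_SHARED_BLOCK_REF_KEY :
			BTRFS_TREE_BLOCK_REF_KEY;
}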
/*
@@ -775,13 +721,25 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
int qrecord_inserted;
- int is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);
+ bool is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);
+ int ret;
+ u8 ref_type;
BUG_ON(extent_op && extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
if (!ref)
return -ENOMEM;
+ if (parent)
+ ref_type = BTRFS_SHARED_BLOCK_REF_KEY;
+ else
+ ref_type = BTRFS_TREE_BLOCK_REF_KEY;
+ init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
+ ref_root, action, ref_type);
+ ref->root = ref_root;
+ ref->parent = parent;
+ ref->level = level;
+
head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
if (!head_ref)
goto free_ref;
@@ -793,6 +751,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
goto free_head_ref;
}
+ init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
+ ref_root, 0, action, false, is_system);
head_ref->extent_op = extent_op;
delayed_refs = &trans->transaction->delayed_refs;
@@ -802,15 +762,19 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
* insert both the head node and the new ref without dropping
* the spin lock
*/
- head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
- bytenr, num_bytes, 0, 0, action, 0,
- is_system, &qrecord_inserted,
+ head_ref = add_delayed_ref_head(trans, head_ref, record,
+ action, &qrecord_inserted,
old_ref_mod, new_ref_mod);
- add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
- num_bytes, parent, ref_root, level, action);
+ ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
spin_unlock(&delayed_refs->lock);
+ trace_add_delayed_tree_ref(fs_info, &ref->node, ref,
+ action == BTRFS_ADD_DELAYED_EXTENT ?
+ BTRFS_ADD_DELAYED_REF : action);
+ if (ret > 0)
+ kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
+
if (qrecord_inserted)
btrfs_qgroup_trace_extent_post(fs_info, record);
@@ -839,11 +803,25 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
int qrecord_inserted;
+ int ret;
+ u8 ref_type;
ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
if (!ref)
return -ENOMEM;
+ if (parent)
+ ref_type = BTRFS_SHARED_DATA_REF_KEY;
+ else
+ ref_type = BTRFS_EXTENT_DATA_REF_KEY;
+ init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
+ ref_root, action, ref_type);
+ ref->root = ref_root;
+ ref->parent = parent;
+ ref->objectid = owner;
+ ref->offset = offset;
+
head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
if (!head_ref) {
kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
@@ -861,6 +839,8 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
}
}
+ init_delayed_ref_head(head_ref, record, bytenr, num_bytes, ref_root,
+ reserved, action, true, false);
head_ref->extent_op = NULL;
delayed_refs = &trans->transaction->delayed_refs;
@@ -870,16 +850,20 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
* insert both the head node and the new ref without dropping
* the spin lock
*/
- head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
- bytenr, num_bytes, ref_root, reserved,
- action, 1, 0, &qrecord_inserted,
+ head_ref = add_delayed_ref_head(trans, head_ref, record,
+ action, &qrecord_inserted,
old_ref_mod, new_ref_mod);
- add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
- num_bytes, parent, ref_root, owner, offset,
- action);
+ ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
spin_unlock(&delayed_refs->lock);
+ trace_add_delayed_data_ref(trans->fs_info, &ref->node, ref,
+ action == BTRFS_ADD_DELAYED_EXTENT ?
+ BTRFS_ADD_DELAYED_REF : action);
+ if (ret > 0)
+ kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
+
if (qrecord_inserted)
return btrfs_qgroup_trace_extent_post(fs_info, record);
return 0;
@@ -897,19 +881,16 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
if (!head_ref)
return -ENOMEM;
+ init_delayed_ref_head(head_ref, NULL, bytenr, num_bytes, 0, 0,
+ BTRFS_UPDATE_DELAYED_HEAD, extent_op->is_data,
+ false);
head_ref->extent_op = extent_op;
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
- /*
- * extent_ops just modify the flags of an extent and they don't result
- * in ref count changes, hence it's safe to pass false/0 for is_system
- * argument
- */
- add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr,
- num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
- extent_op->is_data, 0, NULL, NULL, NULL);
+ add_delayed_ref_head(trans, head_ref, NULL, BTRFS_UPDATE_DELAYED_HEAD,
+ NULL, NULL, NULL);
spin_unlock(&delayed_refs->lock);
return 0;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 7f00db50bd24..ea1aecb6a50d 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -251,7 +251,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes,
struct btrfs_delayed_extent_op *extent_op);
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head);
@@ -269,9 +268,7 @@ static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head)
struct btrfs_delayed_ref_head *
btrfs_select_ref_head(struct btrfs_trans_handle *trans);
-int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
- struct btrfs_delayed_ref_root *delayed_refs,
- u64 seq);
+int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq);
/*
* helper functions to cast a node into its container
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index f82be266ba4b..e2ba0419297a 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -33,8 +33,6 @@ static void btrfs_dev_replace_update_device_in_mapping_tree(
struct btrfs_device *srcdev,
struct btrfs_device *tgtdev);
static int btrfs_dev_replace_kthread(void *data);
-static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info);
-
int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
{
@@ -179,6 +177,105 @@ out:
}
/*
+ * Initialize a new device for a device replace target from a given source
+ * device and path.
+ *
+ * Returns 0 and the new device in @device_out, otherwise returns < 0.
+ */
+static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
+ const char *device_path,
+ struct btrfs_device *srcdev,
+ struct btrfs_device **device_out)
+{
+ struct btrfs_device *device;
+ struct block_device *bdev;
+ struct list_head *devices;
+ struct rcu_string *name;
+ u64 devid = BTRFS_DEV_REPLACE_DEVID;
+ int ret = 0;
+
+ *device_out = NULL;
+ if (fs_info->fs_devices->seeding) {
+ btrfs_err(fs_info, "the filesystem is a seed filesystem!");
+ return -EINVAL;
+ }
+
+ bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
+ fs_info->bdev_holder);
+ if (IS_ERR(bdev)) {
+ btrfs_err(fs_info, "target device %s is invalid!", device_path);
+ return PTR_ERR(bdev);
+ }
+
+ filemap_write_and_wait(bdev->bd_inode->i_mapping);
+
+ devices = &fs_info->fs_devices->devices;
+ list_for_each_entry(device, devices, dev_list) {
+ if (device->bdev == bdev) {
+ btrfs_err(fs_info,
+ "target device is in the filesystem!");
+ ret = -EEXIST;
+ goto error;
+ }
+ }
+
+ if (i_size_read(bdev->bd_inode) <
+ btrfs_device_get_total_bytes(srcdev)) {
+ btrfs_err(fs_info,
+ "target device is smaller than source device!");
+ ret = -EINVAL;
+ goto error;
+ }
+
+ device = btrfs_alloc_device(NULL, &devid, NULL);
+ if (IS_ERR(device)) {
+ ret = PTR_ERR(device);
+ goto error;
+ }
+
+ name = rcu_string_strdup(device_path, GFP_KERNEL);
+ if (!name) {
+ btrfs_free_device(device);
+ ret = -ENOMEM;
+ goto error;
+ }
+ rcu_assign_pointer(device->name, name);
+
+ mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
+ device->generation = 0;
+ device->io_width = fs_info->sectorsize;
+ device->io_align = fs_info->sectorsize;
+ device->sector_size = fs_info->sectorsize;
+ device->total_bytes = btrfs_device_get_total_bytes(srcdev);
+ device->disk_total_bytes = btrfs_device_get_disk_total_bytes(srcdev);
+ device->bytes_used = btrfs_device_get_bytes_used(srcdev);
+ device->commit_total_bytes = srcdev->commit_total_bytes;
+ device->commit_bytes_used = device->bytes_used;
+ device->fs_info = fs_info;
+ device->bdev = bdev;
+ set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
+ set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
+ device->mode = FMODE_EXCL;
+ device->dev_stats_valid = 1;
+ set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
+ device->fs_devices = fs_info->fs_devices;
+ list_add(&device->dev_list, &fs_info->fs_devices->devices);
+ fs_info->fs_devices->num_devices++;
+ fs_info->fs_devices->open_devices++;
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+
+ *device_out = device;
+ return 0;
+
+error:
+ blkdev_put(bdev, FMODE_EXCL);
+ return ret;
+}
+
+/*
* called from commit_transaction. Writes changed device replace state to
* disk.
*/
@@ -317,18 +414,13 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
struct btrfs_device *tgt_device = NULL;
struct btrfs_device *src_device = NULL;
- /* the disk copy procedure reuses the scrub code */
- mutex_lock(&fs_info->volume_mutex);
ret = btrfs_find_device_by_devspec(fs_info, srcdevid,
srcdev_name, &src_device);
- if (ret) {
- mutex_unlock(&fs_info->volume_mutex);
+ if (ret)
return ret;
- }
ret = btrfs_init_dev_replace_tgtdev(fs_info, tgtdev_name,
src_device, &tgt_device);
- mutex_unlock(&fs_info->volume_mutex);
if (ret)
return ret;
@@ -360,7 +452,6 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
dev_replace->cont_reading_from_srcdev_mode = read_src;
WARN_ON(!src_device);
dev_replace->srcdev = src_device;
- WARN_ON(!tgt_device);
dev_replace->tgtdev = tgt_device;
btrfs_info_in_rcu(fs_info,
@@ -503,7 +594,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
* flush all outstanding I/O and inode extent mappings before the
* copy operation is declared as being finished
*/
- ret = btrfs_start_delalloc_roots(fs_info, 0, -1);
+ ret = btrfs_start_delalloc_roots(fs_info, -1);
if (ret) {
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return ret;
@@ -518,7 +609,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
ret = btrfs_commit_transaction(trans);
WARN_ON(ret);
- mutex_lock(&uuid_mutex);
/* keep away write_all_supers() during the finishing procedure */
mutex_lock(&fs_info->fs_devices->device_list_mutex);
mutex_lock(&fs_info->chunk_mutex);
@@ -545,7 +635,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
btrfs_dev_replace_write_unlock(dev_replace);
mutex_unlock(&fs_info->chunk_mutex);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- mutex_unlock(&uuid_mutex);
btrfs_rm_dev_replace_blocked(fs_info);
if (tgt_device)
btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
@@ -596,7 +685,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
*/
mutex_unlock(&fs_info->chunk_mutex);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- mutex_unlock(&uuid_mutex);
/* replace the sysfs entry */
btrfs_sysfs_rm_device_link(fs_info->fs_devices, src_device);
@@ -800,7 +888,17 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
}
btrfs_dev_replace_write_unlock(dev_replace);
- WARN_ON(test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
+ /*
+ * This could collide with a paused balance, but the exclusive op logic
+ * should never allow both to start and pause. We don't want to allow
+ * dev-replace to start anyway.
+ */
+ if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
+ btrfs_info(fs_info,
+ "cannot resume dev-replace, other exclusive operation running");
+ return 0;
+ }
+
task = kthread_run(btrfs_dev_replace_kthread, fs_info, "btrfs-devrepl");
return PTR_ERR_OR_ZERO(task);
}
@@ -810,6 +908,7 @@ static int btrfs_dev_replace_kthread(void *data)
struct btrfs_fs_info *fs_info = data;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
u64 progress;
+ int ret;
progress = btrfs_dev_replace_progress(fs_info);
progress = div_u64(progress, 10);
@@ -820,23 +919,14 @@ static int btrfs_dev_replace_kthread(void *data)
btrfs_dev_name(dev_replace->tgtdev),
(unsigned int)progress);
- btrfs_dev_replace_continue_on_mount(fs_info);
- clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
-
- return 0;
-}
-
-static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
- int ret;
-
ret = btrfs_scrub_dev(fs_info, dev_replace->srcdev->devid,
dev_replace->committed_cursor_left,
btrfs_device_get_total_bytes(dev_replace->srcdev),
&dev_replace->scrub_progress, 0, 1);
ret = btrfs_dev_replace_finishing(fs_info, ret);
WARN_ON(ret);
+
+ clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
return 0;
}
@@ -916,9 +1006,9 @@ void btrfs_dev_replace_clear_lock_blocking(
ASSERT(atomic_read(&dev_replace->read_locks) > 0);
ASSERT(atomic_read(&dev_replace->blocking_readers) > 0);
read_lock(&dev_replace->lock);
- if (atomic_dec_and_test(&dev_replace->blocking_readers) &&
- waitqueue_active(&dev_replace->read_lock_wq))
- wake_up(&dev_replace->read_lock_wq);
+ /* Barrier implied by atomic_dec_and_test */
+ if (atomic_dec_and_test(&dev_replace->blocking_readers))
+ cond_wake_up_nomb(&dev_replace->read_lock_wq);
}
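
For context on the conversion above, a sketch of the cond_wake_up_nomb() helper this patch assumes: it keeps the waitqueue_active() fast path but leaves out the memory barrier, relying on one implied by the caller (here atomic_dec_and_test()):

/*
 * Sketch of the assumed helper, not a definitive implementation. The _nomb
 * variant omits the barrier normally required before waitqueue_active()
 * because the caller already provides one.
 */
static inline void example_cond_wake_up_nomb(wait_queue_head_t *wq)
{
	if (waitqueue_active(wq))
		wake_up(wq);
}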
void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)
@@ -929,9 +1019,7 @@ void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)
void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount)
{
percpu_counter_sub(&fs_info->bio_counter, amount);
-
- if (waitqueue_active(&fs_info->replace_wait))
- wake_up(&fs_info->replace_wait);
+ cond_wake_up_nomb(&fs_info->replace_wait);
}
void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 60caa68c3618..205092dc9390 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -55,7 +55,6 @@
static const struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
static void free_fs_root(struct btrfs_root *root);
-static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info);
static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_fs_info *fs_info);
@@ -416,7 +415,7 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
static int verify_level_key(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int level,
- struct btrfs_key *first_key)
+ struct btrfs_key *first_key, u64 parent_transid)
{
int found_level;
struct btrfs_key found_key;
@@ -454,10 +453,11 @@ static int verify_level_key(struct btrfs_fs_info *fs_info,
if (ret) {
WARN_ON(1);
btrfs_err(fs_info,
-"tree first key mismatch detected, bytenr=%llu key expected=(%llu, %u, %llu) has=(%llu, %u, %llu)",
- eb->start, first_key->objectid, first_key->type,
- first_key->offset, found_key.objectid,
- found_key.type, found_key.offset);
+"tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)",
+ eb->start, parent_transid, first_key->objectid,
+ first_key->type, first_key->offset,
+ found_key.objectid, found_key.type,
+ found_key.offset);
}
#endif
return ret;
@@ -493,7 +493,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
parent_transid, 0))
ret = -EIO;
else if (verify_level_key(fs_info, eb, level,
- first_key))
+ first_key, parent_transid))
ret = -EUCLEAN;
else
break;
@@ -1185,7 +1185,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
root->inode_tree = RB_ROOT;
INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
root->block_rsv = NULL;
- root->orphan_block_rsv = NULL;
INIT_LIST_HEAD(&root->dirty_list);
INIT_LIST_HEAD(&root->root_list);
@@ -1195,7 +1194,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
INIT_LIST_HEAD(&root->ordered_root);
INIT_LIST_HEAD(&root->logged_list[0]);
INIT_LIST_HEAD(&root->logged_list[1]);
- spin_lock_init(&root->orphan_lock);
spin_lock_init(&root->inode_lock);
spin_lock_init(&root->delalloc_lock);
spin_lock_init(&root->ordered_extent_lock);
@@ -1216,7 +1214,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
atomic_set(&root->log_commit[1], 0);
atomic_set(&root->log_writers, 0);
atomic_set(&root->log_batch, 0);
- atomic_set(&root->orphan_inodes, 0);
refcount_set(&root->refs, 1);
atomic_set(&root->will_be_snapshotted, 0);
root->log_transid = 0;
@@ -2164,7 +2161,6 @@ static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
{
spin_lock_init(&fs_info->balance_lock);
mutex_init(&fs_info->balance_mutex);
- atomic_set(&fs_info->balance_running, 0);
atomic_set(&fs_info->balance_pause_req, 0);
atomic_set(&fs_info->balance_cancel_req, 0);
fs_info->balance_ctl = NULL;
@@ -2442,6 +2438,211 @@ out:
return ret;
}
+/*
+ * Real super block validation
+ * NOTE: super csum type and incompat features will not be checked here.
+ *
+ * @sb: super block to check
+ * @mirror_num: the superblock copy number whose bytenr is checked:
+ * 0 the primary (1st) sb
+ * 1, 2 2nd and 3rd backup copy
+ * -1 skip bytenr check
+ */
+static int validate_super(struct btrfs_fs_info *fs_info,
+ struct btrfs_super_block *sb, int mirror_num)
+{
+ u64 nodesize = btrfs_super_nodesize(sb);
+ u64 sectorsize = btrfs_super_sectorsize(sb);
+ int ret = 0;
+
+ if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
+ btrfs_err(fs_info, "no valid FS found");
+ ret = -EINVAL;
+ }
+ if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) {
+ btrfs_err(fs_info, "unrecognized or unsupported super flag: %llu",
+ btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
+ ret = -EINVAL;
+ }
+ if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
+ btrfs_err(fs_info, "tree_root level too big: %d >= %d",
+ btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
+ ret = -EINVAL;
+ }
+ if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
+ btrfs_err(fs_info, "chunk_root level too big: %d >= %d",
+ btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
+ ret = -EINVAL;
+ }
+ if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
+ btrfs_err(fs_info, "log_root level too big: %d >= %d",
+ btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
+ ret = -EINVAL;
+ }
+
+ /*
+ * Check sectorsize and nodesize first; other checks will need them.
+ * Check all possible sectorsizes (4K, 8K, 16K, 32K, 64K) here.
+ */
+ if (!is_power_of_2(sectorsize) || sectorsize < 4096 ||
+ sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+ btrfs_err(fs_info, "invalid sectorsize %llu", sectorsize);
+ ret = -EINVAL;
+ }
+ /* Only PAGE_SIZE is supported for now */
+ if (sectorsize != PAGE_SIZE) {
+ btrfs_err(fs_info,
+ "sectorsize %llu not supported yet, only support %lu",
+ sectorsize, PAGE_SIZE);
+ ret = -EINVAL;
+ }
+ if (!is_power_of_2(nodesize) || nodesize < sectorsize ||
+ nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+ btrfs_err(fs_info, "invalid nodesize %llu", nodesize);
+ ret = -EINVAL;
+ }
+ if (nodesize != le32_to_cpu(sb->__unused_leafsize)) {
+ btrfs_err(fs_info, "invalid leafsize %u, should be %llu",
+ le32_to_cpu(sb->__unused_leafsize), nodesize);
+ ret = -EINVAL;
+ }
+
+ /* Root alignment check */
+ if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) {
+ btrfs_warn(fs_info, "tree_root block unaligned: %llu",
+ btrfs_super_root(sb));
+ ret = -EINVAL;
+ }
+ if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) {
+ btrfs_warn(fs_info, "chunk_root block unaligned: %llu",
+ btrfs_super_chunk_root(sb));
+ ret = -EINVAL;
+ }
+ if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) {
+ btrfs_warn(fs_info, "log_root block unaligned: %llu",
+ btrfs_super_log_root(sb));
+ ret = -EINVAL;
+ }
+
+ if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) {
+ btrfs_err(fs_info,
+ "dev_item UUID does not match fsid: %pU != %pU",
+ fs_info->fsid, sb->dev_item.fsid);
+ ret = -EINVAL;
+ }
+
+ /*
+ * Hint to catch really bogus numbers, bitflips or so; more exact checks
+ * are done later.
+ */
+ if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
+ btrfs_err(fs_info, "bytes_used is too small %llu",
+ btrfs_super_bytes_used(sb));
+ ret = -EINVAL;
+ }
+ if (!is_power_of_2(btrfs_super_stripesize(sb))) {
+ btrfs_err(fs_info, "invalid stripesize %u",
+ btrfs_super_stripesize(sb));
+ ret = -EINVAL;
+ }
+ if (btrfs_super_num_devices(sb) > (1UL << 31))
+ btrfs_warn(fs_info, "suspicious number of devices: %llu",
+ btrfs_super_num_devices(sb));
+ if (btrfs_super_num_devices(sb) == 0) {
+ btrfs_err(fs_info, "number of devices is 0");
+ ret = -EINVAL;
+ }
+
+ if (mirror_num >= 0 &&
+ btrfs_super_bytenr(sb) != btrfs_sb_offset(mirror_num)) {
+ btrfs_err(fs_info, "super offset mismatch %llu != %u",
+ btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET);
+ ret = -EINVAL;
+ }
+
+ /*
+ * Catch obvious sys_chunk_array corruptions; it must hold at least one key
+ * and one chunk.
+ */
+ if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
+ btrfs_err(fs_info, "system chunk array too big %u > %u",
+ btrfs_super_sys_array_size(sb),
+ BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
+ ret = -EINVAL;
+ }
+ if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
+ + sizeof(struct btrfs_chunk)) {
+ btrfs_err(fs_info, "system chunk array too small %u < %zu",
+ btrfs_super_sys_array_size(sb),
+ sizeof(struct btrfs_disk_key)
+ + sizeof(struct btrfs_chunk));
+ ret = -EINVAL;
+ }
+
+ /*
+ * The generation is a global counter; we'll trust it more than the others,
+ * but it's still possible that it's the one that's wrong.
+ */
+ if (btrfs_super_generation(sb) < btrfs_super_chunk_root_generation(sb))
+ btrfs_warn(fs_info,
+ "suspicious: generation < chunk_root_generation: %llu < %llu",
+ btrfs_super_generation(sb),
+ btrfs_super_chunk_root_generation(sb));
+ if (btrfs_super_generation(sb) < btrfs_super_cache_generation(sb)
+ && btrfs_super_cache_generation(sb) != (u64)-1)
+ btrfs_warn(fs_info,
+ "suspicious: generation < cache_generation: %llu < %llu",
+ btrfs_super_generation(sb),
+ btrfs_super_cache_generation(sb));
+
+ return ret;
+}
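
The @mirror_num bytenr check above leans on the fixed superblock copy locations. A hedged sketch of the layout btrfs_sb_offset() is assumed to encode (constants from my reading of the on-disk format, not from this diff):

/*
 * Assumed layout, for illustration: the primary superblock lives at 64K
 * (BTRFS_SUPER_INFO_OFFSET) and the backup copies at 64M and 256G, i.e.
 * 16K << (BTRFS_SUPER_MIRROR_SHIFT * mirror) with a shift of 12.
 */
static u64 example_sb_offset(int mirror)
{
	u64 start = SZ_16K;

	if (mirror)
		return start << (BTRFS_SUPER_MIRROR_SHIFT * mirror);
	return BTRFS_SUPER_INFO_OFFSET;	/* 64K */
}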
+
+/*
+ * Validation of super block at mount time.
+ * Checks already done early at mount time, like csum type and incompat
+ * flags, are skipped.
+ */
+static int btrfs_validate_mount_super(struct btrfs_fs_info *fs_info)
+{
+ return validate_super(fs_info, fs_info->super_copy, 0);
+}
+
+/*
+ * Validation of super block at write time.
+ * Some checks, like the bytenr check, are skipped as their values will be
+ * overwritten soon.
+ * Extra checks, like csum type and incompat flags, are done here.
+ */
+static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info,
+ struct btrfs_super_block *sb)
+{
+ int ret;
+
+ ret = validate_super(fs_info, sb, -1);
+ if (ret < 0)
+ goto out;
+ if (btrfs_super_csum_type(sb) != BTRFS_CSUM_TYPE_CRC32) {
+ ret = -EUCLEAN;
+ btrfs_err(fs_info, "invalid csum type, has %u want %u",
+ btrfs_super_csum_type(sb), BTRFS_CSUM_TYPE_CRC32);
+ goto out;
+ }
+ if (btrfs_super_incompat_flags(sb) & ~BTRFS_FEATURE_INCOMPAT_SUPP) {
+ ret = -EUCLEAN;
+ btrfs_err(fs_info,
+ "invalid incompat flags, has 0x%llx valid mask 0x%llx",
+ btrfs_super_incompat_flags(sb),
+ (unsigned long long)BTRFS_FEATURE_INCOMPAT_SUPP);
+ goto out;
+ }
+out:
+ if (ret < 0)
+ btrfs_err(fs_info,
+ "super block corruption detected before writing it to disk");
+ return ret;
+}
+
int open_ctree(struct super_block *sb,
struct btrfs_fs_devices *fs_devices,
char *options)
@@ -2601,7 +2802,6 @@ int open_ctree(struct super_block *sb,
mutex_init(&fs_info->chunk_mutex);
mutex_init(&fs_info->transaction_kthread_mutex);
mutex_init(&fs_info->cleaner_mutex);
- mutex_init(&fs_info->volume_mutex);
mutex_init(&fs_info->ro_block_group_mutex);
init_rwsem(&fs_info->commit_root_sem);
init_rwsem(&fs_info->cleanup_work_sem);
@@ -2668,7 +2868,7 @@ int open_ctree(struct super_block *sb,
memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE);
- ret = btrfs_check_super_valid(fs_info);
+ ret = btrfs_validate_mount_super(fs_info);
if (ret) {
btrfs_err(fs_info, "superblock contains fatal errors");
err = -EINVAL;
@@ -3523,7 +3723,7 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
if (raid_type == BTRFS_RAID_SINGLE)
continue;
- if (!(flags & btrfs_raid_group[raid_type]))
+ if (!(flags & btrfs_raid_array[raid_type].bg_flag))
continue;
min_tolerated = min(min_tolerated,
btrfs_raid_array[raid_type].
@@ -3603,6 +3803,14 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
flags = btrfs_super_flags(sb);
btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
+ ret = btrfs_validate_write_super(fs_info, sb);
+ if (ret < 0) {
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ btrfs_handle_fs_error(fs_info, -EUCLEAN,
+ "unexpected superblock corruption detected");
+ return -EUCLEAN;
+ }
+
ret = write_dev_supers(dev, sb, max_mirrors);
if (ret)
total_errors++;
@@ -3674,8 +3882,6 @@ static void free_fs_root(struct btrfs_root *root)
{
iput(root->ino_cache_inode);
WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
- btrfs_free_block_rsv(root->fs_info, root->orphan_block_rsv);
- root->orphan_block_rsv = NULL;
if (root->anon_dev)
free_anon_bdev(root->anon_dev);
if (root->subv_writers)
@@ -3766,7 +3972,6 @@ int btrfs_commit_super(struct btrfs_fs_info *fs_info)
void close_ctree(struct btrfs_fs_info *fs_info)
{
- struct btrfs_root *root = fs_info->tree_root;
int ret;
set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
@@ -3818,6 +4023,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)
set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags);
btrfs_free_qgroup_config(fs_info);
+ ASSERT(list_empty(&fs_info->delalloc_roots));
if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
btrfs_info(fs_info, "at unmount delalloc count %lld",
@@ -3861,9 +4067,6 @@ void close_ctree(struct btrfs_fs_info *fs_info)
btrfs_free_stripe_hash_table(fs_info);
btrfs_free_ref_cache(fs_info);
- __btrfs_free_block_rsv(root->orphan_block_rsv);
- root->orphan_block_rsv = NULL;
-
while (!list_empty(&fs_info->pinned_chunks)) {
struct extent_map *em;
@@ -3974,166 +4177,17 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
level, first_key);
}
-static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_super_block *sb = fs_info->super_copy;
- u64 nodesize = btrfs_super_nodesize(sb);
- u64 sectorsize = btrfs_super_sectorsize(sb);
- int ret = 0;
-
- if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
- btrfs_err(fs_info, "no valid FS found");
- ret = -EINVAL;
- }
- if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) {
- btrfs_err(fs_info, "unrecognized or unsupported super flag: %llu",
- btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
- ret = -EINVAL;
- }
- if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
- btrfs_err(fs_info, "tree_root level too big: %d >= %d",
- btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
- ret = -EINVAL;
- }
- if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
- btrfs_err(fs_info, "chunk_root level too big: %d >= %d",
- btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
- ret = -EINVAL;
- }
- if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
- btrfs_err(fs_info, "log_root level too big: %d >= %d",
- btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
- ret = -EINVAL;
- }
-
- /*
- * Check sectorsize and nodesize first, other check will need it.
- * Check all possible sectorsize(4K, 8K, 16K, 32K, 64K) here.
- */
- if (!is_power_of_2(sectorsize) || sectorsize < 4096 ||
- sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
- btrfs_err(fs_info, "invalid sectorsize %llu", sectorsize);
- ret = -EINVAL;
- }
- /* Only PAGE SIZE is supported yet */
- if (sectorsize != PAGE_SIZE) {
- btrfs_err(fs_info,
- "sectorsize %llu not supported yet, only support %lu",
- sectorsize, PAGE_SIZE);
- ret = -EINVAL;
- }
- if (!is_power_of_2(nodesize) || nodesize < sectorsize ||
- nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
- btrfs_err(fs_info, "invalid nodesize %llu", nodesize);
- ret = -EINVAL;
- }
- if (nodesize != le32_to_cpu(sb->__unused_leafsize)) {
- btrfs_err(fs_info, "invalid leafsize %u, should be %llu",
- le32_to_cpu(sb->__unused_leafsize), nodesize);
- ret = -EINVAL;
- }
-
- /* Root alignment check */
- if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) {
- btrfs_warn(fs_info, "tree_root block unaligned: %llu",
- btrfs_super_root(sb));
- ret = -EINVAL;
- }
- if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) {
- btrfs_warn(fs_info, "chunk_root block unaligned: %llu",
- btrfs_super_chunk_root(sb));
- ret = -EINVAL;
- }
- if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) {
- btrfs_warn(fs_info, "log_root block unaligned: %llu",
- btrfs_super_log_root(sb));
- ret = -EINVAL;
- }
-
- if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) {
- btrfs_err(fs_info,
- "dev_item UUID does not match fsid: %pU != %pU",
- fs_info->fsid, sb->dev_item.fsid);
- ret = -EINVAL;
- }
-
- /*
- * Hint to catch really bogus numbers, bitflips or so, more exact checks are
- * done later
- */
- if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
- btrfs_err(fs_info, "bytes_used is too small %llu",
- btrfs_super_bytes_used(sb));
- ret = -EINVAL;
- }
- if (!is_power_of_2(btrfs_super_stripesize(sb))) {
- btrfs_err(fs_info, "invalid stripesize %u",
- btrfs_super_stripesize(sb));
- ret = -EINVAL;
- }
- if (btrfs_super_num_devices(sb) > (1UL << 31))
- btrfs_warn(fs_info, "suspicious number of devices: %llu",
- btrfs_super_num_devices(sb));
- if (btrfs_super_num_devices(sb) == 0) {
- btrfs_err(fs_info, "number of devices is 0");
- ret = -EINVAL;
- }
-
- if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) {
- btrfs_err(fs_info, "super offset mismatch %llu != %u",
- btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET);
- ret = -EINVAL;
- }
-
- /*
- * Obvious sys_chunk_array corruptions, it must hold at least one key
- * and one chunk
- */
- if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
- btrfs_err(fs_info, "system chunk array too big %u > %u",
- btrfs_super_sys_array_size(sb),
- BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
- ret = -EINVAL;
- }
- if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
- + sizeof(struct btrfs_chunk)) {
- btrfs_err(fs_info, "system chunk array too small %u < %zu",
- btrfs_super_sys_array_size(sb),
- sizeof(struct btrfs_disk_key)
- + sizeof(struct btrfs_chunk));
- ret = -EINVAL;
- }
-
- /*
- * The generation is a global counter, we'll trust it more than the others
- * but it's still possible that it's the one that's wrong.
- */
- if (btrfs_super_generation(sb) < btrfs_super_chunk_root_generation(sb))
- btrfs_warn(fs_info,
- "suspicious: generation < chunk_root_generation: %llu < %llu",
- btrfs_super_generation(sb),
- btrfs_super_chunk_root_generation(sb));
- if (btrfs_super_generation(sb) < btrfs_super_cache_generation(sb)
- && btrfs_super_cache_generation(sb) != (u64)-1)
- btrfs_warn(fs_info,
- "suspicious: generation < cache_generation: %llu < %llu",
- btrfs_super_generation(sb),
- btrfs_super_cache_generation(sb));
-
- return ret;
-}
-
static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info)
{
+ /* cleanup FS via transaction */
+ btrfs_cleanup_transaction(fs_info);
+
mutex_lock(&fs_info->cleaner_mutex);
btrfs_run_delayed_iputs(fs_info);
mutex_unlock(&fs_info->cleaner_mutex);
down_write(&fs_info->cleanup_work_sem);
up_write(&fs_info->cleanup_work_sem);
-
- /* cleanup FS via transaction */
- btrfs_cleanup_transaction(fs_info);
}
static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
@@ -4258,19 +4312,23 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
list_splice_init(&root->delalloc_inodes, &splice);
while (!list_empty(&splice)) {
+ struct inode *inode = NULL;
btrfs_inode = list_first_entry(&splice, struct btrfs_inode,
delalloc_inodes);
-
- list_del_init(&btrfs_inode->delalloc_inodes);
- clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
- &btrfs_inode->runtime_flags);
+ __btrfs_del_delalloc_inode(root, btrfs_inode);
spin_unlock(&root->delalloc_lock);
- btrfs_invalidate_inodes(btrfs_inode->root);
-
+ /*
+ * Make sure we get a live inode and that it won't disappear
+ * in the meantime.
+ */
+ inode = igrab(&btrfs_inode->vfs_inode);
+ if (inode) {
+ invalidate_inode_pages2(inode->i_mapping);
+ iput(inode);
+ }
spin_lock(&root->delalloc_lock);
}
-
spin_unlock(&root->delalloc_lock);
}
@@ -4286,7 +4344,6 @@ static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info)
while (!list_empty(&splice)) {
root = list_first_entry(&splice, struct btrfs_root,
delalloc_root);
- list_del_init(&root->delalloc_root);
root = btrfs_grab_fs_root(root);
BUG_ON(!root);
spin_unlock(&fs_info->delalloc_root_lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e2f16b68fcbf..3d9fe58c0080 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -66,10 +66,8 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
u64 flags, u64 owner, u64 offset,
struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- u64 parent, u64 root_objectid,
- u64 flags, struct btrfs_disk_key *key,
- int level, struct btrfs_key *ins);
+ struct btrfs_delayed_ref_node *node,
+ struct btrfs_delayed_extent_op *extent_op);
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 flags,
int force);
@@ -256,7 +254,7 @@ static int exclude_super_stripes(struct btrfs_fs_info *fs_info,
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
bytenr = btrfs_sb_offset(i);
ret = btrfs_rmap_block(fs_info, cache->key.objectid,
- bytenr, 0, &logical, &nr, &stripe_len);
+ bytenr, &logical, &nr, &stripe_len);
if (ret)
return ret;
@@ -343,8 +341,9 @@ static void fragment_free_space(struct btrfs_block_group_cache *block_group)
* since their free space will be released as soon as the transaction commits.
*/
u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
- struct btrfs_fs_info *info, u64 start, u64 end)
+ u64 start, u64 end)
{
+ struct btrfs_fs_info *info = block_group->fs_info;
u64 extent_start, extent_end, size, total_added = 0;
int ret;
@@ -489,8 +488,7 @@ next:
if (key.type == BTRFS_EXTENT_ITEM_KEY ||
key.type == BTRFS_METADATA_ITEM_KEY) {
- total_found += add_new_free_space(block_group,
- fs_info, last,
+ total_found += add_new_free_space(block_group, last,
key.objectid);
if (key.type == BTRFS_METADATA_ITEM_KEY)
last = key.objectid +
@@ -508,7 +506,7 @@ next:
}
ret = 0;
- total_found += add_new_free_space(block_group, fs_info, last,
+ total_found += add_new_free_space(block_group, last,
block_group->key.objectid +
block_group->key.offset);
caching_ctl->progress = (u64)-1;
@@ -744,12 +742,12 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
}
static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes,
- u64 owner, u64 root_objectid)
+ bool metadata, u64 root_objectid)
{
struct btrfs_space_info *space_info;
u64 flags;
- if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+ if (metadata) {
if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
flags = BTRFS_BLOCK_GROUP_SYSTEM;
else
@@ -2200,8 +2198,11 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
&old_ref_mod, &new_ref_mod);
}
- if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
- add_pinned_bytes(fs_info, -num_bytes, owner, root_objectid);
+ if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) {
+ bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
+
+ add_pinned_bytes(fs_info, -num_bytes, metadata, root_objectid);
+ }
return ret;
}
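
The metadata test above works because of how the owner argument is encoded; a brief note (my summary, not text from the patch):

/*
 * Why "owner < BTRFS_FIRST_FREE_OBJECTID" identifies metadata: for tree
 * blocks the owner passed here is the block's level (0..BTRFS_MAX_LEVEL-1),
 * while for file data it is the inode number, and inode numbers start at
 * BTRFS_FIRST_FREE_OBJECTID (256).
 */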
@@ -2428,10 +2429,8 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
{
int ret = 0;
struct btrfs_delayed_tree_ref *ref;
- struct btrfs_key ins;
u64 parent = 0;
u64 ref_root = 0;
- bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
ref = btrfs_delayed_node_to_tree_ref(node);
trace_run_delayed_tree_ref(fs_info, node, ref, node->action);
@@ -2440,15 +2439,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
parent = ref->parent;
ref_root = ref->root;
- ins.objectid = node->bytenr;
- if (skinny_metadata) {
- ins.offset = ref->level;
- ins.type = BTRFS_METADATA_ITEM_KEY;
- } else {
- ins.offset = node->num_bytes;
- ins.type = BTRFS_EXTENT_ITEM_KEY;
- }
-
if (node->ref_mod != 1) {
btrfs_err(fs_info,
"btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
@@ -2458,11 +2448,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
}
if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
BUG_ON(!extent_op || !extent_op->update_flags);
- ret = alloc_reserved_tree_block(trans, fs_info,
- parent, ref_root,
- extent_op->flags_to_set,
- &extent_op->key,
- ref->level, &ins);
+ ret = alloc_reserved_tree_block(trans, node, extent_op);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
ret = __btrfs_inc_extent_ref(trans, fs_info, node,
parent, ref_root,
@@ -2594,8 +2580,8 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
delayed_refs->num_heads--;
rb_erase(&head->href_node, &delayed_refs->href_root);
RB_CLEAR_NODE(&head->href_node);
- spin_unlock(&delayed_refs->lock);
spin_unlock(&head->lock);
+ spin_unlock(&delayed_refs->lock);
atomic_dec(&delayed_refs->num_entries);
trace_run_delayed_ref_head(fs_info, head, 0);
@@ -2700,17 +2686,12 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
* insert_inline_extent_backref()).
*/
spin_lock(&locked_ref->lock);
- btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
- locked_ref);
+ btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
- /*
- * locked_ref is the head node, so we have to go one
- * node back for any delayed ref updates
- */
ref = select_delayed_ref(locked_ref);
if (ref && ref->seq &&
- btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
+ btrfs_check_delayed_seq(fs_info, ref->seq)) {
spin_unlock(&locked_ref->lock);
unselect_delayed_ref_head(delayed_refs, locked_ref);
locked_ref = NULL;
@@ -3142,7 +3123,11 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
struct rb_node *node;
int ret = 0;
+ spin_lock(&root->fs_info->trans_lock);
cur_trans = root->fs_info->running_transaction;
+ if (cur_trans)
+ refcount_inc(&cur_trans->use_count);
+ spin_unlock(&root->fs_info->trans_lock);
if (!cur_trans)
return 0;
@@ -3151,6 +3136,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
if (!head) {
spin_unlock(&delayed_refs->lock);
+ btrfs_put_transaction(cur_trans);
return 0;
}
@@ -3167,6 +3153,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
mutex_lock(&head->mutex);
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref_head(head);
+ btrfs_put_transaction(cur_trans);
return -EAGAIN;
}
spin_unlock(&delayed_refs->lock);
@@ -3199,6 +3186,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
}
spin_unlock(&head->lock);
mutex_unlock(&head->mutex);
+ btrfs_put_transaction(cur_trans);
return ret;
}
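
Taken together, the hunks above pin the running transaction while the delayed refs are inspected, since a concurrent commit could otherwise free it. A condensed sketch of the pattern (field and function names as in the diff, flow abbreviated):

/* Condensed sketch of the lifetime fix above, not a drop-in replacement. */
static int example_peek_running_transaction(struct btrfs_fs_info *fs_info)
{
	struct btrfs_transaction *cur_trans;

	spin_lock(&fs_info->trans_lock);
	cur_trans = fs_info->running_transaction;
	if (cur_trans)
		refcount_inc(&cur_trans->use_count);	/* pin it */
	spin_unlock(&fs_info->trans_lock);
	if (!cur_trans)
		return 0;

	/* ... inspect the delayed refs of cur_trans ... */

	btrfs_put_transaction(cur_trans);	/* unpin on every exit path */
	return 0;
}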
@@ -3284,7 +3272,7 @@ int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
path = btrfs_alloc_path();
if (!path)
- return -ENOENT;
+ return -ENOMEM;
do {
ret = check_committed_ref(root, path, objectid,
@@ -4019,8 +4007,7 @@ static const char *alloc_name(u64 flags)
};
}
-static int create_space_info(struct btrfs_fs_info *info, u64 flags,
- struct btrfs_space_info **new)
+static int create_space_info(struct btrfs_fs_info *info, u64 flags)
{
struct btrfs_space_info *space_info;
@@ -4058,7 +4045,6 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags,
return ret;
}
- *new = space_info;
list_add_rcu(&space_info->list, &info->space_info);
if (flags & BTRFS_BLOCK_GROUP_DATA)
info->data_sinfo = space_info;
@@ -4115,7 +4101,7 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
* returns target flags in extended format or 0 if restripe for this
* chunk_type is not in progress
*
- * should be called with either volume_mutex or balance_lock held
+ * should be called with balance_lock held
*/
static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
{
@@ -4171,7 +4157,7 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags)
/* First, mask out the RAID levels which aren't possible */
for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
if (num_devices >= btrfs_raid_array[raid_type].devs_min)
- allowed |= btrfs_raid_group[raid_type];
+ allowed |= btrfs_raid_array[raid_type].bg_flag;
}
allowed &= flags;
@@ -4334,7 +4320,7 @@ commit_trans:
need_commit--;
if (need_commit > 0) {
- btrfs_start_delalloc_roots(fs_info, 0, -1);
+ btrfs_start_delalloc_roots(fs_info, -1);
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0,
(u64)-1);
}
@@ -4671,12 +4657,14 @@ again:
trans->allocating_chunk = false;
spin_lock(&space_info->lock);
- if (ret < 0 && ret != -ENOSPC)
- goto out;
- if (ret)
- space_info->full = 1;
- else
+ if (ret < 0) {
+ if (ret == -ENOSPC)
+ space_info->full = 1;
+ else
+ goto out;
+ } else {
ret = 1;
+ }
space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
out:
@@ -4785,7 +4773,7 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info,
* the filesystem is readonly(all dirty pages are written to
* the disk).
*/
- btrfs_start_delalloc_roots(fs_info, 0, nr_items);
+ btrfs_start_delalloc_roots(fs_info, nr_items);
if (!current->journal_info)
btrfs_wait_ordered_roots(fs_info, nr_items, 0, (u64)-1);
}
@@ -5942,44 +5930,6 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
trans->chunk_bytes_reserved = 0;
}
-/* Can only return 0 or -ENOSPC */
-int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
- struct btrfs_inode *inode)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
- struct btrfs_root *root = inode->root;
- /*
- * We always use trans->block_rsv here as we will have reserved space
- * for our orphan when starting the transaction, using get_block_rsv()
- * here will sometimes make us choose the wrong block rsv as we could be
- * doing a reloc inode for a non refcounted root.
- */
- struct btrfs_block_rsv *src_rsv = trans->block_rsv;
- struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
-
- /*
- * We need to hold space in order to delete our orphan item once we've
- * added it, so this takes the reservation so we can release it later
- * when we are truly done with the orphan item.
- */
- u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
-
- trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode),
- num_bytes, 1);
- return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
-}
-
-void btrfs_orphan_release_metadata(struct btrfs_inode *inode)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
- struct btrfs_root *root = inode->root;
- u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
-
- trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode),
- num_bytes, 0);
- btrfs_block_rsv_release(fs_info, root->orphan_block_rsv, num_bytes);
-}
-
/*
* btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation
* root: the root of the parent directory
@@ -5997,7 +5947,6 @@ void btrfs_orphan_release_metadata(struct btrfs_inode *inode)
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv,
int items,
- u64 *qgroup_reserved,
bool use_global_rsv)
{
u64 num_bytes;
@@ -6015,8 +5964,6 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
num_bytes = 0;
}
- *qgroup_reserved = num_bytes;
-
num_bytes = btrfs_calc_trans_metadata_size(fs_info, items);
rsv->space_info = __find_space_info(fs_info,
BTRFS_BLOCK_GROUP_METADATA);
@@ -6026,8 +5973,8 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
if (ret == -ENOSPC && use_global_rsv)
ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1);
- if (ret && *qgroup_reserved)
- btrfs_qgroup_free_meta_prealloc(root, *qgroup_reserved);
+ if (ret && num_bytes)
+ btrfs_qgroup_free_meta_prealloc(root, num_bytes);
return ret;
}
@@ -6347,6 +6294,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
spin_lock(&info->unused_bgs_lock);
if (list_empty(&cache->bg_list)) {
btrfs_get_block_group(cache);
+ trace_btrfs_add_unused_block_group(cache);
list_add_tail(&cache->bg_list,
&info->unused_bgs);
}
@@ -6504,6 +6452,7 @@ int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info,
struct btrfs_key key;
int found_type;
int i;
+ int ret = 0;
if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
return 0;
@@ -6520,10 +6469,12 @@ int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info,
continue;
key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
- __exclude_logged_extent(fs_info, key.objectid, key.offset);
+ ret = __exclude_logged_extent(fs_info, key.objectid, key.offset);
+ if (ret)
+ break;
}
- return 0;
+ return ret;
}
static void
@@ -7115,7 +7066,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
}
- ret = add_to_free_space_tree(trans, info, bytenr, num_bytes);
+ ret = add_to_free_space_tree(trans, bytenr, num_bytes);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -7259,7 +7210,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
}
out:
if (pin)
- add_pinned_bytes(fs_info, buf->len, btrfs_header_level(buf),
+ add_pinned_bytes(fs_info, buf->len, true,
root->root_key.objectid);
if (last_ref) {
@@ -7313,8 +7264,11 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
&old_ref_mod, &new_ref_mod);
}
- if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
- add_pinned_bytes(fs_info, num_bytes, owner, root_objectid);
+ if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0) {
+ bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
+
+ add_pinned_bytes(fs_info, num_bytes, metadata, root_objectid);
+ }
return ret;
}
@@ -7366,24 +7320,6 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
return ret;
}
-static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = {
- [BTRFS_RAID_RAID10] = "raid10",
- [BTRFS_RAID_RAID1] = "raid1",
- [BTRFS_RAID_DUP] = "dup",
- [BTRFS_RAID_RAID0] = "raid0",
- [BTRFS_RAID_SINGLE] = "single",
- [BTRFS_RAID_RAID5] = "raid5",
- [BTRFS_RAID_RAID6] = "raid6",
-};
-
-static const char *get_raid_name(enum btrfs_raid_types type)
-{
- if (type >= BTRFS_NR_RAID_TYPES)
- return NULL;
-
- return btrfs_raid_type_names[type];
-}
-
enum btrfs_loop_type {
LOOP_CACHING_NOWAIT = 0,
LOOP_CACHING_WAIT = 1,
@@ -7655,7 +7591,7 @@ have_block_group:
if (offset) {
/* we have a block, we're done */
spin_unlock(&last_ptr->refill_lock);
- trace_btrfs_reserve_extent_cluster(fs_info,
+ trace_btrfs_reserve_extent_cluster(
used_block_group,
search_start, num_bytes);
if (used_block_group != block_group) {
@@ -7728,7 +7664,7 @@ refill_cluster:
if (offset) {
/* we found one, proceed */
spin_unlock(&last_ptr->refill_lock);
- trace_btrfs_reserve_extent_cluster(fs_info,
+ trace_btrfs_reserve_extent_cluster(
block_group, search_start,
num_bytes);
goto checks;
@@ -7828,8 +7764,7 @@ checks:
ins->objectid = search_start;
ins->offset = num_bytes;
- trace_btrfs_reserve_extent(fs_info, block_group,
- search_start, num_bytes);
+ trace_btrfs_reserve_extent(block_group, search_start, num_bytes);
btrfs_release_block_group(block_group, delalloc);
break;
loop:
@@ -8177,8 +8112,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_free_path(path);
- ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
- ins->offset);
+ ret = remove_from_free_space_tree(trans, ins->objectid, ins->offset);
if (ret)
return ret;
@@ -8193,37 +8127,52 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
}
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- u64 parent, u64 root_objectid,
- u64 flags, struct btrfs_disk_key *key,
- int level, struct btrfs_key *ins)
+ struct btrfs_delayed_ref_node *node,
+ struct btrfs_delayed_extent_op *extent_op)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
struct btrfs_extent_item *extent_item;
+ struct btrfs_key extent_key;
struct btrfs_tree_block_info *block_info;
struct btrfs_extent_inline_ref *iref;
struct btrfs_path *path;
struct extent_buffer *leaf;
+ struct btrfs_delayed_tree_ref *ref;
u32 size = sizeof(*extent_item) + sizeof(*iref);
- u64 num_bytes = ins->offset;
+ u64 num_bytes;
+ u64 flags = extent_op->flags_to_set;
bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
- if (!skinny_metadata)
+ ref = btrfs_delayed_node_to_tree_ref(node);
+
+ extent_key.objectid = node->bytenr;
+ if (skinny_metadata) {
+ extent_key.offset = ref->level;
+ extent_key.type = BTRFS_METADATA_ITEM_KEY;
+ num_bytes = fs_info->nodesize;
+ } else {
+ extent_key.offset = node->num_bytes;
+ extent_key.type = BTRFS_EXTENT_ITEM_KEY;
size += sizeof(*block_info);
+ num_bytes = node->num_bytes;
+ }
path = btrfs_alloc_path();
if (!path) {
- btrfs_free_and_pin_reserved_extent(fs_info, ins->objectid,
+ btrfs_free_and_pin_reserved_extent(fs_info,
+ extent_key.objectid,
fs_info->nodesize);
return -ENOMEM;
}
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
- ins, size);
+ &extent_key, size);
if (ret) {
btrfs_free_path(path);
- btrfs_free_and_pin_reserved_extent(fs_info, ins->objectid,
+ btrfs_free_and_pin_reserved_extent(fs_info,
+ extent_key.objectid,
fs_info->nodesize);
return ret;
}
@@ -8238,42 +8187,41 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
if (skinny_metadata) {
iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
- num_bytes = fs_info->nodesize;
} else {
block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
- btrfs_set_tree_block_key(leaf, block_info, key);
- btrfs_set_tree_block_level(leaf, block_info, level);
+ btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
+ btrfs_set_tree_block_level(leaf, block_info, ref->level);
iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
}
- if (parent > 0) {
+ if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
btrfs_set_extent_inline_ref_type(leaf, iref,
BTRFS_SHARED_BLOCK_REF_KEY);
- btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
+ btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent);
} else {
btrfs_set_extent_inline_ref_type(leaf, iref,
BTRFS_TREE_BLOCK_REF_KEY);
- btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
+ btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
}
btrfs_mark_buffer_dirty(leaf);
btrfs_free_path(path);
- ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
+ ret = remove_from_free_space_tree(trans, extent_key.objectid,
num_bytes);
if (ret)
return ret;
- ret = update_block_group(trans, fs_info, ins->objectid,
+ ret = update_block_group(trans, fs_info, extent_key.objectid,
fs_info->nodesize, 1);
if (ret) { /* -ENOENT, logic error */
btrfs_err(fs_info, "update block group failed for %llu %llu",
- ins->objectid, ins->offset);
+ extent_key.objectid, extent_key.offset);
BUG();
}
- trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid,
+ trace_btrfs_reserved_extent_alloc(fs_info, extent_key.objectid,
fs_info->nodesize);
return ret;
}
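
A quick recap of the two on-disk key schemas the rewritten function handles (my summary of the extent tree format, not text from the patch):

/*
 * Key schema recap, for illustration:
 *   with SKINNY_METADATA:    (bytenr, BTRFS_METADATA_ITEM_KEY, level)
 *                            - block size is implicitly fs_info->nodesize
 *   without SKINNY_METADATA: (bytenr, BTRFS_EXTENT_ITEM_KEY, num_bytes)
 *                            - item carries a struct btrfs_tree_block_info
 */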
@@ -10166,8 +10114,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
} else if (btrfs_block_group_used(&cache->item) == 0) {
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
- add_new_free_space(cache, info,
- found_key.objectid,
+ add_new_free_space(cache, found_key.objectid,
found_key.objectid +
found_key.offset);
free_excluded_extents(info, cache);
@@ -10197,6 +10144,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
/* Should always be true but just in case. */
if (list_empty(&cache->bg_list)) {
btrfs_get_block_group(cache);
+ trace_btrfs_add_unused_block_group(cache);
list_add_tail(&cache->bg_list,
&info->unused_bgs);
}
@@ -10262,7 +10210,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
key.offset);
if (ret)
btrfs_abort_transaction(trans, ret);
- add_block_group_free_space(trans, fs_info, block_group);
+ add_block_group_free_space(trans, block_group);
/* already aborted the transaction if it failed. */
next:
list_del_init(&block_group->bg_list);
@@ -10303,7 +10251,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
return ret;
}
- add_new_free_space(cache, fs_info, chunk_offset, chunk_offset + size);
+ add_new_free_space(cache, chunk_offset, chunk_offset + size);
free_excluded_extents(fs_info, cache);
@@ -10384,6 +10332,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
BUG_ON(!block_group);
BUG_ON(!block_group->ro);
+ trace_btrfs_remove_block_group(block_group);
/*
* Free the reserved super bytes from this block group before
* remove it.
@@ -10641,7 +10590,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
mutex_unlock(&fs_info->chunk_mutex);
- ret = remove_block_group_free_space(trans, fs_info, block_group);
+ ret = remove_block_group_free_space(trans, block_group);
if (ret)
goto out;
@@ -10748,6 +10697,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* the ro check in case balance is currently acting on
* this block group.
*/
+ trace_btrfs_skip_unused_block_group(block_group);
spin_unlock(&block_group->lock);
up_write(&space_info->groups_sem);
goto next;
@@ -10870,7 +10820,6 @@ next:
int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
{
- struct btrfs_space_info *space_info;
struct btrfs_super_block *disk_super;
u64 features;
u64 flags;
@@ -10886,21 +10835,21 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
mixed = 1;
flags = BTRFS_BLOCK_GROUP_SYSTEM;
- ret = create_space_info(fs_info, flags, &space_info);
+ ret = create_space_info(fs_info, flags);
if (ret)
goto out;
if (mixed) {
flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
- ret = create_space_info(fs_info, flags, &space_info);
+ ret = create_space_info(fs_info, flags);
} else {
flags = BTRFS_BLOCK_GROUP_METADATA;
- ret = create_space_info(fs_info, flags, &space_info);
+ ret = create_space_info(fs_info, flags);
if (ret)
goto out;
flags = BTRFS_BLOCK_GROUP_DATA;
- ret = create_space_info(fs_info, flags, &space_info);
+ ret = create_space_info(fs_info, flags);
}
out:
return ret;
@@ -11085,12 +11034,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
void btrfs_end_write_no_snapshotting(struct btrfs_root *root)
{
percpu_counter_dec(&root->subv_writers->counter);
- /*
- * Make sure counter is updated before we wake up waiters.
- */
- smp_mb();
- if (waitqueue_active(&root->subv_writers->wait))
- wake_up(&root->subv_writers->wait);
+ cond_wake_up(&root->subv_writers->wait);
}
int btrfs_start_write_no_snapshotting(struct btrfs_root *root)
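The cond_wake_up() call above (and the cond_wake_up_nomb() variant used later in inode.c) folds the old smp_mb()/waitqueue_active()/wake_up() sequence into one helper. A minimal sketch of what such a helper is assumed to look like, built on wq_has_sleeper() from include/linux/wait.h:

static inline void cond_wake_up(wait_queue_head_t *wq)
{
	/*
	 * wq_has_sleeper() implies a full memory barrier, so the counter
	 * update made before this call is visible to any checked waiter.
	 */
	if (wq_has_sleeper(wq))
		wake_up(wq);
}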
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e99b329002cf..51fc015c7d2c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -26,7 +26,7 @@
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
-static struct bio_set *btrfs_bioset;
+static struct bio_set btrfs_bioset;
static inline bool extent_state_in_tree(const struct extent_state *state)
{
@@ -162,20 +162,18 @@ int __init extent_io_init(void)
if (!extent_buffer_cache)
goto free_state_cache;
- btrfs_bioset = bioset_create(BIO_POOL_SIZE,
- offsetof(struct btrfs_io_bio, bio),
- BIOSET_NEED_BVECS);
- if (!btrfs_bioset)
+ if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
+ offsetof(struct btrfs_io_bio, bio),
+ BIOSET_NEED_BVECS))
goto free_buffer_cache;
- if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE))
+ if (bioset_integrity_create(&btrfs_bioset, BIO_POOL_SIZE))
goto free_bioset;
return 0;
free_bioset:
- bioset_free(btrfs_bioset);
- btrfs_bioset = NULL;
+ bioset_exit(&btrfs_bioset);
free_buffer_cache:
kmem_cache_destroy(extent_buffer_cache);
@@ -198,8 +196,7 @@ void __cold extent_io_exit(void)
rcu_barrier();
kmem_cache_destroy(extent_state_cache);
kmem_cache_destroy(extent_buffer_cache);
- if (btrfs_bioset)
- bioset_free(btrfs_bioset);
+ bioset_exit(&btrfs_bioset);
}
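The bioset hunks above replace a heap-allocated bio_set (bioset_create()/bioset_free()) with one embedded in the module, set up by bioset_init() and torn down by bioset_exit(). A minimal sketch of that lifecycle, where my_bioset and struct my_io_wrapper are hypothetical names:

struct my_io_wrapper {
	int my_private;			/* front_pad state lives before the bio */
	struct bio bio;			/* must be last */
};

static struct bio_set my_bioset;	/* embedded; no pointer, no NULL check */

static int __init my_module_init(void)
{
	/* bioset_init() returns 0 on success or a negative errno */
	return bioset_init(&my_bioset, BIO_POOL_SIZE,
			   offsetof(struct my_io_wrapper, bio),
			   BIOSET_NEED_BVECS);
}

static void __exit my_module_exit(void)
{
	/* tolerates a zeroed, never-initialized bio_set, hence no guard */
	bioset_exit(&my_bioset);
}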
void extent_io_tree_init(struct extent_io_tree *tree,
@@ -2679,7 +2676,7 @@ struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte)
{
struct bio *bio;
- bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, btrfs_bioset);
+ bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset);
bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = first_byte >> 9;
btrfs_io_bio_init(btrfs_io_bio(bio));
@@ -2692,7 +2689,7 @@ struct bio *btrfs_bio_clone(struct bio *bio)
struct bio *new;
/* Bio allocation backed by a bioset does not fail */
- new = bio_clone_fast(bio, GFP_NOFS, btrfs_bioset);
+ new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
btrfs_bio = btrfs_io_bio(new);
btrfs_io_bio_init(btrfs_bio);
btrfs_bio->iter = bio->bi_iter;
@@ -2704,7 +2701,7 @@ struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
struct bio *bio;
/* Bio allocation backed by a bioset does not fail */
- bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, btrfs_bioset);
+ bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
btrfs_io_bio_init(btrfs_io_bio(bio));
return bio;
}
@@ -2715,7 +2712,7 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
struct btrfs_io_bio *btrfs_bio;
/* this will never fail when it's backed by a bioset */
- bio = bio_clone_fast(orig, GFP_NOFS, btrfs_bioset);
+ bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
ASSERT(bio);
btrfs_bio = btrfs_io_bio(bio);
@@ -4109,14 +4106,13 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
return ret;
}
-int extent_writepages(struct extent_io_tree *tree,
- struct address_space *mapping,
+int extent_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
int ret = 0;
struct extent_page_data epd = {
.bio = NULL,
- .tree = tree,
+ .tree = &BTRFS_I(mapping->host)->io_tree,
.extent_locked = 0,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
@@ -4126,9 +4122,8 @@ int extent_writepages(struct extent_io_tree *tree,
return ret;
}
-int extent_readpages(struct extent_io_tree *tree,
- struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+int extent_readpages(struct address_space *mapping, struct list_head *pages,
+ unsigned nr_pages)
{
struct bio *bio = NULL;
unsigned page_idx;
@@ -4136,6 +4131,7 @@ int extent_readpages(struct extent_io_tree *tree,
struct page *pagepool[16];
struct page *page;
struct extent_map *em_cached = NULL;
+ struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
int nr = 0;
u64 prev_em_start = (u64)-1;
@@ -4202,8 +4198,7 @@ int extent_invalidatepage(struct extent_io_tree *tree,
* are locked or under IO and drops the related state bits if it is safe
* to drop the page.
*/
-static int try_release_extent_state(struct extent_map_tree *map,
- struct extent_io_tree *tree,
+static int try_release_extent_state(struct extent_io_tree *tree,
struct page *page, gfp_t mask)
{
u64 start = page_offset(page);
@@ -4238,13 +4233,13 @@ static int try_release_extent_state(struct extent_map_tree *map,
* in the range corresponding to the page, both state records and extent
* map records are removed
*/
-int try_release_extent_mapping(struct extent_map_tree *map,
- struct extent_io_tree *tree, struct page *page,
- gfp_t mask)
+int try_release_extent_mapping(struct page *page, gfp_t mask)
{
struct extent_map *em;
u64 start = page_offset(page);
u64 end = start + PAGE_SIZE - 1;
+ struct extent_io_tree *tree = &BTRFS_I(page->mapping->host)->io_tree;
+ struct extent_map_tree *map = &BTRFS_I(page->mapping->host)->extent_tree;
if (gfpflags_allow_blocking(mask) &&
page->mapping->host->i_size > SZ_16M) {
@@ -4278,7 +4273,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
free_extent_map(em);
}
}
- return try_release_extent_state(map, tree, page, mask);
+ return try_release_extent_state(tree, page, mask);
}
/*
@@ -5620,46 +5615,6 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
}
}
-void le_bitmap_set(u8 *map, unsigned int start, int len)
-{
- u8 *p = map + BIT_BYTE(start);
- const unsigned int size = start + len;
- int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
- u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
-
- while (len - bits_to_set >= 0) {
- *p |= mask_to_set;
- len -= bits_to_set;
- bits_to_set = BITS_PER_BYTE;
- mask_to_set = ~0;
- p++;
- }
- if (len) {
- mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
- *p |= mask_to_set;
- }
-}
-
-void le_bitmap_clear(u8 *map, unsigned int start, int len)
-{
- u8 *p = map + BIT_BYTE(start);
- const unsigned int size = start + len;
- int bits_to_clear = BITS_PER_BYTE - (start % BITS_PER_BYTE);
- u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(start);
-
- while (len - bits_to_clear >= 0) {
- *p &= ~mask_to_clear;
- len -= bits_to_clear;
- bits_to_clear = BITS_PER_BYTE;
- mask_to_clear = ~0;
- p++;
- }
- if (len) {
- mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
- *p &= ~mask_to_clear;
- }
-}
-
/*
* eb_bitmap_offset() - calculate the page and offset of the byte containing the
* given bit number
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index a53009694b16..0bfd4aeb822d 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -79,14 +79,6 @@
#define BITMAP_LAST_BYTE_MASK(nbits) \
(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
-static inline int le_test_bit(int nr, const u8 *addr)
-{
- return 1U & (addr[BIT_BYTE(nr)] >> (nr & (BITS_PER_BYTE-1)));
-}
-
-void le_bitmap_set(u8 *map, unsigned int start, int len);
-void le_bitmap_clear(u8 *map, unsigned int start, int len);
-
struct extent_state;
struct btrfs_root;
struct btrfs_inode;
@@ -278,9 +270,7 @@ typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode,
int create);
void extent_io_tree_init(struct extent_io_tree *tree, void *private_data);
-int try_release_extent_mapping(struct extent_map_tree *map,
- struct extent_io_tree *tree, struct page *page,
- gfp_t mask);
+int try_release_extent_mapping(struct page *page, gfp_t mask);
int try_release_extent_buffer(struct page *page);
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached);
@@ -421,14 +411,12 @@ int extent_invalidatepage(struct extent_io_tree *tree,
int extent_write_full_page(struct page *page, struct writeback_control *wbc);
int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
int mode);
-int extent_writepages(struct extent_io_tree *tree,
- struct address_space *mapping,
+int extent_writepages(struct address_space *mapping,
struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc);
-int extent_readpages(struct extent_io_tree *tree,
- struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages);
+int extent_readpages(struct address_space *mapping, struct list_head *pages,
+ unsigned nr_pages);
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
void set_page_extent_mapped(struct page *page);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 1b8a078f92eb..6648d55e5339 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -518,6 +518,7 @@ static noinline int merge_extent_mapping(struct extent_map_tree *em_tree,
/**
* btrfs_add_extent_mapping - add extent mapping into em_tree
+ * @fs_info - used for tracepoint
* @em_tree - the extent tree into which we want to insert the extent mapping
* @em_in - extent we are inserting
* @start - start of the logical range btrfs_get_extent() is requesting
@@ -535,7 +536,8 @@ static noinline int merge_extent_mapping(struct extent_map_tree *em_tree,
* Return 0 on success, otherwise -EEXIST.
*
*/
-int btrfs_add_extent_mapping(struct extent_map_tree *em_tree,
+int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
+ struct extent_map_tree *em_tree,
struct extent_map **em_in, u64 start, u64 len)
{
int ret;
@@ -553,7 +555,7 @@ int btrfs_add_extent_mapping(struct extent_map_tree *em_tree,
existing = search_extent_mapping(em_tree, start, len);
- trace_btrfs_handle_em_exist(existing, em, start, len);
+ trace_btrfs_handle_em_exist(fs_info, existing, em, start, len);
/*
* existing will always be non-NULL, since there must be
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 5fcb80a6ce37..25d985e7532a 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -92,7 +92,8 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen
void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em);
struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len);
-int btrfs_add_extent_mapping(struct extent_map_tree *em_tree,
+int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
+ struct extent_map_tree *em_tree,
struct extent_map **em_in, u64 start, u64 len);
#endif
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index e5b569bebc73..d5f80cb300be 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -253,10 +253,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
truncate_pagecache(inode, 0);
/*
- * We don't need an orphan item because truncating the free space cache
- * will never be split across transactions.
- * We don't need to check for -EAGAIN because we're a free space
- * cache inode
+ * We skip the throttling logic for free space cache inodes, so we don't
+ * need to check for -EAGAIN.
*/
ret = btrfs_truncate_inode_items(trans, root, inode,
0, BTRFS_EXTENT_DATA_KEY);
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 32a0f6cb5594..b5950aacd697 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -12,7 +12,6 @@
#include "transaction.h"
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);
@@ -45,11 +44,10 @@ void set_free_space_tree_thresholds(struct btrfs_block_group_cache *cache)
}
static int add_new_free_space_info(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path)
{
- struct btrfs_root *root = fs_info->free_space_root;
+ struct btrfs_root *root = trans->fs_info->free_space_root;
struct btrfs_free_space_info *info;
struct btrfs_key key;
struct extent_buffer *leaf;
@@ -138,10 +136,11 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
return DIV_ROUND_UP((u32)div_u64(size, sectorsize), BITS_PER_BYTE);
}
-static u8 *alloc_bitmap(u32 bitmap_size)
+static unsigned long *alloc_bitmap(u32 bitmap_size)
{
- u8 *ret;
+ unsigned long *ret;
unsigned int nofs_flag;
+ u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long));
/*
* GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
@@ -152,21 +151,42 @@ static u8 *alloc_bitmap(u32 bitmap_size)
* know that recursion is unsafe.
*/
nofs_flag = memalloc_nofs_save();
- ret = kvzalloc(bitmap_size, GFP_KERNEL);
+ ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
memalloc_nofs_restore(nofs_flag);
return ret;
}
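The rounding added here matters because the word-based bit helpers used further down (find_next_bit_le() and friends) read whole unsigned longs, so the allocation must be a multiple of sizeof(unsigned long) to keep those reads in bounds. A worked micro-example, assuming a 64-bit kernel:

	u32 bitmap_size = 10;	/* bytes of bitmap actually needed */
	u32 rounded = round_up(bitmap_size, sizeof(unsigned long));
	/* rounded == 16: two 8-byte words fully cover the 10 bitmap bytes */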
+static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
+{
+ u8 *p = ((u8 *)map) + BIT_BYTE(start);
+ const unsigned int size = start + len;
+ int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
+ u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
+
+ while (len - bits_to_set >= 0) {
+ *p |= mask_to_set;
+ len -= bits_to_set;
+ bits_to_set = BITS_PER_BYTE;
+ mask_to_set = ~0;
+ p++;
+ }
+ if (len) {
+ mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
+ *p |= mask_to_set;
+ }
+}
+
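The byte-mask arithmetic in le_bitmap_set() is easiest to follow on a concrete run that straddles a byte boundary; a hypothetical call on a zeroed one-word map:

	unsigned long map[1] = { 0 };

	le_bitmap_set(map, 5, 7);	/* sets bits 5..11 */
	/*
	 * First byte: BITMAP_FIRST_BYTE_MASK(5) == 0xe0 sets bits 5..7 and
	 * leaves len == 4. Last byte: the mask is trimmed by
	 * BITMAP_LAST_BYTE_MASK(12) == 0x0f, setting bits 8..11.
	 */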
int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = fs_info->free_space_root;
struct btrfs_free_space_info *info;
struct btrfs_key key, found_key;
struct extent_buffer *leaf;
- u8 *bitmap, *bitmap_cursor;
+ unsigned long *bitmap;
+ char *bitmap_cursor;
u64 start, end;
u64 bitmap_range, i;
u32 bitmap_size, flags, expected_extent_count;
@@ -255,7 +275,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
goto out;
}
- bitmap_cursor = bitmap;
+ bitmap_cursor = (char *)bitmap;
bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
i = start;
while (i < end) {
@@ -296,21 +316,18 @@ out:
}
int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = fs_info->free_space_root;
struct btrfs_free_space_info *info;
struct btrfs_key key, found_key;
struct extent_buffer *leaf;
- u8 *bitmap;
+ unsigned long *bitmap;
u64 start, end;
- /* Initialize to silence GCC. */
- u64 extent_start = 0;
- u64 offset;
u32 bitmap_size, flags, expected_extent_count;
- int prev_bit = 0, bit, bitnr;
+ unsigned long nrbits, start_bit, end_bit;
u32 extent_count = 0;
int done = 0, nr;
int ret;
@@ -348,7 +365,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
break;
} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
unsigned long ptr;
- u8 *bitmap_cursor;
+ char *bitmap_cursor;
u32 bitmap_pos, data_size;
ASSERT(found_key.objectid >= start);
@@ -358,7 +375,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
bitmap_pos = div_u64(found_key.objectid - start,
fs_info->sectorsize *
BITS_PER_BYTE);
- bitmap_cursor = bitmap + bitmap_pos;
+ bitmap_cursor = ((char *)bitmap) + bitmap_pos;
data_size = free_space_bitmap_size(found_key.offset,
fs_info->sectorsize);
@@ -392,32 +409,16 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
- offset = start;
- bitnr = 0;
- while (offset < end) {
- bit = !!le_test_bit(bitnr, bitmap);
- if (prev_bit == 0 && bit == 1) {
- extent_start = offset;
- } else if (prev_bit == 1 && bit == 0) {
- key.objectid = extent_start;
- key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
- key.offset = offset - extent_start;
-
- ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
- if (ret)
- goto out;
- btrfs_release_path(path);
- extent_count++;
- }
- prev_bit = bit;
- offset += fs_info->sectorsize;
- bitnr++;
- }
- if (prev_bit == 1) {
- key.objectid = extent_start;
+ nrbits = div_u64(block_group->key.offset, block_group->fs_info->sectorsize);
+ start_bit = find_next_bit_le(bitmap, nrbits, 0);
+
+ while (start_bit < nrbits) {
+ end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
+ ASSERT(start_bit < end_bit);
+
+ key.objectid = start + start_bit * block_group->fs_info->sectorsize;
key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
- key.offset = end - extent_start;
+ key.offset = (end_bit - start_bit) * block_group->fs_info->sectorsize;
ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
if (ret)
@@ -425,6 +426,8 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
extent_count++;
+
+ start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
}
if (extent_count != expected_extent_count) {
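Reduced to its core, the converted loop is the standard run-extraction idiom over a little-endian bitmap: find the next set bit, then the next clear bit, and the gap between them is one extent. A standalone sketch, assuming bitmap and nrbits are in scope and emit_extent() is a hypothetical consumer:

	unsigned long start_bit, end_bit;

	start_bit = find_next_bit_le(bitmap, nrbits, 0);
	while (start_bit < nrbits) {
		/* [start_bit, end_bit) is one contiguous run of set bits */
		end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
		emit_extent(start_bit, end_bit - start_bit);
		start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
	}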
@@ -446,7 +449,6 @@ out:
}
static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path,
int new_extents)
@@ -459,7 +461,8 @@ static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
if (new_extents == 0)
return 0;
- info = search_free_space_info(trans, fs_info, block_group, path, 1);
+ info = search_free_space_info(trans, trans->fs_info, block_group, path,
+ 1);
if (IS_ERR(info)) {
ret = PTR_ERR(info);
goto out;
@@ -474,12 +477,10 @@ static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
extent_count > block_group->bitmap_high_thresh) {
- ret = convert_free_space_to_bitmaps(trans, fs_info, block_group,
- path);
+ ret = convert_free_space_to_bitmaps(trans, block_group, path);
} else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
extent_count < block_group->bitmap_low_thresh) {
- ret = convert_free_space_to_extents(trans, fs_info, block_group,
- path);
+ ret = convert_free_space_to_extents(trans, block_group, path);
}
out:
@@ -576,12 +577,11 @@ static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
* the bitmap.
*/
static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path,
u64 start, u64 size, int remove)
{
- struct btrfs_root *root = fs_info->free_space_root;
+ struct btrfs_root *root = block_group->fs_info->free_space_root;
struct btrfs_key key;
u64 end = start + size;
u64 cur_start, cur_size;
@@ -682,7 +682,7 @@ static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
- ret = update_free_space_extent_count(trans, fs_info, block_group, path,
+ ret = update_free_space_extent_count(trans, block_group, path,
new_extents);
out:
@@ -690,12 +690,11 @@ out:
}
static int remove_free_space_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path,
u64 start, u64 size)
{
- struct btrfs_root *root = fs_info->free_space_root;
+ struct btrfs_root *root = trans->fs_info->free_space_root;
struct btrfs_key key;
u64 found_start, found_end;
u64 end = start + size;
@@ -769,7 +768,7 @@ static int remove_free_space_extent(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
- ret = update_free_space_extent_count(trans, fs_info, block_group, path,
+ ret = update_free_space_extent_count(trans, block_group, path,
new_extents);
out:
@@ -777,7 +776,6 @@ out:
}
int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path, u64 start, u64 size)
{
@@ -786,36 +784,35 @@ int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
int ret;
if (block_group->needs_free_space) {
- ret = __add_block_group_free_space(trans, fs_info, block_group,
- path);
+ ret = __add_block_group_free_space(trans, block_group, path);
if (ret)
return ret;
}
- info = search_free_space_info(NULL, fs_info, block_group, path, 0);
+ info = search_free_space_info(NULL, trans->fs_info, block_group, path,
+ 0);
if (IS_ERR(info))
return PTR_ERR(info);
flags = btrfs_free_space_flags(path->nodes[0], info);
btrfs_release_path(path);
if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
- return modify_free_space_bitmap(trans, fs_info, block_group,
- path, start, size, 1);
+ return modify_free_space_bitmap(trans, block_group, path,
+ start, size, 1);
} else {
- return remove_free_space_extent(trans, fs_info, block_group,
- path, start, size);
+ return remove_free_space_extent(trans, block_group, path,
+ start, size);
}
}
int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
u64 start, u64 size)
{
struct btrfs_block_group_cache *block_group;
struct btrfs_path *path;
int ret;
- if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+ if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
return 0;
path = btrfs_alloc_path();
@@ -824,7 +821,7 @@ int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
goto out;
}
- block_group = btrfs_lookup_block_group(fs_info, start);
+ block_group = btrfs_lookup_block_group(trans->fs_info, start);
if (!block_group) {
ASSERT(0);
ret = -ENOENT;
@@ -832,8 +829,8 @@ int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
}
mutex_lock(&block_group->free_space_lock);
- ret = __remove_from_free_space_tree(trans, fs_info, block_group, path,
- start, size);
+ ret = __remove_from_free_space_tree(trans, block_group, path, start,
+ size);
mutex_unlock(&block_group->free_space_lock);
btrfs_put_block_group(block_group);
@@ -845,12 +842,11 @@ out:
}
static int add_free_space_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path,
u64 start, u64 size)
{
- struct btrfs_root *root = fs_info->free_space_root;
+ struct btrfs_root *root = trans->fs_info->free_space_root;
struct btrfs_key key, new_key;
u64 found_start, found_end;
u64 end = start + size;
@@ -965,7 +961,7 @@ insert:
goto out;
btrfs_release_path(path);
- ret = update_free_space_extent_count(trans, fs_info, block_group, path,
+ ret = update_free_space_extent_count(trans, block_group, path,
new_extents);
out:
@@ -973,17 +969,16 @@ out:
}
int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path, u64 start, u64 size)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_free_space_info *info;
u32 flags;
int ret;
if (block_group->needs_free_space) {
- ret = __add_block_group_free_space(trans, fs_info, block_group,
- path);
+ ret = __add_block_group_free_space(trans, block_group, path);
if (ret)
return ret;
}
@@ -995,23 +990,22 @@ int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
- return modify_free_space_bitmap(trans, fs_info, block_group,
- path, start, size, 0);
+ return modify_free_space_bitmap(trans, block_group, path,
+ start, size, 0);
} else {
- return add_free_space_extent(trans, fs_info, block_group, path,
- start, size);
+ return add_free_space_extent(trans, block_group, path, start,
+ size);
}
}
int add_to_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
u64 start, u64 size)
{
struct btrfs_block_group_cache *block_group;
struct btrfs_path *path;
int ret;
- if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+ if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
return 0;
path = btrfs_alloc_path();
@@ -1020,7 +1014,7 @@ int add_to_free_space_tree(struct btrfs_trans_handle *trans,
goto out;
}
- block_group = btrfs_lookup_block_group(fs_info, start);
+ block_group = btrfs_lookup_block_group(trans->fs_info, start);
if (!block_group) {
ASSERT(0);
ret = -ENOENT;
@@ -1028,8 +1022,7 @@ int add_to_free_space_tree(struct btrfs_trans_handle *trans,
}
mutex_lock(&block_group->free_space_lock);
- ret = __add_to_free_space_tree(trans, fs_info, block_group, path, start,
- size);
+ ret = __add_to_free_space_tree(trans, block_group, path, start, size);
mutex_unlock(&block_group->free_space_lock);
btrfs_put_block_group(block_group);
@@ -1046,10 +1039,9 @@ out:
* through the normal add/remove hooks.
*/
static int populate_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group)
{
- struct btrfs_root *extent_root = fs_info->extent_root;
+ struct btrfs_root *extent_root = trans->fs_info->extent_root;
struct btrfs_path *path, *path2;
struct btrfs_key key;
u64 start, end;
@@ -1066,7 +1058,7 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
return -ENOMEM;
}
- ret = add_new_free_space_info(trans, fs_info, block_group, path2);
+ ret = add_new_free_space_info(trans, block_group, path2);
if (ret)
goto out;
@@ -1099,7 +1091,7 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
break;
if (start < key.objectid) {
- ret = __add_to_free_space_tree(trans, fs_info,
+ ret = __add_to_free_space_tree(trans,
block_group,
path2, start,
key.objectid -
@@ -1109,7 +1101,7 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
}
start = key.objectid;
if (key.type == BTRFS_METADATA_ITEM_KEY)
- start += fs_info->nodesize;
+ start += trans->fs_info->nodesize;
else
start += key.offset;
} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
@@ -1124,8 +1116,8 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
break;
}
if (start < end) {
- ret = __add_to_free_space_tree(trans, fs_info, block_group,
- path2, start, end - start);
+ ret = __add_to_free_space_tree(trans, block_group, path2,
+ start, end - start);
if (ret)
goto out_locked;
}
@@ -1165,7 +1157,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
while (node) {
block_group = rb_entry(node, struct btrfs_block_group_cache,
cache_node);
- ret = populate_free_space_tree(trans, fs_info, block_group);
+ ret = populate_free_space_tree(trans, block_group);
if (ret)
goto abort;
node = rb_next(node);
@@ -1269,7 +1261,6 @@ abort:
}
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path)
{
@@ -1277,19 +1268,19 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
block_group->needs_free_space = 0;
- ret = add_new_free_space_info(trans, fs_info, block_group, path);
+ ret = add_new_free_space_info(trans, block_group, path);
if (ret)
return ret;
- return __add_to_free_space_tree(trans, fs_info, block_group, path,
+ return __add_to_free_space_tree(trans, block_group, path,
block_group->key.objectid,
block_group->key.offset);
}
int add_block_group_free_space(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_path *path = NULL;
int ret = 0;
@@ -1306,7 +1297,7 @@ int add_block_group_free_space(struct btrfs_trans_handle *trans,
goto out;
}
- ret = __add_block_group_free_space(trans, fs_info, block_group, path);
+ ret = __add_block_group_free_space(trans, block_group, path);
out:
btrfs_free_path(path);
@@ -1317,10 +1308,9 @@ out:
}
int remove_block_group_free_space(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group)
{
- struct btrfs_root *root = fs_info->free_space_root;
+ struct btrfs_root *root = trans->fs_info->free_space_root;
struct btrfs_path *path;
struct btrfs_key key, found_key;
struct extent_buffer *leaf;
@@ -1328,7 +1318,7 @@ int remove_block_group_free_space(struct btrfs_trans_handle *trans,
int done = 0, nr;
int ret;
- if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+ if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
return 0;
if (block_group->needs_free_space) {
@@ -1439,7 +1429,6 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
extent_start = offset;
} else if (prev_bit == 1 && bit == 0) {
total_found += add_new_free_space(block_group,
- fs_info,
extent_start,
offset);
if (total_found > CACHING_CTL_WAKE_UP) {
@@ -1453,8 +1442,8 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
}
}
if (prev_bit == 1) {
- total_found += add_new_free_space(block_group, fs_info,
- extent_start, end);
+ total_found += add_new_free_space(block_group, extent_start,
+ end);
extent_count++;
}
@@ -1511,8 +1500,7 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
caching_ctl->progress = key.objectid;
- total_found += add_new_free_space(block_group, fs_info,
- key.objectid,
+ total_found += add_new_free_space(block_group, key.objectid,
key.objectid + key.offset);
if (total_found > CACHING_CTL_WAKE_UP) {
total_found = 0;
diff --git a/fs/btrfs/free-space-tree.h b/fs/btrfs/free-space-tree.h
index 874b4feecad2..3133651d7d70 100644
--- a/fs/btrfs/free-space-tree.h
+++ b/fs/btrfs/free-space-tree.h
@@ -19,16 +19,12 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info);
int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info);
int load_free_space_tree(struct btrfs_caching_control *caching_ctl);
int add_block_group_free_space(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group);
int remove_block_group_free_space(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group);
int add_to_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
u64 start, u64 size);
int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
u64 start, u64 size);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
@@ -38,19 +34,15 @@ search_free_space_info(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path, int cow);
int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path, u64 start, u64 size);
int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path, u64 start, u64 size);
int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);
int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);
int free_space_test_bit(struct btrfs_block_group_cache *block_group,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d241285a0d2a..89b208201783 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1018,8 +1018,10 @@ static noinline int cow_file_range(struct inode *inode,
ram_size, /* ram_bytes */
BTRFS_COMPRESS_NONE, /* compress_type */
BTRFS_ORDERED_REGULAR /* type */);
- if (IS_ERR(em))
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
goto out_reserve;
+ }
free_extent_map(em);
ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
@@ -1156,13 +1158,10 @@ static noinline void async_cow_submit(struct btrfs_work *work)
nr_pages = (async_cow->end - async_cow->start + PAGE_SIZE) >>
PAGE_SHIFT;
- /*
- * atomic_sub_return implies a barrier for waitqueue_active
- */
+ /* atomic_sub_return implies a barrier */
if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
- 5 * SZ_1M &&
- waitqueue_active(&fs_info->async_submit_wait))
- wake_up(&fs_info->async_submit_wait);
+ 5 * SZ_1M)
+ cond_wake_up_nomb(&fs_info->async_submit_wait);
if (async_cow->inode)
submit_compressed_extents(async_cow->inode, async_cow);
@@ -1373,6 +1372,13 @@ next_slot:
btrfs_file_extent_encryption(leaf, fi) ||
btrfs_file_extent_other_encoding(leaf, fi))
goto out_check;
+ /*
+ * Do the same check as in btrfs_cross_ref_exist but
+ * without the unnecessary search.
+ */
+ if (btrfs_file_extent_generation(leaf, fi) <=
+ btrfs_root_last_snapshot(&root->root_item))
+ goto out_check;
if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
goto out_check;
if (btrfs_extent_readonly(fs_info, disk_bytenr))
@@ -1742,24 +1748,32 @@ static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
spin_unlock(&root->delalloc_lock);
}
-static void btrfs_del_delalloc_inode(struct btrfs_root *root,
- struct btrfs_inode *inode)
+
+void __btrfs_del_delalloc_inode(struct btrfs_root *root,
+ struct btrfs_inode *inode)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
- spin_lock(&root->delalloc_lock);
if (!list_empty(&inode->delalloc_inodes)) {
list_del_init(&inode->delalloc_inodes);
clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
&inode->runtime_flags);
root->nr_delalloc_inodes--;
if (!root->nr_delalloc_inodes) {
+ ASSERT(list_empty(&root->delalloc_inodes));
spin_lock(&fs_info->delalloc_root_lock);
BUG_ON(list_empty(&root->delalloc_root));
list_del_init(&root->delalloc_root);
spin_unlock(&fs_info->delalloc_root_lock);
}
}
+}
+
+static void btrfs_del_delalloc_inode(struct btrfs_root *root,
+ struct btrfs_inode *inode)
+{
+ spin_lock(&root->delalloc_lock);
+ __btrfs_del_delalloc_inode(root, inode);
spin_unlock(&root->delalloc_lock);
}
@@ -3151,6 +3165,9 @@ out:
/* once for the tree */
btrfs_put_ordered_extent(ordered_extent);
+ /* Try to release some metadata so we don't OOM, but don't wait for it */
+ btrfs_btree_balance_dirty_nodelay(fs_info);
+
return ret;
}
@@ -3293,177 +3310,31 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
}
/*
- * This is called in transaction commit time. If there are no orphan
- * files in the subvolume, it removes orphan item and frees block_rsv
- * structure.
- */
-void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_block_rsv *block_rsv;
- int ret;
-
- if (atomic_read(&root->orphan_inodes) ||
- root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
- return;
-
- spin_lock(&root->orphan_lock);
- if (atomic_read(&root->orphan_inodes)) {
- spin_unlock(&root->orphan_lock);
- return;
- }
-
- if (root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) {
- spin_unlock(&root->orphan_lock);
- return;
- }
-
- block_rsv = root->orphan_block_rsv;
- root->orphan_block_rsv = NULL;
- spin_unlock(&root->orphan_lock);
-
- if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state) &&
- btrfs_root_refs(&root->root_item) > 0) {
- ret = btrfs_del_orphan_item(trans, fs_info->tree_root,
- root->root_key.objectid);
- if (ret)
- btrfs_abort_transaction(trans, ret);
- else
- clear_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
- &root->state);
- }
-
- if (block_rsv) {
- WARN_ON(block_rsv->size > 0);
- btrfs_free_block_rsv(fs_info, block_rsv);
- }
-}
-
-/*
- * This creates an orphan entry for the given inode in case something goes
- * wrong in the middle of an unlink/truncate.
- *
- * NOTE: caller of this function should reserve 5 units of metadata for
- * this function.
+ * This creates an orphan entry for the given inode in case something goes wrong
+ * in the middle of an unlink.
*/
int btrfs_orphan_add(struct btrfs_trans_handle *trans,
- struct btrfs_inode *inode)
+ struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
- struct btrfs_root *root = inode->root;
- struct btrfs_block_rsv *block_rsv = NULL;
- int reserve = 0;
- bool insert = false;
int ret;
- if (!root->orphan_block_rsv) {
- block_rsv = btrfs_alloc_block_rsv(fs_info,
- BTRFS_BLOCK_RSV_TEMP);
- if (!block_rsv)
- return -ENOMEM;
- }
-
- if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
- &inode->runtime_flags))
- insert = true;
-
- if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
- &inode->runtime_flags))
- reserve = 1;
-
- spin_lock(&root->orphan_lock);
- /* If someone has created ->orphan_block_rsv, be happy to use it. */
- if (!root->orphan_block_rsv) {
- root->orphan_block_rsv = block_rsv;
- } else if (block_rsv) {
- btrfs_free_block_rsv(fs_info, block_rsv);
- block_rsv = NULL;
- }
-
- if (insert)
- atomic_inc(&root->orphan_inodes);
- spin_unlock(&root->orphan_lock);
-
- /* grab metadata reservation from transaction handle */
- if (reserve) {
- ret = btrfs_orphan_reserve_metadata(trans, inode);
- ASSERT(!ret);
- if (ret) {
- /*
- * dec doesn't need spin_lock as ->orphan_block_rsv
- * would be released only if ->orphan_inodes is
- * zero.
- */
- atomic_dec(&root->orphan_inodes);
- clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
- &inode->runtime_flags);
- if (insert)
- clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
- &inode->runtime_flags);
- return ret;
- }
- }
-
- /* insert an orphan item to track this unlinked/truncated file */
- if (insert) {
- ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
- if (ret) {
- if (reserve) {
- clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
- &inode->runtime_flags);
- btrfs_orphan_release_metadata(inode);
- }
- /*
- * btrfs_orphan_commit_root may race with us and set
- * ->orphan_block_rsv to zero, in order to avoid that,
- * decrease ->orphan_inodes after everything is done.
- */
- atomic_dec(&root->orphan_inodes);
- if (ret != -EEXIST) {
- clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
- &inode->runtime_flags);
- btrfs_abort_transaction(trans, ret);
- return ret;
- }
- }
- ret = 0;
+ ret = btrfs_insert_orphan_item(trans, inode->root, btrfs_ino(inode));
+ if (ret && ret != -EEXIST) {
+ btrfs_abort_transaction(trans, ret);
+ return ret;
}
return 0;
}
/*
- * We have done the truncate/delete so we can go ahead and remove the orphan
- * item for this particular inode.
+ * We have done the delete so we can go ahead and remove the orphan item for
+ * this particular inode.
*/
static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode)
{
- struct btrfs_root *root = inode->root;
- int delete_item = 0;
- int ret = 0;
-
- if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
- &inode->runtime_flags))
- delete_item = 1;
-
- if (delete_item && trans)
- ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
-
- if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
- &inode->runtime_flags))
- btrfs_orphan_release_metadata(inode);
-
- /*
- * btrfs_orphan_commit_root may race with us and set ->orphan_block_rsv
- * to zero, in order to avoid that, decrease ->orphan_inodes after
- * everything is done.
- */
- if (delete_item)
- atomic_dec(&root->orphan_inodes);
-
- return ret;
+ return btrfs_del_orphan_item(trans, inode->root, btrfs_ino(inode));
}
/*
@@ -3479,7 +3350,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
struct btrfs_trans_handle *trans;
struct inode *inode;
u64 last_objectid = 0;
- int ret = 0, nr_unlink = 0, nr_truncate = 0;
+ int ret = 0, nr_unlink = 0;
if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
return 0;
@@ -3579,12 +3450,31 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
key.offset = found_key.objectid - 1;
continue;
}
+
}
+
/*
- * Inode is already gone but the orphan item is still there,
- * kill the orphan item.
+ * If we have an inode with links, there are a couple of
+ * possibilities. Old kernels (before v3.12) used to create an
+ * orphan item for truncate indicating that there were possibly
+ * extent items past i_size that needed to be deleted. In v3.12,
+ * truncate was changed to update i_size in sync with the extent
+ * items, but the (useless) orphan item was still created. Since
+ * v4.18, we don't create the orphan item for truncate at all.
+ *
+ * So, this item could mean that we need to do a truncate, but
+ * only if this filesystem was last used on a pre-v3.12 kernel
+ * and was not cleanly unmounted. The odds of that are quite
+ * slim, and it's a pain to do the truncate now, so just delete
+ * the orphan item.
+ *
+ * It's also possible that this orphan item was supposed to be
+ * deleted but wasn't. The inode number may have been reused,
+ * but either way, we can delete the orphan item.
*/
- if (ret == -ENOENT) {
+ if (ret == -ENOENT || inode->i_nlink) {
+ if (!ret)
+ iput(inode);
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
@@ -3600,42 +3490,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
continue;
}
- /*
- * add this inode to the orphan list so btrfs_orphan_del does
- * the proper thing when we hit it
- */
- set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
- &BTRFS_I(inode)->runtime_flags);
- atomic_inc(&root->orphan_inodes);
-
- /* if we have links, this was a truncate, lets do that */
- if (inode->i_nlink) {
- if (WARN_ON(!S_ISREG(inode->i_mode))) {
- iput(inode);
- continue;
- }
- nr_truncate++;
-
- /* 1 for the orphan item deletion. */
- trans = btrfs_start_transaction(root, 1);
- if (IS_ERR(trans)) {
- iput(inode);
- ret = PTR_ERR(trans);
- goto out;
- }
- ret = btrfs_orphan_add(trans, BTRFS_I(inode));
- btrfs_end_transaction(trans);
- if (ret) {
- iput(inode);
- goto out;
- }
-
- ret = btrfs_truncate(inode, false);
- if (ret)
- btrfs_orphan_del(NULL, BTRFS_I(inode));
- } else {
- nr_unlink++;
- }
+ nr_unlink++;
/* this will do delete_inode and everything for us */
iput(inode);
@@ -3647,12 +3502,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
- if (root->orphan_block_rsv)
- btrfs_block_rsv_release(fs_info, root->orphan_block_rsv,
- (u64)-1);
-
- if (root->orphan_block_rsv ||
- test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) {
+ if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) {
trans = btrfs_join_transaction(root);
if (!IS_ERR(trans))
btrfs_end_transaction(trans);
@@ -3660,8 +3510,6 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
if (nr_unlink)
btrfs_debug(fs_info, "unlinked %d orphans", nr_unlink);
- if (nr_truncate)
- btrfs_debug(fs_info, "truncated %d orphans", nr_truncate);
out:
if (ret)
@@ -3924,7 +3772,7 @@ cache_acl:
break;
}
- btrfs_update_iflags(inode);
+ btrfs_sync_inode_flags_to_i_flags(inode);
return 0;
make_bad:
@@ -4238,7 +4086,7 @@ out:
return ret;
}
-int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *dir, u64 objectid,
const char *name, int name_len)
@@ -4319,6 +4167,262 @@ out:
return ret;
}
+/*
+ * Helper to check if the subvolume references other subvolumes or if it's
+ * default.
+ */
+static noinline int may_destroy_subvol(struct btrfs_root *root)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_path *path;
+ struct btrfs_dir_item *di;
+ struct btrfs_key key;
+ u64 dir_id;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ /* Make sure this root isn't set as the default subvol */
+ dir_id = btrfs_super_root_dir(fs_info->super_copy);
+ di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path,
+ dir_id, "default", 7, 0);
+ if (di && !IS_ERR(di)) {
+ btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
+ if (key.objectid == root->root_key.objectid) {
+ ret = -EPERM;
+ btrfs_err(fs_info,
+ "deleting default subvolume %llu is not allowed",
+ key.objectid);
+ goto out;
+ }
+ btrfs_release_path(path);
+ }
+
+ key.objectid = root->root_key.objectid;
+ key.type = BTRFS_ROOT_REF_KEY;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ BUG_ON(ret == 0);
+
+ ret = 0;
+ if (path->slots[0] > 0) {
+ path->slots[0]--;
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if (key.objectid == root->root_key.objectid &&
+ key.type == BTRFS_ROOT_REF_KEY)
+ ret = -ENOTEMPTY;
+ }
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+/* Delete all dentries for inodes belonging to the root */
+static void btrfs_prune_dentries(struct btrfs_root *root)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct rb_node *node;
+ struct rb_node *prev;
+ struct btrfs_inode *entry;
+ struct inode *inode;
+ u64 objectid = 0;
+
+ if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
+ WARN_ON(btrfs_root_refs(&root->root_item) != 0);
+
+ spin_lock(&root->inode_lock);
+again:
+ node = root->inode_tree.rb_node;
+ prev = NULL;
+ while (node) {
+ prev = node;
+ entry = rb_entry(node, struct btrfs_inode, rb_node);
+
+ if (objectid < btrfs_ino(BTRFS_I(&entry->vfs_inode)))
+ node = node->rb_left;
+ else if (objectid > btrfs_ino(BTRFS_I(&entry->vfs_inode)))
+ node = node->rb_right;
+ else
+ break;
+ }
+ if (!node) {
+ while (prev) {
+ entry = rb_entry(prev, struct btrfs_inode, rb_node);
+ if (objectid <= btrfs_ino(BTRFS_I(&entry->vfs_inode))) {
+ node = prev;
+ break;
+ }
+ prev = rb_next(prev);
+ }
+ }
+ while (node) {
+ entry = rb_entry(node, struct btrfs_inode, rb_node);
+ objectid = btrfs_ino(BTRFS_I(&entry->vfs_inode)) + 1;
+ inode = igrab(&entry->vfs_inode);
+ if (inode) {
+ spin_unlock(&root->inode_lock);
+ if (atomic_read(&inode->i_count) > 1)
+ d_prune_aliases(inode);
+ /*
+ * btrfs_drop_inode will have it removed from the inode
+ * cache when its usage count hits zero.
+ */
+ iput(inode);
+ cond_resched();
+ spin_lock(&root->inode_lock);
+ goto again;
+ }
+
+ if (cond_resched_lock(&root->inode_lock))
+ goto again;
+
+ node = rb_next(node);
+ }
+ spin_unlock(&root->inode_lock);
+}
+
+int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
+{
+ struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
+ struct btrfs_root *root = BTRFS_I(dir)->root;
+ struct inode *inode = d_inode(dentry);
+ struct btrfs_root *dest = BTRFS_I(inode)->root;
+ struct btrfs_trans_handle *trans;
+ struct btrfs_block_rsv block_rsv;
+ u64 root_flags;
+ int ret;
+ int err;
+
+ /*
+ * Don't allow deleting a subvolume with send in progress. This is
+ * inside the inode lock, so the error handling that has to drop the
+ * bit again is not run concurrently.
+ */
+ spin_lock(&dest->root_item_lock);
+ root_flags = btrfs_root_flags(&dest->root_item);
+ if (dest->send_in_progress == 0) {
+ btrfs_set_root_flags(&dest->root_item,
+ root_flags | BTRFS_ROOT_SUBVOL_DEAD);
+ spin_unlock(&dest->root_item_lock);
+ } else {
+ spin_unlock(&dest->root_item_lock);
+ btrfs_warn(fs_info,
+ "attempt to delete subvolume %llu during send",
+ dest->root_key.objectid);
+ return -EPERM;
+ }
+
+ down_write(&fs_info->subvol_sem);
+
+ err = may_destroy_subvol(dest);
+ if (err)
+ goto out_up_write;
+
+ btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
+ /*
+ * One for dir inode,
+ * two for dir entries,
+ * two for root ref/backref.
+ */
+ err = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
+ if (err)
+ goto out_up_write;
+
+ trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ goto out_release;
+ }
+ trans->block_rsv = &block_rsv;
+ trans->bytes_reserved = block_rsv.size;
+
+ btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
+
+ ret = btrfs_unlink_subvol(trans, root, dir,
+ dest->root_key.objectid,
+ dentry->d_name.name,
+ dentry->d_name.len);
+ if (ret) {
+ err = ret;
+ btrfs_abort_transaction(trans, ret);
+ goto out_end_trans;
+ }
+
+ btrfs_record_root_in_trans(trans, dest);
+
+ memset(&dest->root_item.drop_progress, 0,
+ sizeof(dest->root_item.drop_progress));
+ dest->root_item.drop_level = 0;
+ btrfs_set_root_refs(&dest->root_item, 0);
+
+ if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) {
+ ret = btrfs_insert_orphan_item(trans,
+ fs_info->tree_root,
+ dest->root_key.objectid);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ err = ret;
+ goto out_end_trans;
+ }
+ }
+
+ ret = btrfs_uuid_tree_remove(trans, dest->root_item.uuid,
+ BTRFS_UUID_KEY_SUBVOL,
+ dest->root_key.objectid);
+ if (ret && ret != -ENOENT) {
+ btrfs_abort_transaction(trans, ret);
+ err = ret;
+ goto out_end_trans;
+ }
+ if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
+ ret = btrfs_uuid_tree_remove(trans,
+ dest->root_item.received_uuid,
+ BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+ dest->root_key.objectid);
+ if (ret && ret != -ENOENT) {
+ btrfs_abort_transaction(trans, ret);
+ err = ret;
+ goto out_end_trans;
+ }
+ }
+
+out_end_trans:
+ trans->block_rsv = NULL;
+ trans->bytes_reserved = 0;
+ ret = btrfs_end_transaction(trans);
+ if (ret && !err)
+ err = ret;
+ inode->i_flags |= S_DEAD;
+out_release:
+ btrfs_subvolume_release_metadata(fs_info, &block_rsv);
+out_up_write:
+ up_write(&fs_info->subvol_sem);
+ if (err) {
+ spin_lock(&dest->root_item_lock);
+ root_flags = btrfs_root_flags(&dest->root_item);
+ btrfs_set_root_flags(&dest->root_item,
+ root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
+ spin_unlock(&dest->root_item_lock);
+ } else {
+ d_invalidate(dentry);
+ btrfs_prune_dentries(dest);
+ ASSERT(dest->send_in_progress == 0);
+
+ /* the last ref */
+ if (dest->ino_cache_inode) {
+ iput(dest->ino_cache_inode);
+ dest->ino_cache_inode = NULL;
+ }
+ }
+
+ return err;
+}
+
static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
@@ -4330,7 +4434,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
return -ENOTEMPTY;
if (btrfs_ino(BTRFS_I(inode)) == BTRFS_FIRST_FREE_OBJECTID)
- return -EPERM;
+ return btrfs_delete_subvolume(dir, dentry);
trans = __unlink_start_trans(dir);
if (IS_ERR(trans))
@@ -4442,7 +4546,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
int pending_del_slot = 0;
int extent_type = -1;
int ret;
- int err = 0;
u64 ino = btrfs_ino(BTRFS_I(inode));
u64 bytes_deleted = 0;
bool be_nice = false;
@@ -4494,22 +4597,19 @@ search_again:
* up a huge file in a single leaf. Most of the time that
* bytes_deleted is > 0, it will be huge by the time we get here
*/
- if (be_nice && bytes_deleted > SZ_32M) {
- if (btrfs_should_end_transaction(trans)) {
- err = -EAGAIN;
- goto error;
- }
+ if (be_nice && bytes_deleted > SZ_32M &&
+ btrfs_should_end_transaction(trans)) {
+ ret = -EAGAIN;
+ goto out;
}
-
path->leave_spinning = 1;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret < 0) {
- err = ret;
+ if (ret < 0)
goto out;
- }
if (ret > 0) {
+ ret = 0;
/* there are no items in the tree for us to truncate, we're
* done
*/
@@ -4620,7 +4720,7 @@ search_again:
* We have to bail so the last_size is set to
* just before this extent.
*/
- err = NEED_TRUNCATE_BLOCK;
+ ret = NEED_TRUNCATE_BLOCK;
break;
}
@@ -4659,7 +4759,10 @@ delete:
extent_num_bytes, 0,
btrfs_header_owner(leaf),
ino, extent_offset);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ break;
+ }
if (btrfs_should_throttle_delayed_refs(trans, fs_info))
btrfs_async_run_delayed_refs(fs_info,
trans->delayed_ref_updates * 2,
@@ -4687,7 +4790,7 @@ delete:
pending_del_nr);
if (ret) {
btrfs_abort_transaction(trans, ret);
- goto error;
+ break;
}
pending_del_nr = 0;
}
@@ -4698,8 +4801,8 @@ delete:
trans->delayed_ref_updates = 0;
ret = btrfs_run_delayed_refs(trans,
updates * 2);
- if (ret && !err)
- err = ret;
+ if (ret)
+ break;
}
}
/*
@@ -4707,8 +4810,8 @@ delete:
* and let the transaction restart
*/
if (should_end) {
- err = -EAGAIN;
- goto error;
+ ret = -EAGAIN;
+ break;
}
goto search_again;
} else {
@@ -4716,32 +4819,37 @@ delete:
}
}
out:
- if (pending_del_nr) {
- ret = btrfs_del_items(trans, root, path, pending_del_slot,
+ if (ret >= 0 && pending_del_nr) {
+ int err;
+
+ err = btrfs_del_items(trans, root, path, pending_del_slot,
pending_del_nr);
- if (ret)
- btrfs_abort_transaction(trans, ret);
+ if (err) {
+ btrfs_abort_transaction(trans, err);
+ ret = err;
+ }
}
-error:
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
ASSERT(last_size >= new_size);
- if (!err && last_size > new_size)
+ if (!ret && last_size > new_size)
last_size = new_size;
btrfs_ordered_update_i_size(inode, last_size, NULL);
}
btrfs_free_path(path);
- if (be_nice && bytes_deleted > SZ_32M) {
+ if (be_nice && bytes_deleted > SZ_32M && (ret >= 0 || ret == -EAGAIN)) {
unsigned long updates = trans->delayed_ref_updates;
+ int err;
+
if (updates) {
trans->delayed_ref_updates = 0;
- ret = btrfs_run_delayed_refs(trans, updates * 2);
- if (ret && !err)
- err = ret;
+ err = btrfs_run_delayed_refs(trans, updates * 2);
+ if (err)
+ ret = err;
}
}
- return err;
+ return ret;
}
/*
@@ -5083,30 +5191,6 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
&BTRFS_I(inode)->runtime_flags);
- /*
- * 1 for the orphan item we're going to add
- * 1 for the orphan item deletion.
- */
- trans = btrfs_start_transaction(root, 2);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- /*
- * We need to do this in case we fail at _any_ point during the
- * actual truncate. Once we do the truncate_setsize we could
- * invalidate pages which forces any outstanding ordered io to
- * be instantly completed which will give us extents that need
- * to be truncated. If we fail to get an orphan inode down we
- * could have left over extents that were never meant to live,
- * so we need to guarantee from this point on that everything
- * will be consistent.
- */
- ret = btrfs_orphan_add(trans, BTRFS_I(inode));
- btrfs_end_transaction(trans);
- if (ret)
- return ret;
-
- /* we don't support swapfiles, so vmtruncate shouldn't fail */
truncate_setsize(inode, newsize);
/* Disable nonlocked read DIO to avoid the end less truncate */
@@ -5118,29 +5202,16 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
if (ret && inode->i_nlink) {
int err;
- /* To get a stable disk_i_size */
- err = btrfs_wait_ordered_range(inode, 0, (u64)-1);
- if (err) {
- btrfs_orphan_del(NULL, BTRFS_I(inode));
- return err;
- }
-
/*
- * failed to truncate, disk_i_size is only adjusted down
- * as we remove extents, so it should represent the true
- * size of the inode, so reset the in memory size and
- * delete our orphan entry.
+ * Truncate failed, so fix up the in-memory size. We
+ * adjusted disk_i_size down as we removed extents, so
+ * wait for disk_i_size to be stable and then update the
+ * in-memory size to match.
*/
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- btrfs_orphan_del(NULL, BTRFS_I(inode));
- return ret;
- }
- i_size_write(inode, BTRFS_I(inode)->disk_i_size);
- err = btrfs_orphan_del(trans, BTRFS_I(inode));
+ err = btrfs_wait_ordered_range(inode, 0, (u64)-1);
if (err)
- btrfs_abort_transaction(trans, err);
- btrfs_end_transaction(trans);
+ return err;
+ i_size_write(inode, BTRFS_I(inode)->disk_i_size);
}
}
@@ -5270,13 +5341,52 @@ static void evict_inode_truncate_pages(struct inode *inode)
spin_unlock(&io_tree->lock);
}
+static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
+ struct btrfs_block_rsv *rsv,
+ u64 min_size)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+ int failures = 0;
+
+ for (;;) {
+ struct btrfs_trans_handle *trans;
+ int ret;
+
+ ret = btrfs_block_rsv_refill(root, rsv, min_size,
+ BTRFS_RESERVE_FLUSH_LIMIT);
+
+ if (ret && ++failures > 2) {
+ btrfs_warn(fs_info,
+ "could not allocate space for a delete; will truncate on mount");
+ return ERR_PTR(-ENOSPC);
+ }
+
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans) || !ret)
+ return trans;
+
+ /*
+ * Try to steal from the global reserve if there is space for
+ * it.
+ */
+ if (!btrfs_check_space_for_delayed_refs(trans, fs_info) &&
+ !btrfs_block_rsv_migrate(global_rsv, rsv, min_size, 0))
+ return trans;
+
+ /* If not, commit and try again. */
+ ret = btrfs_commit_transaction(trans);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+}
+
void btrfs_evict_inode(struct inode *inode)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_block_rsv *rsv, *global_rsv;
- int steal_from_global = 0;
+ struct btrfs_block_rsv *rsv;
u64 min_size;
int ret;
@@ -5297,21 +5407,16 @@ void btrfs_evict_inode(struct inode *inode)
btrfs_is_free_space_inode(BTRFS_I(inode))))
goto no_delete;
- if (is_bad_inode(inode)) {
- btrfs_orphan_del(NULL, BTRFS_I(inode));
+ if (is_bad_inode(inode))
goto no_delete;
- }
/* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
if (!special_file(inode->i_mode))
btrfs_wait_ordered_range(inode, 0, (u64)-1);
btrfs_free_io_failure_record(BTRFS_I(inode), 0, (u64)-1);
- if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
- BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
- &BTRFS_I(inode)->runtime_flags));
+ if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
goto no_delete;
- }
if (inode->i_nlink > 0) {
BUG_ON(btrfs_root_refs(&root->root_item) != 0 &&
@@ -5320,130 +5425,63 @@ void btrfs_evict_inode(struct inode *inode)
}
ret = btrfs_commit_inode_delayed_inode(BTRFS_I(inode));
- if (ret) {
- btrfs_orphan_del(NULL, BTRFS_I(inode));
+ if (ret)
goto no_delete;
- }
rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
- if (!rsv) {
- btrfs_orphan_del(NULL, BTRFS_I(inode));
+ if (!rsv)
goto no_delete;
- }
rsv->size = min_size;
rsv->failfast = 1;
- global_rsv = &fs_info->global_block_rsv;
btrfs_i_size_write(BTRFS_I(inode), 0);
- /*
- * This is a bit simpler than btrfs_truncate since we've already
- * reserved our space for our orphan item in the unlink, so we just
- * need to reserve some slack space in case we add bytes and update
- * inode item when doing the truncate.
- */
while (1) {
- ret = btrfs_block_rsv_refill(root, rsv, min_size,
- BTRFS_RESERVE_FLUSH_LIMIT);
-
- /*
- * Try and steal from the global reserve since we will
- * likely not use this space anyway, we want to try as
- * hard as possible to get this to work.
- */
- if (ret)
- steal_from_global++;
- else
- steal_from_global = 0;
- ret = 0;
-
- /*
- * steal_from_global == 0: we reserved stuff, hooray!
- * steal_from_global == 1: we didn't reserve stuff, boo!
- * steal_from_global == 2: we've committed, still not a lot of
- * room but maybe we'll have room in the global reserve this
- * time.
- * steal_from_global == 3: abandon all hope!
- */
- if (steal_from_global > 2) {
- btrfs_warn(fs_info,
- "Could not get space for a delete, will truncate on mount %d",
- ret);
- btrfs_orphan_del(NULL, BTRFS_I(inode));
- btrfs_free_block_rsv(fs_info, rsv);
- goto no_delete;
- }
-
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- btrfs_orphan_del(NULL, BTRFS_I(inode));
- btrfs_free_block_rsv(fs_info, rsv);
- goto no_delete;
- }
-
- /*
- * We can't just steal from the global reserve, we need to make
- * sure there is room to do it, if not we need to commit and try
- * again.
- */
- if (steal_from_global) {
- if (!btrfs_check_space_for_delayed_refs(trans, fs_info))
- ret = btrfs_block_rsv_migrate(global_rsv, rsv,
- min_size, 0);
- else
- ret = -ENOSPC;
- }
-
- /*
- * Couldn't steal from the global reserve, we have too much
- * pending stuff built up, commit the transaction and try it
- * again.
- */
- if (ret) {
- ret = btrfs_commit_transaction(trans);
- if (ret) {
- btrfs_orphan_del(NULL, BTRFS_I(inode));
- btrfs_free_block_rsv(fs_info, rsv);
- goto no_delete;
- }
- continue;
- } else {
- steal_from_global = 0;
- }
+ trans = evict_refill_and_join(root, rsv, min_size);
+ if (IS_ERR(trans))
+ goto free_rsv;
trans->block_rsv = rsv;
ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
- if (ret != -ENOSPC && ret != -EAGAIN)
- break;
-
trans->block_rsv = &fs_info->trans_block_rsv;
btrfs_end_transaction(trans);
- trans = NULL;
btrfs_btree_balance_dirty(fs_info);
+ if (ret && ret != -ENOSPC && ret != -EAGAIN)
+ goto free_rsv;
+ else if (!ret)
+ break;
}
- btrfs_free_block_rsv(fs_info, rsv);
-
/*
- * Errors here aren't a big deal, it just means we leave orphan items
- * in the tree. They will be cleaned up on the next mount.
+ * Errors here aren't a big deal; it just means we leave orphan items in
+ * the tree. They will be cleaned up on the next mount. If the inode
+ * number gets reused, cleanup deletes the orphan item without doing
+ * anything, and unlink reuses the existing orphan item.
+ *
+ * If it turns out that we are dropping too many of these, we might want
+ * to add a mechanism for retrying them after a commit.
*/
- if (ret == 0) {
- trans->block_rsv = root->orphan_block_rsv;
+ trans = evict_refill_and_join(root, rsv, min_size);
+ if (!IS_ERR(trans)) {
+ trans->block_rsv = rsv;
btrfs_orphan_del(trans, BTRFS_I(inode));
- } else {
- btrfs_orphan_del(NULL, BTRFS_I(inode));
+ trans->block_rsv = &fs_info->trans_block_rsv;
+ btrfs_end_transaction(trans);
}
- trans->block_rsv = &fs_info->trans_block_rsv;
if (!(root == fs_info->tree_root ||
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
btrfs_return_ino(root, btrfs_ino(BTRFS_I(inode)));
- btrfs_end_transaction(trans);
- btrfs_btree_balance_dirty(fs_info);
+free_rsv:
+ btrfs_free_block_rsv(fs_info, rsv);
no_delete:
+ /*
+ * If we didn't successfully delete, the orphan item will still be in
+ * the tree and we'll retry on the next mount. Again, we might want to
+ * retry these periodically in the future.
+ */
btrfs_remove_delayed_node(BTRFS_I(inode));
clear_inode(inode);
}
@@ -5619,69 +5657,6 @@ static void inode_tree_del(struct inode *inode)
}
}
-void btrfs_invalidate_inodes(struct btrfs_root *root)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct rb_node *node;
- struct rb_node *prev;
- struct btrfs_inode *entry;
- struct inode *inode;
- u64 objectid = 0;
-
- if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
- WARN_ON(btrfs_root_refs(&root->root_item) != 0);
-
- spin_lock(&root->inode_lock);
-again:
- node = root->inode_tree.rb_node;
- prev = NULL;
- while (node) {
- prev = node;
- entry = rb_entry(node, struct btrfs_inode, rb_node);
-
- if (objectid < btrfs_ino(BTRFS_I(&entry->vfs_inode)))
- node = node->rb_left;
- else if (objectid > btrfs_ino(BTRFS_I(&entry->vfs_inode)))
- node = node->rb_right;
- else
- break;
- }
- if (!node) {
- while (prev) {
- entry = rb_entry(prev, struct btrfs_inode, rb_node);
- if (objectid <= btrfs_ino(BTRFS_I(&entry->vfs_inode))) {
- node = prev;
- break;
- }
- prev = rb_next(prev);
- }
- }
- while (node) {
- entry = rb_entry(node, struct btrfs_inode, rb_node);
- objectid = btrfs_ino(BTRFS_I(&entry->vfs_inode)) + 1;
- inode = igrab(&entry->vfs_inode);
- if (inode) {
- spin_unlock(&root->inode_lock);
- if (atomic_read(&inode->i_count) > 1)
- d_prune_aliases(inode);
- /*
- * btrfs_drop_inode will have it removed from
- * the inode cache when its usage count
- * hits zero.
- */
- iput(inode);
- cond_resched();
- spin_lock(&root->inode_lock);
- goto again;
- }
-
- if (cond_resched_lock(&root->inode_lock))
- goto again;
-
- node = rb_next(node);
- }
- spin_unlock(&root->inode_lock);
-}
static int btrfs_init_locked_inode(struct inode *inode, void *p)
{
@@ -5843,11 +5818,6 @@ static int btrfs_dentry_delete(const struct dentry *dentry)
return 0;
}
-static void btrfs_dentry_release(struct dentry *dentry)
-{
- kfree(dentry->d_fsdata);
-}
-
static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
@@ -6263,7 +6233,7 @@ static void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
}
- btrfs_update_iflags(inode);
+ btrfs_sync_inode_flags_to_i_flags(inode);
}
static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
@@ -6579,8 +6549,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
goto out_unlock_inode;
} else {
btrfs_update_inode(trans, root, inode);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
}
out_unlock:
@@ -6656,8 +6625,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
goto out_unlock_inode;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
out_unlock:
btrfs_end_transaction(trans);
@@ -6700,8 +6668,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
* 2 items for inode and inode ref
* 2 items for dir items
* 1 item for parent inode
+ * 1 item for orphan item deletion if O_TMPFILE
*/
- trans = btrfs_start_transaction(root, 5);
+ trans = btrfs_start_transaction(root, inode->i_nlink ? 5 : 6);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
trans = NULL;
@@ -6802,12 +6771,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (err)
goto out_fail_inode;
- d_instantiate(dentry, inode);
- /*
- * mkdir is special. We're unlocking after we call d_instantiate
- * to avoid a race with nfsd calling d_instantiate.
- */
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
drop_on_err = 0;
out_fail:
@@ -7083,7 +7047,7 @@ insert:
err = 0;
write_lock(&em_tree->lock);
- err = btrfs_add_extent_mapping(em_tree, &em, start, len);
+ err = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
write_unlock(&em_tree->lock);
out:
@@ -7368,6 +7332,14 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
btrfs_file_extent_other_encoding(leaf, fi))
goto out;
+ /*
+ * Do the same check as in btrfs_cross_ref_exist but without the
+ * unnecessary search.
+ */
+ if (btrfs_file_extent_generation(leaf, fi) <=
+ btrfs_root_last_snapshot(&root->root_item))
+ goto out;
+
backref_offset = btrfs_file_extent_offset(leaf, fi);
if (orig_start) {
@@ -7568,6 +7540,125 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
return em;
}
+
+static int btrfs_get_blocks_direct_read(struct extent_map *em,
+ struct buffer_head *bh_result,
+ struct inode *inode,
+ u64 start, u64 len)
+{
+ if (em->block_start == EXTENT_MAP_HOLE ||
+ test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+ return -ENOENT;
+
+ len = min(len, em->len - (start - em->start));
+
+ bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
+ inode->i_blkbits;
+ bh_result->b_size = len;
+ bh_result->b_bdev = em->bdev;
+ set_buffer_mapped(bh_result);
+
+ return 0;
+}
+
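The block-number arithmetic in btrfs_get_blocks_direct_read() above is easier to see with concrete numbers. A standalone sketch, not part of the patch, with all values invented purely for illustration:

#include <stdio.h>

/*
 * Mirrors the mapping done in btrfs_get_blocks_direct_read(): the extent
 * map covers file range [em_start, em_start + em_len) and sits on disk at
 * byte em_block_start; a read at file offset 'start' therefore lands at
 * disk byte em_block_start + (start - em_start), which the buffer_head
 * wants in units of (1 << i_blkbits) bytes.
 */
int main(void)
{
	unsigned long long em_start = 1 << 20;		/* extent starts at 1 MiB in the file */
	unsigned long long em_block_start = 8 << 20;	/* extent data sits at 8 MiB on disk */
	unsigned long long start = (1 << 20) + 8192;	/* read offset, 8 KiB into the extent */
	unsigned int i_blkbits = 9;			/* 512-byte blocks */
	unsigned long long blocknr;

	blocknr = (em_block_start + (start - em_start)) >> i_blkbits;
	printf("b_blocknr = %llu\n", blocknr);	/* (8 MiB + 8 KiB) / 512 = 16400 */
	return 0;
}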
+static int btrfs_get_blocks_direct_write(struct extent_map **map,
+ struct buffer_head *bh_result,
+ struct inode *inode,
+ struct btrfs_dio_data *dio_data,
+ u64 start, u64 len)
+{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct extent_map *em = *map;
+ int ret = 0;
+
+ /*
+ * We don't allocate a new extent in the following cases:
+ *
+ * 1) The inode is marked as NODATACOW. In this case we'll just use the
+ * existing extent.
+ * 2) The extent is marked as PREALLOC. We're good to go here and can
+ * just use the extent.
+ */
+ if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
+ ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
+ em->block_start != EXTENT_MAP_HOLE)) {
+ int type;
+ u64 block_start, orig_start, orig_block_len, ram_bytes;
+
+ if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+ type = BTRFS_ORDERED_PREALLOC;
+ else
+ type = BTRFS_ORDERED_NOCOW;
+ len = min(len, em->len - (start - em->start));
+ block_start = em->block_start + (start - em->start);
+
+ if (can_nocow_extent(inode, start, &len, &orig_start,
+ &orig_block_len, &ram_bytes) == 1 &&
+ btrfs_inc_nocow_writers(fs_info, block_start)) {
+ struct extent_map *em2;
+
+ em2 = btrfs_create_dio_extent(inode, start, len,
+ orig_start, block_start,
+ len, orig_block_len,
+ ram_bytes, type);
+ btrfs_dec_nocow_writers(fs_info, block_start);
+ if (type == BTRFS_ORDERED_PREALLOC) {
+ free_extent_map(em);
+ *map = em = em2;
+ }
+
+ if (em2 && IS_ERR(em2)) {
+ ret = PTR_ERR(em2);
+ goto out;
+ }
+ /*
+ * For an inode marked NODATACOW or an extent marked PREALLOC,
+ * use the existing or preallocated extent, so we do not need
+ * to adjust btrfs_space_info's bytes_may_use.
+ */
+ btrfs_free_reserved_data_space_noquota(inode, start,
+ len);
+ goto skip_cow;
+ }
+ }
+
+ /* this will cow the extent */
+ len = bh_result->b_size;
+ free_extent_map(em);
+ *map = em = btrfs_new_extent_direct(inode, start, len);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto out;
+ }
+
+ len = min(len, em->len - (start - em->start));
+
+skip_cow:
+ bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
+ inode->i_blkbits;
+ bh_result->b_size = len;
+ bh_result->b_bdev = em->bdev;
+ set_buffer_mapped(bh_result);
+
+ if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+ set_buffer_new(bh_result);
+
+ /*
+ * Need to update the i_size under the extent lock so buffered
+ * readers will get the updated i_size when we unlock.
+ */
+ if (!dio_data->overwrite && start + len > i_size_read(inode))
+ i_size_write(inode, start + len);
+
+ WARN_ON(dio_data->reserve < len);
+ dio_data->reserve -= len;
+ dio_data->unsubmitted_oe_range_end = start + len;
+ current->journal_info = dio_data;
+out:
+ return ret;
+}
+
static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
@@ -7636,116 +7727,36 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
goto unlock_err;
}
- /* Just a good old fashioned hole, return */
- if (!create && (em->block_start == EXTENT_MAP_HOLE ||
- test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
- free_extent_map(em);
- goto unlock_err;
- }
-
- /*
- * We don't allocate a new extent in the following cases
- *
- * 1) The inode is marked as NODATACOW. In this case we'll just use the
- * existing extent.
- * 2) The extent is marked as PREALLOC. We're good to go here and can
- * just use the extent.
- *
- */
- if (!create) {
- len = min(len, em->len - (start - em->start));
- lockstart = start + len;
- goto unlock;
- }
-
- if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
- ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
- em->block_start != EXTENT_MAP_HOLE)) {
- int type;
- u64 block_start, orig_start, orig_block_len, ram_bytes;
-
- if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
- type = BTRFS_ORDERED_PREALLOC;
- else
- type = BTRFS_ORDERED_NOCOW;
- len = min(len, em->len - (start - em->start));
- block_start = em->block_start + (start - em->start);
-
- if (can_nocow_extent(inode, start, &len, &orig_start,
- &orig_block_len, &ram_bytes) == 1 &&
- btrfs_inc_nocow_writers(fs_info, block_start)) {
- struct extent_map *em2;
-
- em2 = btrfs_create_dio_extent(inode, start, len,
- orig_start, block_start,
- len, orig_block_len,
- ram_bytes, type);
- btrfs_dec_nocow_writers(fs_info, block_start);
- if (type == BTRFS_ORDERED_PREALLOC) {
- free_extent_map(em);
- em = em2;
- }
- if (em2 && IS_ERR(em2)) {
- ret = PTR_ERR(em2);
- goto unlock_err;
- }
- /*
- * For inode marked NODATACOW or extent marked PREALLOC,
- * use the existing or preallocated extent, so does not
- * need to adjust btrfs_space_info's bytes_may_use.
- */
- btrfs_free_reserved_data_space_noquota(inode,
- start, len);
- goto unlock;
- }
- }
-
- /*
- * this will cow the extent, reset the len in case we changed
- * it above
- */
- len = bh_result->b_size;
- free_extent_map(em);
- em = btrfs_new_extent_direct(inode, start, len);
- if (IS_ERR(em)) {
- ret = PTR_ERR(em);
- goto unlock_err;
- }
- len = min(len, em->len - (start - em->start));
-unlock:
- bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
- inode->i_blkbits;
- bh_result->b_size = len;
- bh_result->b_bdev = em->bdev;
- set_buffer_mapped(bh_result);
if (create) {
- if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
- set_buffer_new(bh_result);
+ ret = btrfs_get_blocks_direct_write(&em, bh_result, inode,
+ dio_data, start, len);
+ if (ret < 0)
+ goto unlock_err;
+ /* clear and unlock the entire range */
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ unlock_bits, 1, 0, &cached_state);
+ } else {
+ ret = btrfs_get_blocks_direct_read(em, bh_result, inode,
+ start, len);
+ /* Can be negative only if we read from a hole */
+ if (ret < 0) {
+ ret = 0;
+ free_extent_map(em);
+ goto unlock_err;
+ }
/*
- * Need to update the i_size under the extent lock so buffered
- * readers will get the updated i_size when we unlock.
+ * We need to unlock only the end area that we aren't using.
+ * The rest is going to be unlocked by the endio routine.
*/
- if (!dio_data->overwrite && start + len > i_size_read(inode))
- i_size_write(inode, start + len);
-
- WARN_ON(dio_data->reserve < len);
- dio_data->reserve -= len;
- dio_data->unsubmitted_oe_range_end = start + len;
- current->journal_info = dio_data;
- }
-
- /*
- * In the case of write we need to clear and unlock the entire range,
- * in the case of read we need to unlock only the end area that we
- * aren't using if there is any left over space.
- */
- if (lockstart < lockend) {
- clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
- lockend, unlock_bits, 1, 0,
- &cached_state);
- } else {
- free_extent_state(cached_state);
+ lockstart = start + bh_result->b_size;
+ if (lockstart < lockend) {
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+ lockend, unlock_bits, 1, 0,
+ &cached_state);
+ } else {
+ free_extent_state(cached_state);
+ }
}
free_extent_map(em);
@@ -8131,7 +8142,6 @@ static void __endio_write_update_ordered(struct inode *inode,
u64 ordered_offset = offset;
u64 ordered_bytes = bytes;
u64 last_offset;
- int ret;
if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
wq = fs_info->endio_freespace_worker;
@@ -8141,32 +8151,31 @@ static void __endio_write_update_ordered(struct inode *inode,
func = btrfs_endio_write_helper;
}
-again:
- last_offset = ordered_offset;
- ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
- &ordered_offset,
- ordered_bytes,
- uptodate);
- if (!ret)
- goto out_test;
-
- btrfs_init_work(&ordered->work, func, finish_ordered_fn, NULL, NULL);
- btrfs_queue_work(wq, &ordered->work);
-out_test:
- /*
- * If btrfs_dec_test_ordered_pending does not find any ordered extent
- * in the range, we can exit.
- */
- if (ordered_offset == last_offset)
- return;
- /*
- * our bio might span multiple ordered extents. If we haven't
- * completed the accounting for the whole dio, go back and try again
- */
- if (ordered_offset < offset + bytes) {
- ordered_bytes = offset + bytes - ordered_offset;
- ordered = NULL;
- goto again;
+ while (ordered_offset < offset + bytes) {
+ last_offset = ordered_offset;
+ if (btrfs_dec_test_first_ordered_pending(inode, &ordered,
+ &ordered_offset,
+ ordered_bytes,
+ uptodate)) {
+ btrfs_init_work(&ordered->work, func,
+ finish_ordered_fn,
+ NULL, NULL);
+ btrfs_queue_work(wq, &ordered->work);
+ }
+ /*
+ * If btrfs_dec_test_ordered_pending does not find any ordered
+ * extent in the range, we can exit.
+ */
+ if (ordered_offset == last_offset)
+ return;
+ /*
+ * Our bio might span multiple ordered extents. In this case
+ * we keep going until we have accounted for the whole dio.
+ */
+ if (ordered_offset < offset + bytes) {
+ ordered_bytes = offset + bytes - ordered_offset;
+ ordered = NULL;
+ }
}
}
@@ -8705,29 +8714,19 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
static int btrfs_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
- struct extent_io_tree *tree;
-
- tree = &BTRFS_I(mapping->host)->io_tree;
- return extent_writepages(tree, mapping, wbc);
+ return extent_writepages(mapping, wbc);
}
static int
btrfs_readpages(struct file *file, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
- struct extent_io_tree *tree;
- tree = &BTRFS_I(mapping->host)->io_tree;
- return extent_readpages(tree, mapping, pages, nr_pages);
+ return extent_readpages(mapping, pages, nr_pages);
}
+
static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
{
- struct extent_io_tree *tree;
- struct extent_map_tree *map;
- int ret;
-
- tree = &BTRFS_I(page->mapping->host)->io_tree;
- map = &BTRFS_I(page->mapping->host)->extent_tree;
- ret = try_release_extent_mapping(map, tree, page, gfp_flags);
+ int ret = try_release_extent_mapping(page, gfp_flags);
if (ret == 1) {
ClearPagePrivate(page);
set_page_private(page, 0);
@@ -8868,8 +8867,8 @@ again:
*
* We are not allowed to take the i_mutex here so we have to play games to
* protect against truncate races as the page could now be beyond EOF. Because
- * vmtruncate() writes the inode size before removing pages, once we have the
- * page lock we can determine safely if the page is beyond EOF. If it is not
+ * truncate_setsize() writes the inode size before removing pages, once we have
+ * the page lock we can determine safely if the page is beyond EOF. If it is not
* beyond EOF, then the page is guaranteed safe against truncation until we
* unlock the page.
*/
@@ -9031,8 +9030,7 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_block_rsv *rsv;
- int ret = 0;
- int err = 0;
+ int ret;
struct btrfs_trans_handle *trans;
u64 mask = fs_info->sectorsize - 1;
u64 min_size = btrfs_calc_trunc_metadata_size(fs_info, 1);
@@ -9045,39 +9043,31 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
}
/*
- * Yes ladies and gentlemen, this is indeed ugly. The fact is we have
- * 3 things going on here
- *
- * 1) We need to reserve space for our orphan item and the space to
- * delete our orphan item. Lord knows we don't want to have a dangling
- * orphan item because we didn't reserve space to remove it.
+ * Yes ladies and gentlemen, this is indeed ugly. We have a couple of
+ * things going on here:
*
- * 2) We need to reserve space to update our inode.
+ * 1) We need to reserve space to update our inode.
*
- * 3) We need to have something to cache all the space that is going to
+ * 2) We need to have something to cache all the space that is going to
* be free'd up by the truncate operation, but also have some slack
* space reserved in case it uses space during the truncate (thank you
* very much snapshotting).
*
- * And we need these to all be separate. The fact is we can use a lot of
+ * And we need these to be separate. The fact is we can use a lot of
* space doing the truncate, and we have no earthly idea how much space
* we will use, so we need the truncate reservation to be separate so it
- * doesn't end up using space reserved for updating the inode or
- * removing the orphan item. We also need to be able to stop the
- * transaction and start a new one, which means we need to be able to
- * update the inode several times, and we have no idea of knowing how
- * many times that will be, so we can't just reserve 1 item for the
- * entirety of the operation, so that has to be done separately as well.
- * Then there is the orphan item, which does indeed need to be held on
- * to for the whole operation, and we need nobody to touch this reserved
- * space except the orphan code.
+ * doesn't end up using space reserved for updating the inode. We also
+ * need to be able to stop the transaction and start a new one, which
+ * means we need to be able to update the inode several times, and we
+ * have no way of knowing how many times that will be, so we can't just
+ * reserve 1 item for the entirety of the operation; that has to be
+ * done separately as well.
*
* So that leaves us with
*
- * 1) root->orphan_block_rsv - for the orphan deletion.
- * 2) rsv - for the truncate reservation, which we will steal from the
+ * 1) rsv - for the truncate reservation, which we will steal from the
* transaction reservation.
- * 3) fs_info->trans_block_rsv - this will have 1 items worth left for
+ * 2) fs_info->trans_block_rsv - this will have 1 item's worth left for
* updating the inode.
*/
rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
@@ -9092,7 +9082,7 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
*/
trans = btrfs_start_transaction(root, 2);
if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
+ ret = PTR_ERR(trans);
goto out;
}
@@ -9116,23 +9106,19 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
inode->i_size,
BTRFS_EXTENT_DATA_KEY);
trans->block_rsv = &fs_info->trans_block_rsv;
- if (ret != -ENOSPC && ret != -EAGAIN) {
- err = ret;
+ if (ret != -ENOSPC && ret != -EAGAIN)
break;
- }
ret = btrfs_update_inode(trans, root, inode);
- if (ret) {
- err = ret;
+ if (ret)
break;
- }
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
trans = btrfs_start_transaction(root, 2);
if (IS_ERR(trans)) {
- ret = err = PTR_ERR(trans);
+ ret = PTR_ERR(trans);
trans = NULL;
break;
}
@@ -9165,29 +9151,23 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
}
- if (ret == 0 && inode->i_nlink > 0) {
- trans->block_rsv = root->orphan_block_rsv;
- ret = btrfs_orphan_del(trans, BTRFS_I(inode));
- if (ret)
- err = ret;
- }
-
if (trans) {
+ int ret2;
+
trans->block_rsv = &fs_info->trans_block_rsv;
- ret = btrfs_update_inode(trans, root, inode);
- if (ret && !err)
- err = ret;
+ ret2 = btrfs_update_inode(trans, root, inode);
+ if (ret2 && !ret)
+ ret = ret2;
- ret = btrfs_end_transaction(trans);
+ ret2 = btrfs_end_transaction(trans);
+ if (ret2 && !ret)
+ ret = ret2;
btrfs_btree_balance_dirty(fs_info);
}
out:
btrfs_free_block_rsv(fs_info, rsv);
- if (ret && !err)
- err = ret;
-
- return err;
+ return ret;
}
/*
@@ -9323,13 +9303,6 @@ void btrfs_destroy_inode(struct inode *inode)
if (!root)
goto free;
- if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
- &BTRFS_I(inode)->runtime_flags)) {
- btrfs_info(fs_info, "inode %llu still on the orphan list",
- btrfs_ino(BTRFS_I(inode)));
- atomic_dec(&root->orphan_inodes);
- }
-
while (1) {
ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
if (!ordered)
@@ -9963,6 +9936,13 @@ static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
}
+struct btrfs_delalloc_work {
+ struct inode *inode;
+ struct completion completion;
+ struct list_head list;
+ struct btrfs_work work;
+};
+
static void btrfs_run_delalloc_work(struct btrfs_work *work)
{
struct btrfs_delalloc_work *delalloc_work;
@@ -9976,15 +9956,11 @@ static void btrfs_run_delalloc_work(struct btrfs_work *work)
&BTRFS_I(inode)->runtime_flags))
filemap_flush(inode->i_mapping);
- if (delalloc_work->delay_iput)
- btrfs_add_delayed_iput(inode);
- else
- iput(inode);
+ iput(inode);
complete(&delalloc_work->completion);
}
-struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
- int delay_iput)
+static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode)
{
struct btrfs_delalloc_work *work;
@@ -9995,7 +9971,6 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
init_completion(&work->completion);
INIT_LIST_HEAD(&work->list);
work->inode = inode;
- work->delay_iput = delay_iput;
WARN_ON_ONCE(!inode);
btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
btrfs_run_delalloc_work, NULL, NULL);
@@ -10003,18 +9978,11 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
return work;
}
-void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
-{
- wait_for_completion(&work->completion);
- kfree(work);
-}
-
/*
* some fairly slow code that needs optimization. This walks the list
* of all the inodes with pending delalloc and forces them to disk.
*/
-static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput,
- int nr)
+static int start_delalloc_inodes(struct btrfs_root *root, int nr)
{
struct btrfs_inode *binode;
struct inode *inode;
@@ -10042,12 +10010,9 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput,
}
spin_unlock(&root->delalloc_lock);
- work = btrfs_alloc_delalloc_work(inode, delay_iput);
+ work = btrfs_alloc_delalloc_work(inode);
if (!work) {
- if (delay_iput)
- btrfs_add_delayed_iput(inode);
- else
- iput(inode);
+ iput(inode);
ret = -ENOMEM;
goto out;
}
@@ -10065,10 +10030,11 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput,
out:
list_for_each_entry_safe(work, next, &works, list) {
list_del_init(&work->list);
- btrfs_wait_and_free_delalloc_work(work);
+ wait_for_completion(&work->completion);
+ kfree(work);
}
- if (!list_empty_careful(&splice)) {
+ if (!list_empty(&splice)) {
spin_lock(&root->delalloc_lock);
list_splice_tail(&splice, &root->delalloc_inodes);
spin_unlock(&root->delalloc_lock);
@@ -10077,7 +10043,7 @@ out:
return ret;
}
-int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
+int btrfs_start_delalloc_inodes(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
@@ -10085,14 +10051,13 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
return -EROFS;
- ret = __start_delalloc_inodes(root, delay_iput, -1);
+ ret = start_delalloc_inodes(root, -1);
if (ret > 0)
ret = 0;
return ret;
}
-int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput,
- int nr)
+int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int nr)
{
struct btrfs_root *root;
struct list_head splice;
@@ -10115,7 +10080,7 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput,
&fs_info->delalloc_roots);
spin_unlock(&fs_info->delalloc_root_lock);
- ret = __start_delalloc_inodes(root, delay_iput, nr);
+ ret = start_delalloc_inodes(root, nr);
btrfs_put_fs_root(root);
if (ret < 0)
goto out;
@@ -10130,7 +10095,7 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput,
ret = 0;
out:
- if (!list_empty_careful(&splice)) {
+ if (!list_empty(&splice)) {
spin_lock(&fs_info->delalloc_root_lock);
list_splice_tail(&splice, &fs_info->delalloc_roots);
spin_unlock(&fs_info->delalloc_root_lock);
@@ -10250,8 +10215,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
goto out_unlock_inode;
}
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
out_unlock:
btrfs_end_transaction(trans);
@@ -10669,5 +10633,4 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
const struct dentry_operations btrfs_dentry_operations = {
.d_delete = btrfs_dentry_delete,
- .d_release = btrfs_dentry_release,
};
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 632e26d6f7ce..d29992f7dc63 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -93,20 +93,22 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
int no_time_update);
/* Mask out flags that are inappropriate for the given type of inode. */
-static unsigned int btrfs_mask_flags(umode_t mode, unsigned int flags)
+static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode,
+ unsigned int flags)
{
- if (S_ISDIR(mode))
+ if (S_ISDIR(inode->i_mode))
return flags;
- else if (S_ISREG(mode))
+ else if (S_ISREG(inode->i_mode))
return flags & ~FS_DIRSYNC_FL;
else
return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
}
/*
- * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl.
+ * Export internal inode flags to the format expected by the FS_IOC_GETFLAGS
+ * ioctl.
*/
-static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
+static unsigned int btrfs_inode_flags_to_fsflags(unsigned int flags)
{
unsigned int iflags = 0;
@@ -136,20 +138,20 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
/*
* Update inode->i_flags based on the btrfs internal flags.
*/
-void btrfs_update_iflags(struct inode *inode)
+void btrfs_sync_inode_flags_to_i_flags(struct inode *inode)
{
- struct btrfs_inode *ip = BTRFS_I(inode);
+ struct btrfs_inode *binode = BTRFS_I(inode);
unsigned int new_fl = 0;
- if (ip->flags & BTRFS_INODE_SYNC)
+ if (binode->flags & BTRFS_INODE_SYNC)
new_fl |= S_SYNC;
- if (ip->flags & BTRFS_INODE_IMMUTABLE)
+ if (binode->flags & BTRFS_INODE_IMMUTABLE)
new_fl |= S_IMMUTABLE;
- if (ip->flags & BTRFS_INODE_APPEND)
+ if (binode->flags & BTRFS_INODE_APPEND)
new_fl |= S_APPEND;
- if (ip->flags & BTRFS_INODE_NOATIME)
+ if (binode->flags & BTRFS_INODE_NOATIME)
new_fl |= S_NOATIME;
- if (ip->flags & BTRFS_INODE_DIRSYNC)
+ if (binode->flags & BTRFS_INODE_DIRSYNC)
new_fl |= S_DIRSYNC;
set_mask_bits(&inode->i_flags,
@@ -159,15 +161,16 @@ void btrfs_update_iflags(struct inode *inode)
static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
{
- struct btrfs_inode *ip = BTRFS_I(file_inode(file));
- unsigned int flags = btrfs_flags_to_ioctl(ip->flags);
+ struct btrfs_inode *binode = BTRFS_I(file_inode(file));
+ unsigned int flags = btrfs_inode_flags_to_fsflags(binode->flags);
if (copy_to_user(arg, &flags, sizeof(flags)))
return -EFAULT;
return 0;
}
-static int check_flags(unsigned int flags)
+/* Check if @flags are a supported and valid set of FS_*_FL flags */
+static int check_fsflags(unsigned int flags)
{
if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
FS_NOATIME_FL | FS_NODUMP_FL | \
@@ -186,13 +189,13 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
{
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_inode *ip = BTRFS_I(inode);
- struct btrfs_root *root = ip->root;
+ struct btrfs_inode *binode = BTRFS_I(inode);
+ struct btrfs_root *root = binode->root;
struct btrfs_trans_handle *trans;
- unsigned int flags, oldflags;
+ unsigned int fsflags, old_fsflags;
int ret;
- u64 ip_oldflags;
- unsigned int i_oldflags;
+ u64 old_flags;
+ unsigned int old_i_flags;
umode_t mode;
if (!inode_owner_or_capable(inode))
@@ -201,10 +204,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
if (btrfs_root_readonly(root))
return -EROFS;
- if (copy_from_user(&flags, arg, sizeof(flags)))
+ if (copy_from_user(&fsflags, arg, sizeof(fsflags)))
return -EFAULT;
- ret = check_flags(flags);
+ ret = check_fsflags(fsflags);
if (ret)
return ret;
@@ -214,44 +217,44 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
inode_lock(inode);
- ip_oldflags = ip->flags;
- i_oldflags = inode->i_flags;
+ old_flags = binode->flags;
+ old_i_flags = inode->i_flags;
mode = inode->i_mode;
- flags = btrfs_mask_flags(inode->i_mode, flags);
- oldflags = btrfs_flags_to_ioctl(ip->flags);
- if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
+ fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
+ old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
+ if ((fsflags ^ old_fsflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
if (!capable(CAP_LINUX_IMMUTABLE)) {
ret = -EPERM;
goto out_unlock;
}
}
- if (flags & FS_SYNC_FL)
- ip->flags |= BTRFS_INODE_SYNC;
+ if (fsflags & FS_SYNC_FL)
+ binode->flags |= BTRFS_INODE_SYNC;
else
- ip->flags &= ~BTRFS_INODE_SYNC;
- if (flags & FS_IMMUTABLE_FL)
- ip->flags |= BTRFS_INODE_IMMUTABLE;
+ binode->flags &= ~BTRFS_INODE_SYNC;
+ if (fsflags & FS_IMMUTABLE_FL)
+ binode->flags |= BTRFS_INODE_IMMUTABLE;
else
- ip->flags &= ~BTRFS_INODE_IMMUTABLE;
- if (flags & FS_APPEND_FL)
- ip->flags |= BTRFS_INODE_APPEND;
+ binode->flags &= ~BTRFS_INODE_IMMUTABLE;
+ if (fsflags & FS_APPEND_FL)
+ binode->flags |= BTRFS_INODE_APPEND;
else
- ip->flags &= ~BTRFS_INODE_APPEND;
- if (flags & FS_NODUMP_FL)
- ip->flags |= BTRFS_INODE_NODUMP;
+ binode->flags &= ~BTRFS_INODE_APPEND;
+ if (fsflags & FS_NODUMP_FL)
+ binode->flags |= BTRFS_INODE_NODUMP;
else
- ip->flags &= ~BTRFS_INODE_NODUMP;
- if (flags & FS_NOATIME_FL)
- ip->flags |= BTRFS_INODE_NOATIME;
+ binode->flags &= ~BTRFS_INODE_NODUMP;
+ if (fsflags & FS_NOATIME_FL)
+ binode->flags |= BTRFS_INODE_NOATIME;
else
- ip->flags &= ~BTRFS_INODE_NOATIME;
- if (flags & FS_DIRSYNC_FL)
- ip->flags |= BTRFS_INODE_DIRSYNC;
+ binode->flags &= ~BTRFS_INODE_NOATIME;
+ if (fsflags & FS_DIRSYNC_FL)
+ binode->flags |= BTRFS_INODE_DIRSYNC;
else
- ip->flags &= ~BTRFS_INODE_DIRSYNC;
- if (flags & FS_NOCOW_FL) {
+ binode->flags &= ~BTRFS_INODE_DIRSYNC;
+ if (fsflags & FS_NOCOW_FL) {
if (S_ISREG(mode)) {
/*
* It's safe to turn csums off here, no extents exist.
@@ -259,10 +262,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
* status of the file and will not set it.
*/
if (inode->i_size == 0)
- ip->flags |= BTRFS_INODE_NODATACOW
- | BTRFS_INODE_NODATASUM;
+ binode->flags |= BTRFS_INODE_NODATACOW
+ | BTRFS_INODE_NODATASUM;
} else {
- ip->flags |= BTRFS_INODE_NODATACOW;
+ binode->flags |= BTRFS_INODE_NODATACOW;
}
} else {
/*
@@ -270,10 +273,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
*/
if (S_ISREG(mode)) {
if (inode->i_size == 0)
- ip->flags &= ~(BTRFS_INODE_NODATACOW
+ binode->flags &= ~(BTRFS_INODE_NODATACOW
| BTRFS_INODE_NODATASUM);
} else {
- ip->flags &= ~BTRFS_INODE_NODATACOW;
+ binode->flags &= ~BTRFS_INODE_NODATACOW;
}
}
@@ -282,18 +285,18 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
* flag may be changed automatically if compression code won't make
* things smaller.
*/
- if (flags & FS_NOCOMP_FL) {
- ip->flags &= ~BTRFS_INODE_COMPRESS;
- ip->flags |= BTRFS_INODE_NOCOMPRESS;
+ if (fsflags & FS_NOCOMP_FL) {
+ binode->flags &= ~BTRFS_INODE_COMPRESS;
+ binode->flags |= BTRFS_INODE_NOCOMPRESS;
ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
if (ret && ret != -ENODATA)
goto out_drop;
- } else if (flags & FS_COMPR_FL) {
+ } else if (fsflags & FS_COMPR_FL) {
const char *comp;
- ip->flags |= BTRFS_INODE_COMPRESS;
- ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
+ binode->flags |= BTRFS_INODE_COMPRESS;
+ binode->flags &= ~BTRFS_INODE_NOCOMPRESS;
comp = btrfs_compress_type2str(fs_info->compress_type);
if (!comp || comp[0] == 0)
@@ -308,7 +311,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
if (ret && ret != -ENODATA)
goto out_drop;
- ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
+ binode->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
}
trans = btrfs_start_transaction(root, 1);
@@ -317,7 +320,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
goto out_drop;
}
- btrfs_update_iflags(inode);
+ btrfs_sync_inode_flags_to_i_flags(inode);
inode_inc_iversion(inode);
inode->i_ctime = current_time(inode);
ret = btrfs_update_inode(trans, root, inode);
@@ -325,8 +328,8 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
btrfs_end_transaction(trans);
out_drop:
if (ret) {
- ip->flags = ip_oldflags;
- inode->i_flags = i_oldflags;
+ binode->flags = old_flags;
+ inode->i_flags = old_i_flags;
}
out_unlock:
@@ -335,6 +338,148 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
return ret;
}
+/*
+ * Translate btrfs internal inode flags to xflags as expected by the
+ * FS_IOC_FSGETXATTR ioctl. Filter only the supported ones; unknown flags are
+ * silently dropped.
+ */
+static unsigned int btrfs_inode_flags_to_xflags(unsigned int flags)
+{
+ unsigned int xflags = 0;
+
+ if (flags & BTRFS_INODE_APPEND)
+ xflags |= FS_XFLAG_APPEND;
+ if (flags & BTRFS_INODE_IMMUTABLE)
+ xflags |= FS_XFLAG_IMMUTABLE;
+ if (flags & BTRFS_INODE_NOATIME)
+ xflags |= FS_XFLAG_NOATIME;
+ if (flags & BTRFS_INODE_NODUMP)
+ xflags |= FS_XFLAG_NODUMP;
+ if (flags & BTRFS_INODE_SYNC)
+ xflags |= FS_XFLAG_SYNC;
+
+ return xflags;
+}
+
+/* Check if @flags are a supported and valid set of FS_XFLAG_* flags */
+static int check_xflags(unsigned int flags)
+{
+ if (flags & ~(FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE | FS_XFLAG_NOATIME |
+ FS_XFLAG_NODUMP | FS_XFLAG_SYNC))
+ return -EOPNOTSUPP;
+ return 0;
+}
+
+/*
+ * Set the xflags from the internal inode flags. The remaining items of fsxattr
+ * are zeroed.
+ */
+static int btrfs_ioctl_fsgetxattr(struct file *file, void __user *arg)
+{
+ struct btrfs_inode *binode = BTRFS_I(file_inode(file));
+ struct fsxattr fa;
+
+ memset(&fa, 0, sizeof(fa));
+ fa.fsx_xflags = btrfs_inode_flags_to_xflags(binode->flags);
+
+ if (copy_to_user(arg, &fa, sizeof(fa)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
+{
+ struct inode *inode = file_inode(file);
+ struct btrfs_inode *binode = BTRFS_I(inode);
+ struct btrfs_root *root = binode->root;
+ struct btrfs_trans_handle *trans;
+ struct fsxattr fa;
+ unsigned old_flags;
+ unsigned old_i_flags;
+ int ret = 0;
+
+ if (!inode_owner_or_capable(inode))
+ return -EPERM;
+
+ if (btrfs_root_readonly(root))
+ return -EROFS;
+
+ memset(&fa, 0, sizeof(fa));
+ if (copy_from_user(&fa, arg, sizeof(fa)))
+ return -EFAULT;
+
+ ret = check_xflags(fa.fsx_xflags);
+ if (ret)
+ return ret;
+
+ if (fa.fsx_extsize != 0 || fa.fsx_projid != 0 || fa.fsx_cowextsize != 0)
+ return -EOPNOTSUPP;
+
+ ret = mnt_want_write_file(file);
+ if (ret)
+ return ret;
+
+ inode_lock(inode);
+
+ old_flags = binode->flags;
+ old_i_flags = inode->i_flags;
+
+ /* We need the capability to change an append-only or immutable inode */
+ if (((old_flags & (BTRFS_INODE_APPEND | BTRFS_INODE_IMMUTABLE)) ||
+ (fa.fsx_xflags & (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE))) &&
+ !capable(CAP_LINUX_IMMUTABLE)) {
+ ret = -EPERM;
+ goto out_unlock;
+ }
+
+ if (fa.fsx_xflags & FS_XFLAG_SYNC)
+ binode->flags |= BTRFS_INODE_SYNC;
+ else
+ binode->flags &= ~BTRFS_INODE_SYNC;
+ if (fa.fsx_xflags & FS_XFLAG_IMMUTABLE)
+ binode->flags |= BTRFS_INODE_IMMUTABLE;
+ else
+ binode->flags &= ~BTRFS_INODE_IMMUTABLE;
+ if (fa.fsx_xflags & FS_XFLAG_APPEND)
+ binode->flags |= BTRFS_INODE_APPEND;
+ else
+ binode->flags &= ~BTRFS_INODE_APPEND;
+ if (fa.fsx_xflags & FS_XFLAG_NODUMP)
+ binode->flags |= BTRFS_INODE_NODUMP;
+ else
+ binode->flags &= ~BTRFS_INODE_NODUMP;
+ if (fa.fsx_xflags & FS_XFLAG_NOATIME)
+ binode->flags |= BTRFS_INODE_NOATIME;
+ else
+ binode->flags &= ~BTRFS_INODE_NOATIME;
+
+ /* 1 item for the inode */
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out_unlock;
+ }
+
+ btrfs_sync_inode_flags_to_i_flags(inode);
+ inode_inc_iversion(inode);
+ inode->i_ctime = current_time(inode);
+ ret = btrfs_update_inode(trans, root, inode);
+
+ btrfs_end_transaction(trans);
+
+out_unlock:
+ if (ret) {
+ binode->flags = old_flags;
+ inode->i_flags = old_i_flags;
+ }
+
+ inode_unlock(inode);
+ mnt_drop_write_file(file);
+
+ return ret;
+}
+
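For reference, a minimal userspace sketch of how the two handlers above are driven. This is an illustration, not part of the patch; it assumes fd is open on a file in a btrfs filesystem and uses only the generic fsxattr interface from linux/fs.h:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

/* Toggle FS_XFLAG_NOATIME on the file behind fd. */
static int toggle_noatime(int fd)
{
	struct fsxattr fa;

	if (ioctl(fd, FS_IOC_FSGETXATTR, &fa) < 0) {
		perror("FS_IOC_FSGETXATTR");
		return -1;
	}
	fa.fsx_xflags ^= FS_XFLAG_NOATIME;
	if (ioctl(fd, FS_IOC_FSSETXATTR, &fa) < 0) {
		perror("FS_IOC_FSSETXATTR");
		return -1;
	}
	return 0;
}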
static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
{
struct inode *inode = file_inode(file);
@@ -424,7 +569,6 @@ static noinline int create_subvol(struct inode *dir,
u64 objectid;
u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
u64 index = 0;
- u64 qgroup_reserved;
uuid_le new_uuid;
root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
@@ -449,8 +593,7 @@ static noinline int create_subvol(struct inode *dir,
* The same as the snapshot creation, please see the comment
* of create_snapshot().
*/
- ret = btrfs_subvolume_reserve_metadata(root, &block_rsv,
- 8, &qgroup_reserved, false);
+ ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 8, false);
if (ret)
goto fail_free;
@@ -573,7 +716,7 @@ static noinline int create_subvol(struct inode *dir,
btrfs_ino(BTRFS_I(dir)), index, name, namelen);
BUG_ON(ret);
- ret = btrfs_uuid_tree_add(trans, fs_info, root_item->uuid,
+ ret = btrfs_uuid_tree_add(trans, root_item->uuid,
BTRFS_UUID_KEY_SUBVOL, objectid);
if (ret)
btrfs_abort_transaction(trans, ret);
@@ -640,7 +783,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
wait_event(root->subv_writers->wait,
percpu_counter_sum(&root->subv_writers->counter) == 0);
- ret = btrfs_start_delalloc_inodes(root, 0);
+ ret = btrfs_start_delalloc_inodes(root);
if (ret)
goto dec_and_free;
@@ -658,7 +801,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
*/
ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
&pending_snapshot->block_rsv, 8,
- &pending_snapshot->qgroup_reserved,
false);
if (ret)
goto dec_and_free;
@@ -1457,7 +1599,6 @@ static noinline int btrfs_ioctl_resize(struct file *file,
return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
}
- mutex_lock(&fs_info->volume_mutex);
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args)) {
ret = PTR_ERR(vol_args);
@@ -1565,7 +1706,6 @@ static noinline int btrfs_ioctl_resize(struct file *file,
out_free:
kfree(vol_args);
out:
- mutex_unlock(&fs_info->volume_mutex);
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
mnt_drop_write_file(file);
return ret;
@@ -1832,60 +1972,6 @@ out:
return ret;
}
-/*
- * helper to check if the subvolume references other subvolumes
- */
-static noinline int may_destroy_subvol(struct btrfs_root *root)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_path *path;
- struct btrfs_dir_item *di;
- struct btrfs_key key;
- u64 dir_id;
- int ret;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- /* Make sure this root isn't set as the default subvol */
- dir_id = btrfs_super_root_dir(fs_info->super_copy);
- di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path,
- dir_id, "default", 7, 0);
- if (di && !IS_ERR(di)) {
- btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
- if (key.objectid == root->root_key.objectid) {
- ret = -EPERM;
- btrfs_err(fs_info,
- "deleting default subvolume %llu is not allowed",
- key.objectid);
- goto out;
- }
- btrfs_release_path(path);
- }
-
- key.objectid = root->root_key.objectid;
- key.type = BTRFS_ROOT_REF_KEY;
- key.offset = (u64)-1;
-
- ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
- BUG_ON(ret == 0);
-
- ret = 0;
- if (path->slots[0] > 0) {
- path->slots[0]--;
- btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
- if (key.objectid == root->root_key.objectid &&
- key.type == BTRFS_ROOT_REF_KEY)
- ret = -ENOTEMPTY;
- }
-out:
- btrfs_free_path(path);
- return ret;
-}
-
static noinline int key_in_sk(struct btrfs_key *key,
struct btrfs_ioctl_search_key *sk)
{
@@ -2066,7 +2152,7 @@ static noinline int search_ioctl(struct inode *inode,
root = btrfs_read_fs_root_no_name(info, &key);
if (IS_ERR(root)) {
btrfs_free_path(path);
- return -ENOENT;
+ return PTR_ERR(root);
}
}
@@ -2200,8 +2286,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
key.offset = (u64)-1;
root = btrfs_read_fs_root_no_name(info, &key);
if (IS_ERR(root)) {
- btrfs_err(info, "could not find root %llu", tree_id);
- ret = -ENOENT;
+ ret = PTR_ERR(root);
goto out;
}
@@ -2256,6 +2341,165 @@ out:
return ret;
}
+static int btrfs_search_path_in_tree_user(struct inode *inode,
+ struct btrfs_ioctl_ino_lookup_user_args *args)
+{
+ struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ struct super_block *sb = inode->i_sb;
+ struct btrfs_key upper_limit = BTRFS_I(inode)->location;
+ u64 treeid = BTRFS_I(inode)->root->root_key.objectid;
+ u64 dirid = args->dirid;
+ unsigned long item_off;
+ unsigned long item_len;
+ struct btrfs_inode_ref *iref;
+ struct btrfs_root_ref *rref;
+ struct btrfs_root *root;
+ struct btrfs_path *path;
+ struct btrfs_key key, key2;
+ struct extent_buffer *leaf;
+ struct inode *temp_inode;
+ char *ptr;
+ int slot;
+ int len;
+ int total_len = 0;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ /*
+ * If the bottom subvolume does not exist directly under upper_limit,
+ * construct the path from the bottom up.
+ */
+ if (dirid != upper_limit.objectid) {
+ ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1];
+
+ key.objectid = treeid;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+ root = btrfs_read_fs_root_no_name(fs_info, &key);
+ if (IS_ERR(root)) {
+ ret = PTR_ERR(root);
+ goto out;
+ }
+
+ key.objectid = dirid;
+ key.type = BTRFS_INODE_REF_KEY;
+ key.offset = (u64)-1;
+ while (1) {
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = btrfs_previous_item(root, path, dirid,
+ BTRFS_INODE_REF_KEY);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = -ENOENT;
+ goto out;
+ }
+ }
+
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+
+ iref = btrfs_item_ptr(leaf, slot, struct btrfs_inode_ref);
+ len = btrfs_inode_ref_name_len(leaf, iref);
+ ptr -= len + 1;
+ total_len += len + 1;
+ if (ptr < args->path) {
+ ret = -ENAMETOOLONG;
+ goto out;
+ }
+
+ *(ptr + len) = '/';
+ read_extent_buffer(leaf, ptr,
+ (unsigned long)(iref + 1), len);
+
+ /* Check the read+exec permission of this directory */
+ ret = btrfs_previous_item(root, path, dirid,
+ BTRFS_INODE_ITEM_KEY);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ btrfs_item_key_to_cpu(leaf, &key2, slot);
+ if (key2.objectid != dirid) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ temp_inode = btrfs_iget(sb, &key2, root, NULL);
+ if (IS_ERR(temp_inode)) {
+ ret = PTR_ERR(temp_inode);
+ goto out;
+ }
+ ret = inode_permission(temp_inode, MAY_READ | MAY_EXEC);
+ iput(temp_inode);
+ if (ret) {
+ ret = -EACCES;
+ goto out;
+ }
+
+ if (key.offset == upper_limit.objectid)
+ break;
+ if (key.objectid == BTRFS_FIRST_FREE_OBJECTID) {
+ ret = -EACCES;
+ goto out;
+ }
+
+ btrfs_release_path(path);
+ key.objectid = key.offset;
+ key.offset = (u64)-1;
+ dirid = key.objectid;
+ }
+
+ memmove(args->path, ptr, total_len);
+ args->path[total_len] = '\0';
+ btrfs_release_path(path);
+ }
+
+ /* Get the bottom subvolume's name from ROOT_REF */
+ root = fs_info->tree_root;
+ key.objectid = treeid;
+ key.type = BTRFS_ROOT_REF_KEY;
+ key.offset = args->treeid;
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+
+ item_off = btrfs_item_ptr_offset(leaf, slot);
+ item_len = btrfs_item_size_nr(leaf, slot);
+ /* Check if dirid in ROOT_REF corresponds to passed dirid */
+ rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
+ if (args->dirid != btrfs_root_ref_dirid(leaf, rref)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Copy subvolume's name */
+ item_off += sizeof(struct btrfs_root_ref);
+ item_len -= sizeof(struct btrfs_root_ref);
+ read_extent_buffer(leaf, args->name, item_off, item_len);
+ args->name[item_len] = 0;
+
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
static noinline int btrfs_ioctl_ino_lookup(struct file *file,
void __user *argp)
{
@@ -2298,6 +2542,265 @@ out:
return ret;
}
+/*
+ * Unprivileged version of the ino_lookup ioctl.
+ *
+ * The main differences from the ino_lookup ioctl are:
+ *
+ * 1. Read + Exec permission will be checked using inode_permission() during
+ * path construction. -EACCES will be returned in case of failure.
+ * 2. Path construction will be stopped at the inode number which corresponds
+ * to the fd with which this ioctl is called. If the constructed path does
+ * not exist under the fd's inode, -EACCES will be returned.
+ * 3. The name of the bottom subvolume is also searched and filled in.
+ */
+static int btrfs_ioctl_ino_lookup_user(struct file *file, void __user *argp)
+{
+ struct btrfs_ioctl_ino_lookup_user_args *args;
+ struct inode *inode;
+ int ret;
+
+ args = memdup_user(argp, sizeof(*args));
+ if (IS_ERR(args))
+ return PTR_ERR(args);
+
+ inode = file_inode(file);
+
+ if (args->dirid == BTRFS_FIRST_FREE_OBJECTID &&
+ BTRFS_I(inode)->location.objectid != BTRFS_FIRST_FREE_OBJECTID) {
+ /*
+ * The subvolume does not exist under the fd with which this
+ * ioctl was called.
+ */
+ kfree(args);
+ return -EACCES;
+ }
+
+ ret = btrfs_search_path_in_tree_user(inode, args);
+
+ if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
+ ret = -EFAULT;
+
+ kfree(args);
+ return ret;
+}
+
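An illustrative userspace sketch of the new unprivileged lookup, not part of the patch; the treeid and dirid values are placeholders that in practice come from a prior BTRFS_IOC_GET_SUBVOL_ROOTREF or tree search call:

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>

/* Resolve the path of subvolume 'treeid' relative to fd, plus its name. */
static int lookup_subvol_path(int fd, __u64 treeid, __u64 dirid)
{
	struct btrfs_ioctl_ino_lookup_user_args args;

	memset(&args, 0, sizeof(args));
	args.treeid = treeid;	/* subvolume whose name we want */
	args.dirid = dirid;	/* directory that contains it */
	if (ioctl(fd, BTRFS_IOC_INO_LOOKUP_USER, &args) < 0) {
		perror("BTRFS_IOC_INO_LOOKUP_USER");
		return -1;
	}
	printf("path: %s name: %s\n", args.path, args.name);
	return 0;
}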
+/* Get the subvolume information in BTRFS_ROOT_ITEM and BTRFS_ROOT_BACKREF */
+static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
+{
+ struct btrfs_ioctl_get_subvol_info_args *subvol_info;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_root *root;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct btrfs_root_item *root_item;
+ struct btrfs_root_ref *rref;
+ struct extent_buffer *leaf;
+ unsigned long item_off;
+ unsigned long item_len;
+ struct inode *inode;
+ int slot;
+ int ret = 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ subvol_info = kzalloc(sizeof(*subvol_info), GFP_KERNEL);
+ if (!subvol_info) {
+ btrfs_free_path(path);
+ return -ENOMEM;
+ }
+
+ inode = file_inode(file);
+ fs_info = BTRFS_I(inode)->root->fs_info;
+
+ /* Get root_item of inode's subvolume */
+ key.objectid = BTRFS_I(inode)->root->root_key.objectid;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+ root = btrfs_read_fs_root_no_name(fs_info, &key);
+ if (IS_ERR(root)) {
+ ret = PTR_ERR(root);
+ goto out;
+ }
+ root_item = &root->root_item;
+
+ subvol_info->treeid = key.objectid;
+
+ subvol_info->generation = btrfs_root_generation(root_item);
+ subvol_info->flags = btrfs_root_flags(root_item);
+
+ memcpy(subvol_info->uuid, root_item->uuid, BTRFS_UUID_SIZE);
+ memcpy(subvol_info->parent_uuid, root_item->parent_uuid,
+ BTRFS_UUID_SIZE);
+ memcpy(subvol_info->received_uuid, root_item->received_uuid,
+ BTRFS_UUID_SIZE);
+
+ subvol_info->ctransid = btrfs_root_ctransid(root_item);
+ subvol_info->ctime.sec = btrfs_stack_timespec_sec(&root_item->ctime);
+ subvol_info->ctime.nsec = btrfs_stack_timespec_nsec(&root_item->ctime);
+
+ subvol_info->otransid = btrfs_root_otransid(root_item);
+ subvol_info->otime.sec = btrfs_stack_timespec_sec(&root_item->otime);
+ subvol_info->otime.nsec = btrfs_stack_timespec_nsec(&root_item->otime);
+
+ subvol_info->stransid = btrfs_root_stransid(root_item);
+ subvol_info->stime.sec = btrfs_stack_timespec_sec(&root_item->stime);
+ subvol_info->stime.nsec = btrfs_stack_timespec_nsec(&root_item->stime);
+
+ subvol_info->rtransid = btrfs_root_rtransid(root_item);
+ subvol_info->rtime.sec = btrfs_stack_timespec_sec(&root_item->rtime);
+ subvol_info->rtime.nsec = btrfs_stack_timespec_nsec(&root_item->rtime);
+
+ if (key.objectid != BTRFS_FS_TREE_OBJECTID) {
+ /* Search root tree for ROOT_BACKREF of this subvolume */
+ root = fs_info->tree_root;
+
+ key.type = BTRFS_ROOT_BACKREF_KEY;
+ key.offset = 0;
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto out;
+ } else if (path->slots[0] >=
+ btrfs_header_nritems(path->nodes[0])) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = -EUCLEAN;
+ goto out;
+ }
+ }
+
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid == subvol_info->treeid &&
+ key.type == BTRFS_ROOT_BACKREF_KEY) {
+ subvol_info->parent_id = key.offset;
+
+ rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
+ subvol_info->dirid = btrfs_root_ref_dirid(leaf, rref);
+
+ item_off = btrfs_item_ptr_offset(leaf, slot)
+ + sizeof(struct btrfs_root_ref);
+ item_len = btrfs_item_size_nr(leaf, slot)
+ - sizeof(struct btrfs_root_ref);
+ read_extent_buffer(leaf, subvol_info->name,
+ item_off, item_len);
+ } else {
+ ret = -ENOENT;
+ goto out;
+ }
+ }
+
+ if (copy_to_user(argp, subvol_info, sizeof(*subvol_info)))
+ ret = -EFAULT;
+
+out:
+ btrfs_free_path(path);
+ kzfree(subvol_info);
+ return ret;
+}
+
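Again purely as an illustration (assuming fd is open on any file inside the subvolume of interest), the info ioctl above can be consumed from userspace like this:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>

static int print_subvol_info(int fd)
{
	struct btrfs_ioctl_get_subvol_info_args info;

	if (ioctl(fd, BTRFS_IOC_GET_SUBVOL_INFO, &info) < 0) {
		perror("BTRFS_IOC_GET_SUBVOL_INFO");
		return -1;
	}
	printf("treeid %llu gen %llu parent %llu name %s\n",
	       (unsigned long long)info.treeid,
	       (unsigned long long)info.generation,
	       (unsigned long long)info.parent_id,
	       info.name);
	return 0;
}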
+/*
+ * Return ROOT_REF information of the subvolume containing this inode
+ * except for the subvolume name.
+ */
+static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
+{
+ struct btrfs_ioctl_get_subvol_rootref_args *rootrefs;
+ struct btrfs_root_ref *rref;
+ struct btrfs_root *root;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ struct inode *inode;
+ u64 objectid;
+ int slot;
+ int ret;
+ u8 found;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ rootrefs = memdup_user(argp, sizeof(*rootrefs));
+ if (IS_ERR(rootrefs)) {
+ btrfs_free_path(path);
+ return PTR_ERR(rootrefs);
+ }
+
+ inode = file_inode(file);
+ root = BTRFS_I(inode)->root->fs_info->tree_root;
+ objectid = BTRFS_I(inode)->root->root_key.objectid;
+
+ key.objectid = objectid;
+ key.type = BTRFS_ROOT_REF_KEY;
+ key.offset = rootrefs->min_treeid;
+ found = 0;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto out;
+ } else if (path->slots[0] >=
+ btrfs_header_nritems(path->nodes[0])) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = -EUCLEAN;
+ goto out;
+ }
+ }
+ while (1) {
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid != objectid || key.type != BTRFS_ROOT_REF_KEY) {
+ ret = 0;
+ goto out;
+ }
+
+ if (found == BTRFS_MAX_ROOTREF_BUFFER_NUM) {
+ ret = -EOVERFLOW;
+ goto out;
+ }
+
+ rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
+ rootrefs->rootref[found].treeid = key.offset;
+ rootrefs->rootref[found].dirid =
+ btrfs_root_ref_dirid(leaf, rref);
+ found++;
+
+ ret = btrfs_next_item(root, path);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = -EUCLEAN;
+ goto out;
+ }
+ }
+
+out:
+ if (!ret || ret == -EOVERFLOW) {
+ rootrefs->num_items = found;
+ /* update min_treeid for next search */
+ if (found)
+ rootrefs->min_treeid =
+ rootrefs->rootref[found - 1].treeid + 1;
+ if (copy_to_user(argp, rootrefs, sizeof(*rootrefs)))
+ ret = -EFAULT;
+ }
+
+ kfree(rootrefs);
+ btrfs_free_path(path);
+
+ return ret;
+}
+
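The -EOVERFLOW/min_treeid handshake above is what makes this ioctl iterable: each call returns up to BTRFS_MAX_ROOTREF_BUFFER_NUM entries and advances min_treeid past the last one. A userspace sketch of the resulting loop, illustrative only and not part of the patch:

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>

/* Walk all child subvolumes of the subvolume that fd lives in. */
static void list_child_subvols(int fd)
{
	struct btrfs_ioctl_get_subvol_rootref_args args;
	int i, ret;

	memset(&args, 0, sizeof(args));
	do {
		ret = ioctl(fd, BTRFS_IOC_GET_SUBVOL_ROOTREF, &args);
		if (ret < 0 && errno != EOVERFLOW) {
			perror("BTRFS_IOC_GET_SUBVOL_ROOTREF");
			return;
		}
		for (i = 0; i < args.num_items; i++)
			printf("treeid %llu dirid %llu\n",
			       (unsigned long long)args.rootref[i].treeid,
			       (unsigned long long)args.rootref[i].dirid);
		/* the kernel advanced args.min_treeid past the last entry */
	} while (ret < 0 && errno == EOVERFLOW);
}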
static noinline int btrfs_ioctl_snap_destroy(struct file *file,
void __user *arg)
{
@@ -2309,12 +2812,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_root *dest = NULL;
struct btrfs_ioctl_vol_args *vol_args;
- struct btrfs_trans_handle *trans;
- struct btrfs_block_rsv block_rsv;
- u64 root_flags;
- u64 qgroup_reserved;
int namelen;
- int ret;
int err = 0;
if (!S_ISDIR(dir->i_mode))
@@ -2398,133 +2896,11 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
}
inode_lock(inode);
-
- /*
- * Don't allow to delete a subvolume with send in progress. This is
- * inside the i_mutex so the error handling that has to drop the bit
- * again is not run concurrently.
- */
- spin_lock(&dest->root_item_lock);
- root_flags = btrfs_root_flags(&dest->root_item);
- if (dest->send_in_progress == 0) {
- btrfs_set_root_flags(&dest->root_item,
- root_flags | BTRFS_ROOT_SUBVOL_DEAD);
- spin_unlock(&dest->root_item_lock);
- } else {
- spin_unlock(&dest->root_item_lock);
- btrfs_warn(fs_info,
- "Attempt to delete subvolume %llu during send",
- dest->root_key.objectid);
- err = -EPERM;
- goto out_unlock_inode;
- }
-
- down_write(&fs_info->subvol_sem);
-
- err = may_destroy_subvol(dest);
- if (err)
- goto out_up_write;
-
- btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
- /*
- * One for dir inode, two for dir entries, two for root
- * ref/backref.
- */
- err = btrfs_subvolume_reserve_metadata(root, &block_rsv,
- 5, &qgroup_reserved, true);
- if (err)
- goto out_up_write;
-
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
- goto out_release;
- }
- trans->block_rsv = &block_rsv;
- trans->bytes_reserved = block_rsv.size;
-
- btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
-
- ret = btrfs_unlink_subvol(trans, root, dir,
- dest->root_key.objectid,
- dentry->d_name.name,
- dentry->d_name.len);
- if (ret) {
- err = ret;
- btrfs_abort_transaction(trans, ret);
- goto out_end_trans;
- }
-
- btrfs_record_root_in_trans(trans, dest);
-
- memset(&dest->root_item.drop_progress, 0,
- sizeof(dest->root_item.drop_progress));
- dest->root_item.drop_level = 0;
- btrfs_set_root_refs(&dest->root_item, 0);
-
- if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) {
- ret = btrfs_insert_orphan_item(trans,
- fs_info->tree_root,
- dest->root_key.objectid);
- if (ret) {
- btrfs_abort_transaction(trans, ret);
- err = ret;
- goto out_end_trans;
- }
- }
-
- ret = btrfs_uuid_tree_rem(trans, fs_info, dest->root_item.uuid,
- BTRFS_UUID_KEY_SUBVOL,
- dest->root_key.objectid);
- if (ret && ret != -ENOENT) {
- btrfs_abort_transaction(trans, ret);
- err = ret;
- goto out_end_trans;
- }
- if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
- ret = btrfs_uuid_tree_rem(trans, fs_info,
- dest->root_item.received_uuid,
- BTRFS_UUID_KEY_RECEIVED_SUBVOL,
- dest->root_key.objectid);
- if (ret && ret != -ENOENT) {
- btrfs_abort_transaction(trans, ret);
- err = ret;
- goto out_end_trans;
- }
- }
-
-out_end_trans:
- trans->block_rsv = NULL;
- trans->bytes_reserved = 0;
- ret = btrfs_end_transaction(trans);
- if (ret && !err)
- err = ret;
- inode->i_flags |= S_DEAD;
-out_release:
- btrfs_subvolume_release_metadata(fs_info, &block_rsv);
-out_up_write:
- up_write(&fs_info->subvol_sem);
- if (err) {
- spin_lock(&dest->root_item_lock);
- root_flags = btrfs_root_flags(&dest->root_item);
- btrfs_set_root_flags(&dest->root_item,
- root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
- spin_unlock(&dest->root_item_lock);
- }
-out_unlock_inode:
+ err = btrfs_delete_subvolume(dir, dentry);
inode_unlock(inode);
- if (!err) {
- d_invalidate(dentry);
- btrfs_invalidate_inodes(dest);
+ if (!err)
d_delete(dentry);
- ASSERT(dest->send_in_progress == 0);
- /* the last ref */
- if (dest->ino_cache_inode) {
- iput(dest->ino_cache_inode);
- dest->ino_cache_inode = NULL;
- }
- }
out_dput:
dput(dentry);
out_unlock_dir:
@@ -2613,7 +2989,6 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
- mutex_lock(&fs_info->volume_mutex);
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args)) {
ret = PTR_ERR(vol_args);
@@ -2628,7 +3003,6 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
kfree(vol_args);
out:
- mutex_unlock(&fs_info->volume_mutex);
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
return ret;
}
@@ -2654,8 +3028,10 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
}
/* Check for compatibility, reject unknown flags */
- if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED)
- return -EOPNOTSUPP;
+ if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
@@ -2954,8 +3330,6 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp)
put_page(pg);
}
}
- kfree(cmp->src_pages);
- kfree(cmp->dst_pages);
}
static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
@@ -2964,40 +3338,14 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
{
int ret;
int num_pages = PAGE_ALIGN(len) >> PAGE_SHIFT;
- struct page **src_pgarr, **dst_pgarr;
- /*
- * We must gather up all the pages before we initiate our
- * extent locking. We use an array for the page pointers. Size
- * of the array is bounded by len, which is in turn bounded by
- * BTRFS_MAX_DEDUPE_LEN.
- */
- src_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
- dst_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
- if (!src_pgarr || !dst_pgarr) {
- kfree(src_pgarr);
- kfree(dst_pgarr);
- return -ENOMEM;
- }
cmp->num_pages = num_pages;
- cmp->src_pages = src_pgarr;
- cmp->dst_pages = dst_pgarr;
-
- /*
- * If deduping ranges in the same inode, locking rules make it mandatory
- * to always lock pages in ascending order to avoid deadlocks with
- * concurrent tasks (such as starting writeback/delalloc).
- */
- if (src == dst && dst_loff < loff) {
- swap(src_pgarr, dst_pgarr);
- swap(loff, dst_loff);
- }
- ret = gather_extent_pages(src, src_pgarr, cmp->num_pages, loff);
+ ret = gather_extent_pages(src, cmp->src_pages, num_pages, loff);
if (ret)
goto out;
- ret = gather_extent_pages(dst, dst_pgarr, cmp->num_pages, dst_loff);
+ ret = gather_extent_pages(dst, cmp->dst_pages, num_pages, dst_loff);
out:
if (ret)
@@ -3067,31 +3415,23 @@ static int extent_same_check_offsets(struct inode *inode, u64 off, u64 *plen,
return 0;
}
-static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
- struct inode *dst, u64 dst_loff)
+static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
+ struct inode *dst, u64 dst_loff,
+ struct cmp_pages *cmp)
{
int ret;
u64 len = olen;
- struct cmp_pages cmp;
bool same_inode = (src == dst);
u64 same_lock_start = 0;
u64 same_lock_len = 0;
- if (len == 0)
- return 0;
-
- if (same_inode)
- inode_lock(src);
- else
- btrfs_double_inode_lock(src, dst);
-
ret = extent_same_check_offsets(src, loff, &len, olen);
if (ret)
- goto out_unlock;
+ return ret;
ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
if (ret)
- goto out_unlock;
+ return ret;
if (same_inode) {
/*
@@ -3108,32 +3448,21 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
* allow an unaligned length so long as it ends at
* i_size.
*/
- if (len != olen) {
- ret = -EINVAL;
- goto out_unlock;
- }
+ if (len != olen)
+ return -EINVAL;
/* Check for overlapping ranges */
- if (dst_loff + len > loff && dst_loff < loff + len) {
- ret = -EINVAL;
- goto out_unlock;
- }
+ if (dst_loff + len > loff && dst_loff < loff + len)
+ return -EINVAL;
same_lock_start = min_t(u64, loff, dst_loff);
same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start;
}
- /* don't make the dst file partly checksummed */
- if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
- (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) {
- ret = -EINVAL;
- goto out_unlock;
- }
-
again:
- ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
+ ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, cmp);
if (ret)
- goto out_unlock;
+ return ret;
if (same_inode)
ret = lock_extent_range(src, same_lock_start, same_lock_len,
@@ -3154,7 +3483,7 @@ again:
* Ranges in the io trees already unlocked. Now unlock all
* pages before waiting for all IO to complete.
*/
- btrfs_cmp_data_free(&cmp);
+ btrfs_cmp_data_free(cmp);
if (same_inode) {
btrfs_wait_ordered_range(src, same_lock_start,
same_lock_len);
@@ -3167,12 +3496,12 @@ again:
ASSERT(ret == 0);
if (WARN_ON(ret)) {
/* ranges in the io trees already unlocked */
- btrfs_cmp_data_free(&cmp);
+ btrfs_cmp_data_free(cmp);
return ret;
}
/* pass original length for comparison so we stay within i_size */
- ret = btrfs_cmp_data(olen, &cmp);
+ ret = btrfs_cmp_data(olen, cmp);
if (ret == 0)
ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
@@ -3182,18 +3511,91 @@ again:
else
btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
- btrfs_cmp_data_free(&cmp);
+ btrfs_cmp_data_free(cmp);
+
+ return ret;
+}
+
+#define BTRFS_MAX_DEDUPE_LEN SZ_16M
+
+static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
+ struct inode *dst, u64 dst_loff)
+{
+ int ret;
+ struct cmp_pages cmp;
+ int num_pages = PAGE_ALIGN(BTRFS_MAX_DEDUPE_LEN) >> PAGE_SHIFT;
+ bool same_inode = (src == dst);
+ u64 i, tail_len, chunk_count;
+
+ if (olen == 0)
+ return 0;
+
+ if (same_inode)
+ inode_lock(src);
+ else
+ btrfs_double_inode_lock(src, dst);
+
+ /* don't make the dst file partly checksummed */
+ if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
+ (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ tail_len = olen % BTRFS_MAX_DEDUPE_LEN;
+ chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN);
+ if (chunk_count == 0)
+ num_pages = PAGE_ALIGN(tail_len) >> PAGE_SHIFT;
+
+ /*
+ * If deduping ranges in the same inode, locking rules make it
+ * mandatory to always lock pages in ascending order to avoid deadlocks
+ * with concurrent tasks (such as starting writeback/delalloc).
+ */
+ if (same_inode && dst_loff < loff)
+ swap(loff, dst_loff);
+
+ /*
+ * We must gather up all the pages before we initiate our extent
+ * locking. We use an array for the page pointers. Size of the array is
+ * bounded by len, which is in turn bounded by BTRFS_MAX_DEDUPE_LEN.
+ */
+ cmp.src_pages = kvmalloc_array(num_pages, sizeof(struct page *),
+ GFP_KERNEL | __GFP_ZERO);
+ cmp.dst_pages = kvmalloc_array(num_pages, sizeof(struct page *),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!cmp.src_pages || !cmp.dst_pages) {
+ ret = -ENOMEM;
+ goto out_free;
+ }
+
+ for (i = 0; i < chunk_count; i++) {
+ ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN,
+ dst, dst_loff, &cmp);
+ if (ret)
+ goto out_unlock;
+
+ loff += BTRFS_MAX_DEDUPE_LEN;
+ dst_loff += BTRFS_MAX_DEDUPE_LEN;
+ }
+
+ if (tail_len > 0)
+ ret = btrfs_extent_same_range(src, loff, tail_len, dst,
+ dst_loff, &cmp);
+
out_unlock:
if (same_inode)
inode_unlock(src);
else
btrfs_double_inode_unlock(src, dst);
+out_free:
+ kvfree(cmp.src_pages);
+ kvfree(cmp.dst_pages);
+
return ret;
}
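The rewrite drops the old silent clamp to 16MiB (see the removed clamp in btrfs_dedupe_file_range below) and instead walks the full request in BTRFS_MAX_DEDUPE_LEN chunks, reusing one page array sized for a single chunk. Worked numbers, purely illustrative:

	u64 olen = 40 * SZ_1M;					/* example request */
	u64 tail_len = olen % BTRFS_MAX_DEDUPE_LEN;		/* 8 MiB */
	u64 chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN);	/* 2 */
	/* btrfs_extent_same_range() runs twice at 16 MiB and once at 8 MiB,
	   with loff/dst_loff advanced by BTRFS_MAX_DEDUPE_LEN per chunk. */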
-#define BTRFS_MAX_DEDUPE_LEN SZ_16M
-
ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
struct file *dst_file, u64 dst_loff)
{
@@ -3202,9 +3604,6 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
ssize_t res;
- if (olen > BTRFS_MAX_DEDUPE_LEN)
- olen = BTRFS_MAX_DEDUPE_LEN;
-
if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
/*
* Btrfs does not support blocksize < page_size. As a
@@ -3826,11 +4225,6 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
src->i_sb != inode->i_sb)
return -EXDEV;
- /* don't make the dst file partly checksummed */
- if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
- (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
- return -EINVAL;
-
if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
return -EISDIR;
@@ -3840,6 +4234,13 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
inode_lock(src);
}
+ /* don't make the dst file partly checksummed */
+ if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
+ (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
/* determine range to clone */
ret = -EINVAL;
if (off + len > src->i_size || off + len < off)
@@ -4007,8 +4408,8 @@ out:
return ret;
}
-void btrfs_get_block_group_info(struct list_head *groups_list,
- struct btrfs_ioctl_space_info *space)
+static void get_block_group_info(struct list_head *groups_list,
+ struct btrfs_ioctl_space_info *space)
{
struct btrfs_block_group_cache *block_group;
@@ -4124,8 +4525,8 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
down_read(&info->groups_sem);
for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
if (!list_empty(&info->block_groups[c])) {
- btrfs_get_block_group_info(
- &info->block_groups[c], &space);
+ get_block_group_info(&info->block_groups[c],
+ &space);
memcpy(dest, &space, sizeof(space));
dest++;
space_args.total_spaces++;
@@ -4490,14 +4891,14 @@ out_loi:
return ret;
}
-void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
+void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_balance_args *bargs)
{
struct btrfs_balance_control *bctl = fs_info->balance_ctl;
bargs->flags = bctl->flags;
- if (atomic_read(&fs_info->balance_running))
+ if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags))
bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
if (atomic_read(&fs_info->balance_pause_req))
bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
@@ -4508,13 +4909,9 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
- if (lock) {
- spin_lock(&fs_info->balance_lock);
- memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
- spin_unlock(&fs_info->balance_lock);
- } else {
- memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
- }
+ spin_lock(&fs_info->balance_lock);
+ memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
+ spin_unlock(&fs_info->balance_lock);
}
static long btrfs_ioctl_balance(struct file *file, void __user *arg)
@@ -4535,7 +4932,6 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
again:
if (!test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
- mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
need_unlock = true;
goto locked;
@@ -4550,21 +4946,22 @@ again:
mutex_lock(&fs_info->balance_mutex);
if (fs_info->balance_ctl) {
/* this is either (2) or (3) */
- if (!atomic_read(&fs_info->balance_running)) {
+ if (!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
mutex_unlock(&fs_info->balance_mutex);
- if (!mutex_trylock(&fs_info->volume_mutex))
- goto again;
+ /*
+ * Lock released to allow other waiters to continue; we'll
+ * reexamine the status once we retake the lock.
+ */
mutex_lock(&fs_info->balance_mutex);
if (fs_info->balance_ctl &&
- !atomic_read(&fs_info->balance_running)) {
+ !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
/* this is (3) */
need_unlock = false;
goto locked;
}
mutex_unlock(&fs_info->balance_mutex);
- mutex_unlock(&fs_info->volume_mutex);
goto again;
} else {
/* this is (2) */
@@ -4617,7 +5014,6 @@ locked:
goto out_bargs;
}
- bctl->fs_info = fs_info;
if (arg) {
memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
@@ -4636,14 +5032,14 @@ locked:
do_balance:
/*
- * Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP
- * goes to to btrfs_balance. bctl is freed in __cancel_balance,
- * or, if restriper was paused all the way until unmount, in
- * free_fs_info. The flag is cleared in __cancel_balance.
+ * Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP goes to
+ * btrfs_balance. bctl is freed in reset_balance_state, or, if
+ * restriper was paused all the way until unmount, in free_fs_info.
+ * The flag should be cleared after reset_balance_state.
*/
need_unlock = false;
- ret = btrfs_balance(bctl, bargs);
+ ret = btrfs_balance(fs_info, bctl, bargs);
bctl = NULL;
if (arg) {
@@ -4657,7 +5053,6 @@ out_bargs:
kfree(bargs);
out_unlock:
mutex_unlock(&fs_info->balance_mutex);
- mutex_unlock(&fs_info->volume_mutex);
if (need_unlock)
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
out:
@@ -4701,7 +5096,7 @@ static long btrfs_ioctl_balance_progress(struct btrfs_fs_info *fs_info,
goto out;
}
- update_ioctl_balance_args(fs_info, 1, bargs);
+ btrfs_update_ioctl_balance_args(fs_info, bargs);
if (copy_to_user(arg, bargs, sizeof(*bargs)))
ret = -EFAULT;
@@ -5038,8 +5433,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
BTRFS_UUID_SIZE);
if (received_uuid_changed &&
!btrfs_is_empty_uuid(root_item->received_uuid)) {
- ret = btrfs_uuid_tree_rem(trans, fs_info,
- root_item->received_uuid,
+ ret = btrfs_uuid_tree_remove(trans, root_item->received_uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
root->root_key.objectid);
if (ret && ret != -ENOENT) {
@@ -5063,7 +5457,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
goto out;
}
if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) {
- ret = btrfs_uuid_tree_add(trans, fs_info, sa->uuid,
+ ret = btrfs_uuid_tree_add(trans, sa->uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
root->root_key.objectid);
if (ret < 0 && ret != -EEXIST) {
@@ -5497,7 +5891,7 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_SYNC: {
int ret;
- ret = btrfs_start_delalloc_roots(fs_info, 0, -1);
+ ret = btrfs_start_delalloc_roots(fs_info, -1);
if (ret)
return ret;
ret = btrfs_sync_fs(inode->i_sb, 1);
@@ -5565,6 +5959,16 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_get_features(file, argp);
case BTRFS_IOC_SET_FEATURES:
return btrfs_ioctl_set_features(file, argp);
+ case FS_IOC_FSGETXATTR:
+ return btrfs_ioctl_fsgetxattr(file, argp);
+ case FS_IOC_FSSETXATTR:
+ return btrfs_ioctl_fssetxattr(file, argp);
+ case BTRFS_IOC_GET_SUBVOL_INFO:
+ return btrfs_ioctl_get_subvol_info(file, argp);
+ case BTRFS_IOC_GET_SUBVOL_ROOTREF:
+ return btrfs_ioctl_get_subvol_rootref(file, argp);
+ case BTRFS_IOC_INO_LOOKUP_USER:
+ return btrfs_ioctl_ino_lookup_user(file, argp);
}
return -ENOTTY;
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index e4faefac9d16..1da768e5ef75 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -66,22 +66,16 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
write_lock(&eb->lock);
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_inc(&eb->spinning_writers);
- /*
- * atomic_dec_and_test implies a barrier for waitqueue_active
- */
- if (atomic_dec_and_test(&eb->blocking_writers) &&
- waitqueue_active(&eb->write_lock_wq))
- wake_up(&eb->write_lock_wq);
+ /* atomic_dec_and_test implies a barrier */
+ if (atomic_dec_and_test(&eb->blocking_writers))
+ cond_wake_up_nomb(&eb->write_lock_wq);
} else if (rw == BTRFS_READ_LOCK_BLOCKING) {
BUG_ON(atomic_read(&eb->blocking_readers) == 0);
read_lock(&eb->lock);
atomic_inc(&eb->spinning_readers);
- /*
- * atomic_dec_and_test implies a barrier for waitqueue_active
- */
- if (atomic_dec_and_test(&eb->blocking_readers) &&
- waitqueue_active(&eb->read_lock_wq))
- wake_up(&eb->read_lock_wq);
+ /* atomic_dec_and_test implies a barrier */
+ if (atomic_dec_and_test(&eb->blocking_readers))
+ cond_wake_up_nomb(&eb->read_lock_wq);
}
}
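cond_wake_up_nomb() is introduced elsewhere in this series (in ctree.h); roughly, it skips the wakeup when nobody is waiting and deliberately adds no memory barrier, relying on the preceding atomic op. A sketch of its expected shape, not the verbatim definition:

	static inline void cond_wake_up_nomb(wait_queue_head_t *wq)
	{
		/*
		 * The caller must provide the barrier that waitqueue_active()
		 * needs, e.g. via the atomic_dec_and_test() or
		 * test_and_set_bit() issued just before this call.
		 */
		if (waitqueue_active(wq))
			wake_up(wq);
	}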
@@ -221,12 +215,9 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
}
btrfs_assert_tree_read_locked(eb);
WARN_ON(atomic_read(&eb->blocking_readers) == 0);
- /*
- * atomic_dec_and_test implies a barrier for waitqueue_active
- */
- if (atomic_dec_and_test(&eb->blocking_readers) &&
- waitqueue_active(&eb->read_lock_wq))
- wake_up(&eb->read_lock_wq);
+ /* atomic_dec_and_test implies a barrier */
+ if (atomic_dec_and_test(&eb->blocking_readers))
+ cond_wake_up_nomb(&eb->read_lock_wq);
atomic_dec(&eb->read_locks);
}
@@ -275,12 +266,9 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
if (blockers) {
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_dec(&eb->blocking_writers);
- /*
- * Make sure counter is updated before we wake up waiters.
- */
+ /* Make the counter update visible before the wakeup; the post-atomic barrier suffices */
smp_mb__after_atomic();
- if (waitqueue_active(&eb->write_lock_wq))
- wake_up(&eb->write_lock_wq);
+ cond_wake_up_nomb(&eb->write_lock_wq);
} else {
WARN_ON(atomic_read(&eb->spinning_writers) != 1);
atomic_dec(&eb->spinning_writers);
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index 0667ea07f766..b6a4cc178bee 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -17,6 +17,43 @@
#define LZO_LEN 4
+/*
+ * Btrfs LZO compression format
+ *
+ * Regular and inlined LZO compressed data extents consist of:
+ *
+ * 1. Header
+ * Fixed size. LZO_LEN (4) bytes long, LE32.
+ * Records the total size (including the header) of compressed data.
+ *
+ * 2. Segment(s)
+ * Variable size. Each segment includes one segment header, followed by data
+ * payload.
+ * One regular LZO compressed extent can have one or more segments.
+ * For an inlined LZO compressed extent, only one segment is allowed.
+ * One segment represents at most one page of uncompressed data.
+ *
+ * 2.1 Segment header
+ * Fixed size. LZO_LEN (4) bytes long, LE32.
+ * Records the total size of the segment (not including the header).
+ * A segment header never crosses a page boundary, thus it's possible to
+ * have at most 3 padding zeros at the end of the page.
+ *
+ * 2.2 Data payload
+ * Variable size. Its size upper limit is lzo1x_worst_compress(PAGE_SIZE),
+ * which is 4419 for a 4KiB page.
+ *
+ * Example:
+ * Page 1:
+ *        0     0x2   0x4   0x6   0x8   0xa   0xc   0xe    0x10
+ * 0x0000 |  Header   | SegHdr 01 | Data payload 01 ...    |
+ * ...
+ * 0x0ff0 | SegHdr  N | Data payload  N ...             |00|
+ *                                                       ^^ padding zeros
+ * Page 2:
+ * 0x1000 | SegHdr N+1| Data payload N+1 ...               |
+ */
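Both length fields described above are raw LE32 values; the existing helper in this file reads them roughly as follows (a sketch of read_compress_length, which the hunks below call):

	static inline size_t read_compress_length(const char *buf)
	{
		__le32 dlen;

		/* header and segment lengths are LZO_LEN (4) bytes, LE32 */
		memcpy(&dlen, buf, LZO_LEN);
		return le32_to_cpu(dlen);
	}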
+
struct workspace {
void *mem;
void *buf; /* where decompressed data goes */
@@ -258,6 +295,7 @@ static int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
unsigned long working_bytes;
size_t in_len;
size_t out_len;
+ const size_t max_segment_len = lzo1x_worst_compress(PAGE_SIZE);
unsigned long in_offset;
unsigned long in_page_bytes_left;
unsigned long tot_in;
@@ -271,10 +309,22 @@ static int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
data_in = kmap(pages_in[0]);
tot_len = read_compress_length(data_in);
+ /*
+ * Compressed data header check.
+ *
+ * The real compressed size can't exceed the maximum extent length, and
+ * all pages should be used (a page left entirely unused apart from the
+ * segment header is not possible). If either check fails, the compressed
+ * extent is corrupted.
+ */
+ if (tot_len > min_t(size_t, BTRFS_MAX_COMPRESSED, srclen) ||
+ tot_len < srclen - PAGE_SIZE) {
+ ret = -EUCLEAN;
+ goto done;
+ }
tot_in = LZO_LEN;
in_offset = LZO_LEN;
- tot_len = min_t(size_t, srclen, tot_len);
in_page_bytes_left = PAGE_SIZE - LZO_LEN;
tot_out = 0;
@@ -285,6 +335,17 @@ static int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
in_offset += LZO_LEN;
tot_in += LZO_LEN;
+ /*
+ * Segment header check.
+ *
+ * The segment length must not exceed the maximum LZO
+ * compression size, nor the total compressed size.
+ */
+ if (in_len > max_segment_len || tot_in + in_len > tot_len) {
+ ret = -EUCLEAN;
+ goto done;
+ }
+
tot_in += in_len;
working_bytes = in_len;
may_late_unmap = need_unmap = false;
@@ -335,7 +396,7 @@ cont:
}
}
- out_len = lzo1x_worst_compress(PAGE_SIZE);
+ out_len = max_segment_len;
ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
&out_len);
if (need_unmap)
@@ -369,15 +430,24 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
struct workspace *workspace = list_entry(ws, struct workspace, list);
size_t in_len;
size_t out_len;
+ size_t max_segment_len = lzo1x_worst_compress(PAGE_SIZE);
int ret = 0;
char *kaddr;
unsigned long bytes;
- BUG_ON(srclen < LZO_LEN);
+ if (srclen < LZO_LEN || srclen > max_segment_len + LZO_LEN * 2)
+ return -EUCLEAN;
+ in_len = read_compress_length(data_in);
+ if (in_len != srclen)
+ return -EUCLEAN;
data_in += LZO_LEN;
in_len = read_compress_length(data_in);
+ if (in_len != srclen - LZO_LEN * 2) {
+ ret = -EUCLEAN;
+ goto out;
+ }
data_in += LZO_LEN;
out_len = PAGE_SIZE;
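For inline extents the two new checks pin the layout down completely. A worked example with illustrative numbers:

	/* A 100-byte inline extent (srclen = 100, all on one page):
	 *   first LE32 (header)       must read 100 = srclen
	 *   second LE32 (segment hdr) must read  92 = srclen - 2 * LZO_LEN
	 * and 4 <= srclen <= lzo1x_worst_compress(PAGE_SIZE) + 2 * LZO_LEN.
	 * Any mismatch means corrupted on-disk data: -EUCLEAN.
	 */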
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 6db8bb2f2c28..2e1a1694a33d 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -343,11 +343,8 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
if (entry->bytes_left == 0) {
ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
- /*
- * Implicit memory barrier after test_and_set_bit
- */
- if (waitqueue_active(&entry->wait))
- wake_up(&entry->wait);
+ /* test_and_set_bit implies a barrier */
+ cond_wake_up_nomb(&entry->wait);
} else {
ret = 1;
}
@@ -410,11 +407,8 @@ have_entry:
if (entry->bytes_left == 0) {
ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
- /*
- * Implicit memory barrier after test_and_set_bit
- */
- if (waitqueue_active(&entry->wait))
- wake_up(&entry->wait);
+ /* test_and_set_bit implies a barrier */
+ cond_wake_up_nomb(&entry->wait);
} else {
ret = 1;
}
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 21a831d3d087..a4e11cf04671 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -166,6 +166,25 @@ static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
}
}
+/*
+ * Helper to output refs and locking status of an extent buffer. Useful for
+ * debugging race condition related problems.
+ */
+static void print_eb_refs_lock(struct extent_buffer *eb)
+{
+#ifdef CONFIG_BTRFS_DEBUG
+ btrfs_info(eb->fs_info,
+"refs %u lock (w:%d r:%d bw:%d br:%d sw:%d sr:%d) lock_owner %u current %u",
+ atomic_read(&eb->refs), atomic_read(&eb->write_locks),
+ atomic_read(&eb->read_locks),
+ atomic_read(&eb->blocking_writers),
+ atomic_read(&eb->blocking_readers),
+ atomic_read(&eb->spinning_writers),
+ atomic_read(&eb->spinning_readers),
+ eb->lock_owner, current->pid);
+#endif
+}
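With CONFIG_BTRFS_DEBUG enabled, the helper emits one extra line per dumped buffer; given the format string above, the output looks like this (values are illustrative):

	refs 4 lock (w:1 r:0 bw:0 br:0 sw:1 sr:0) lock_owner 1234 current 1234

where w/r are the write/read lock counts, bw/br the blocking counts and sw/sr the spinning counts, matching the atomic_read() calls in order.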
+
void btrfs_print_leaf(struct extent_buffer *l)
{
struct btrfs_fs_info *fs_info;
@@ -193,6 +212,7 @@ void btrfs_print_leaf(struct extent_buffer *l)
"leaf %llu gen %llu total ptrs %d free space %d owner %llu",
btrfs_header_bytenr(l), btrfs_header_generation(l), nr,
btrfs_leaf_free_space(fs_info, l), btrfs_header_owner(l));
+ print_eb_refs_lock(l);
for (i = 0 ; i < nr ; i++) {
item = btrfs_item_nr(i);
btrfs_item_key_to_cpu(l, &key, i);
@@ -347,6 +367,7 @@ void btrfs_print_tree(struct extent_buffer *c, bool follow)
btrfs_header_bytenr(c), level, btrfs_header_generation(c),
nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(fs_info) - nr,
btrfs_header_owner(c));
+ print_eb_refs_lock(c);
for (i = 0; i < nr; i++) {
btrfs_node_key_to_cpu(c, &key, i);
pr_info("\tkey %d (%llu %u %llu) block %llu gen %llu\n",
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index 53a8c95828e3..dc6140013ae8 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -380,6 +380,7 @@ static int prop_compression_apply(struct inode *inode,
const char *value,
size_t len)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
int type;
if (len == 0) {
@@ -390,14 +391,17 @@ static int prop_compression_apply(struct inode *inode,
return 0;
}
- if (!strncmp("lzo", value, 3))
+ if (!strncmp("lzo", value, 3)) {
type = BTRFS_COMPRESS_LZO;
- else if (!strncmp("zlib", value, 4))
+ btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
+ } else if (!strncmp("zlib", value, 4)) {
type = BTRFS_COMPRESS_ZLIB;
- else if (!strncmp("zstd", value, len))
+ } else if (!strncmp("zstd", value, len)) {
type = BTRFS_COMPRESS_ZSTD;
- else
+ btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
+ } else {
return -EINVAL;
+ }
BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 9fb758d5077a..1874a6d2e6f5 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1882,8 +1882,8 @@ static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
- trace_qgroup_update_counters(fs_info, qg->qgroupid,
- cur_old_count, cur_new_count);
+ trace_qgroup_update_counters(fs_info, qg, cur_old_count,
+ cur_new_count);
/* Rfer update part */
if (cur_old_count == 0 && cur_new_count > 0) {
@@ -2014,8 +2014,8 @@ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
BUG_ON(!fs_info->quota_root);
- trace_btrfs_qgroup_account_extent(fs_info, bytenr, num_bytes,
- nr_old_roots, nr_new_roots);
+ trace_btrfs_qgroup_account_extent(fs_info, trans->transid, bytenr,
+ num_bytes, nr_old_roots, nr_new_roots);
qgroups = ulist_alloc(GFP_NOFS);
if (!qgroups) {
@@ -2580,6 +2580,21 @@ out:
}
/*
+ * Check if the leaf is the last leaf, which means all node pointers
+ * are at their last position.
+ */
+static bool is_last_leaf(struct btrfs_path *path)
+{
+ int i;
+
+ for (i = 1; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
+ if (path->slots[i] != btrfs_header_nritems(path->nodes[i]) - 1)
+ return false;
+ }
+ return true;
+}
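The loop starts at level 1, so only internal nodes are checked; the leaf itself is handled by the caller. For example (hypothetical counts):

	/*
	 *   level 2 (root): nritems = 5, slots[2] = 4  ->  last slot
	 *   level 1:        nritems = 9, slots[1] = 8  ->  last slot
	 *
	 * Every checked level sits at nritems - 1, so nothing exists to the
	 * right of this path and the rescan can finish after this leaf.
	 */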
+
+/*
* returns < 0 on error, 0 when more leafs are to be scanned.
* returns 1 when done.
*/
@@ -2590,8 +2605,8 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
struct btrfs_key found;
struct extent_buffer *scratch_leaf = NULL;
struct ulist *roots = NULL;
- struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
u64 num_bytes;
+ bool done;
int slot;
int ret;
@@ -2620,12 +2635,12 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
mutex_unlock(&fs_info->qgroup_rescan_lock);
return ret;
}
+ done = is_last_leaf(path);
btrfs_item_key_to_cpu(path->nodes[0], &found,
btrfs_header_nritems(path->nodes[0]) - 1);
fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
- btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]);
if (!scratch_leaf) {
ret = -ENOMEM;
@@ -2664,8 +2679,9 @@ out:
btrfs_tree_read_unlock_blocking(scratch_leaf);
free_extent_buffer(scratch_leaf);
}
- btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+ if (done && !ret)
+ ret = 1;
return ret;
}
@@ -2681,6 +2697,12 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
path = btrfs_alloc_path();
if (!path)
goto out;
+ /*
+ * Rescan should only search the commit root, and any later difference
+ * should be recorded by qgroup
+ */
+ path->search_commit_root = 1;
+ path->skip_locking = 1;
err = 0;
while (!err && !btrfs_fs_closing(fs_info)) {
@@ -2760,26 +2782,36 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
{
int ret = 0;
- if (!init_flags &&
- (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
- !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
- ret = -EINVAL;
- goto err;
+ if (!init_flags) {
+ /* we're resuming qgroup rescan at mount time */
+ if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN))
+ btrfs_warn(fs_info,
+ "qgroup rescan init failed, qgroup is not enabled");
+ else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
+ btrfs_warn(fs_info,
+ "qgroup rescan init failed, qgroup rescan is not queued");
+ return -EINVAL;
}
mutex_lock(&fs_info->qgroup_rescan_lock);
spin_lock(&fs_info->qgroup_lock);
if (init_flags) {
- if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
+ if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+ btrfs_warn(fs_info,
+ "qgroup rescan is already in progress");
ret = -EINPROGRESS;
- else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
+ } else if (!(fs_info->qgroup_flags &
+ BTRFS_QGROUP_STATUS_FLAG_ON)) {
+ btrfs_warn(fs_info,
+ "qgroup rescan init failed, qgroup is not enabled");
ret = -EINVAL;
+ }
if (ret) {
spin_unlock(&fs_info->qgroup_lock);
mutex_unlock(&fs_info->qgroup_rescan_lock);
- goto err;
+ return ret;
}
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
}
@@ -2798,13 +2830,6 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
btrfs_init_work(&fs_info->qgroup_rescan_work,
btrfs_qgroup_rescan_helper,
btrfs_qgroup_rescan_worker, NULL, NULL);
-
- if (ret) {
-err:
- btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
- return ret;
- }
-
return 0;
}
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 9abd950e7f78..5e4ad134b9ad 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -163,6 +163,12 @@ struct btrfs_raid_bio {
* bitmap to record which horizontal stripe has data
*/
unsigned long *dbitmap;
+
+ /* allocated with real_stripes-many pointers for finish_*() calls */
+ void **finish_pointers;
+
+ /* allocated with stripe_npages-many bits for finish_*() calls */
+ unsigned long *finish_pbitmap;
};
static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
@@ -981,9 +987,14 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE);
void *p;
- rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2 +
- DIV_ROUND_UP(stripe_npages, BITS_PER_LONG) *
- sizeof(long), GFP_NOFS);
+ rbio = kzalloc(sizeof(*rbio) +
+ sizeof(*rbio->stripe_pages) * num_pages +
+ sizeof(*rbio->bio_pages) * num_pages +
+ sizeof(*rbio->finish_pointers) * real_stripes +
+ sizeof(*rbio->dbitmap) * BITS_TO_LONGS(stripe_npages) +
+ sizeof(*rbio->finish_pbitmap) *
+ BITS_TO_LONGS(stripe_npages),
+ GFP_NOFS);
if (!rbio)
return ERR_PTR(-ENOMEM);
@@ -1005,13 +1016,20 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
atomic_set(&rbio->stripes_pending, 0);
/*
- * the stripe_pages and bio_pages array point to the extra
+ * the stripe_pages, bio_pages, etc. arrays point to the extra
* memory we allocated past the end of the rbio
*/
p = rbio + 1;
- rbio->stripe_pages = p;
- rbio->bio_pages = p + sizeof(struct page *) * num_pages;
- rbio->dbitmap = p + sizeof(struct page *) * num_pages * 2;
+#define CONSUME_ALLOC(ptr, count) do { \
+ ptr = p; \
+ p = (unsigned char *)p + sizeof(*(ptr)) * (count); \
+ } while (0)
+ CONSUME_ALLOC(rbio->stripe_pages, num_pages);
+ CONSUME_ALLOC(rbio->bio_pages, num_pages);
+ CONSUME_ALLOC(rbio->finish_pointers, real_stripes);
+ CONSUME_ALLOC(rbio->dbitmap, BITS_TO_LONGS(stripe_npages));
+ CONSUME_ALLOC(rbio->finish_pbitmap, BITS_TO_LONGS(stripe_npages));
+#undef CONSUME_ALLOC
if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
nr_data = real_stripes - 1;
@@ -1180,7 +1198,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
{
struct btrfs_bio *bbio = rbio->bbio;
- void *pointers[rbio->real_stripes];
+ void **pointers = rbio->finish_pointers;
int nr_data = rbio->nr_data;
int stripe;
int pagenr;
@@ -2350,8 +2368,8 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
int need_check)
{
struct btrfs_bio *bbio = rbio->bbio;
- void *pointers[rbio->real_stripes];
- DECLARE_BITMAP(pbitmap, rbio->stripe_npages);
+ void **pointers = rbio->finish_pointers;
+ unsigned long *pbitmap = rbio->finish_pbitmap;
int nr_data = rbio->nr_data;
int stripe;
int pagenr;
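The CONSUME_ALLOC pattern above replaces two variable-length stack arrays (pointers[rbio->real_stripes] and DECLARE_BITMAP(pbitmap, rbio->stripe_npages)) with slices of the single rbio allocation. A generic sketch of the same technique with hypothetical names (assumes linux/slab.h and linux/bitops.h); all slices share pointer/long alignment, so no padding is needed:

	struct blob {
		void **ptrs;		/* nptrs pointers, carved from the tail */
		unsigned long *map;	/* bitmap longs, carved after ptrs */
	};

	static struct blob *blob_alloc(int nptrs, int nbits)
	{
		struct blob *x;
		void *p;

		x = kzalloc(sizeof(*x) +
			    sizeof(*x->ptrs) * nptrs +
			    sizeof(*x->map) * BITS_TO_LONGS(nbits),
			    GFP_KERNEL);
		if (!x)
			return NULL;

		/* carve the trailing memory in the same order as the sizes */
		p = x + 1;
		x->ptrs = p;
		p = (unsigned char *)p + sizeof(*x->ptrs) * nptrs;
		x->map = p;
		return x;		/* a single kfree(x) releases it all */
	}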
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 00b7d3231821..879b76fa881a 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1841,7 +1841,7 @@ again:
old_bytenr = btrfs_node_blockptr(parent, slot);
blocksize = fs_info->nodesize;
old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
- btrfs_node_key_to_cpu(parent, &key, slot);
+ btrfs_node_key_to_cpu(parent, &first_key, slot);
if (level <= max_level) {
eb = path->nodes[level];
@@ -4299,7 +4299,7 @@ out:
return inode;
}
-static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
+static struct reloc_control *alloc_reloc_control(void)
{
struct reloc_control *rc;
@@ -4344,7 +4344,7 @@ static void describe_relocation(struct btrfs_fs_info *fs_info,
DESCRIBE_FLAG(RAID5, "raid5");
DESCRIBE_FLAG(RAID6, "raid6");
if (flags)
- snprintf(buf, buf - bp + sizeof(buf), "|0x%llx", flags);
+ snprintf(bp, buf - bp + sizeof(buf), "|0x%llx", flags);
#undef DESCRIBE_FLAG
}
@@ -4366,7 +4366,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
int rw = 0;
int err = 0;
- rc = alloc_reloc_control(fs_info);
+ rc = alloc_reloc_control();
if (!rc)
return -ENOMEM;
@@ -4562,7 +4562,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
if (list_empty(&reloc_roots))
goto out;
- rc = alloc_reloc_control(fs_info);
+ rc = alloc_reloc_control();
if (!rc) {
err = -ENOMEM;
goto out;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 52b39a0924e9..a59005862010 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3984,6 +3984,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
spin_lock(&fs_info->unused_bgs_lock);
if (list_empty(&cache->bg_list)) {
btrfs_get_block_group(cache);
+ trace_btrfs_add_unused_block_group(cache);
list_add_tail(&cache->bg_list,
&fs_info->unused_bgs);
}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 221e5cdb060b..c47f62b19226 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -235,6 +235,7 @@ struct orphan_dir_info {
struct rb_node node;
u64 ino;
u64 gen;
+ u64 last_dir_index_offset;
};
struct name_cache_entry {
@@ -2844,12 +2845,6 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
struct rb_node *parent = NULL;
struct orphan_dir_info *entry, *odi;
- odi = kmalloc(sizeof(*odi), GFP_KERNEL);
- if (!odi)
- return ERR_PTR(-ENOMEM);
- odi->ino = dir_ino;
- odi->gen = 0;
-
while (*p) {
parent = *p;
entry = rb_entry(parent, struct orphan_dir_info, node);
@@ -2858,11 +2853,17 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
} else if (dir_ino > entry->ino) {
p = &(*p)->rb_right;
} else {
- kfree(odi);
return entry;
}
}
+ odi = kmalloc(sizeof(*odi), GFP_KERNEL);
+ if (!odi)
+ return ERR_PTR(-ENOMEM);
+ odi->ino = dir_ino;
+ odi->gen = 0;
+ odi->last_dir_index_offset = 0;
+
rb_link_node(&odi->node, parent, p);
rb_insert_color(&odi->node, &sctx->orphan_dirs);
return odi;
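Moving the kmalloc() below the walk is the usual "search first, allocate on miss" shape for rbtree insertion, so a lookup hit no longer pays an allocate-and-free round trip. The same pattern in isolation, with a hypothetical item type (assumes linux/rbtree.h, linux/slab.h, linux/err.h):

	struct item {
		struct rb_node node;
		u64 key;
	};

	static struct item *lookup_or_create(struct rb_root *root, u64 key)
	{
		struct rb_node **p = &root->rb_node, *parent = NULL;
		struct item *entry;

		while (*p) {
			parent = *p;
			entry = rb_entry(parent, struct item, node);
			if (key < entry->key)
				p = &(*p)->rb_left;
			else if (key > entry->key)
				p = &(*p)->rb_right;
			else
				return entry;	/* hit: nothing allocated */
		}

		entry = kmalloc(sizeof(*entry), GFP_KERNEL);
		if (!entry)
			return ERR_PTR(-ENOMEM);
		entry->key = key;
		rb_link_node(&entry->node, parent, p);
		rb_insert_color(&entry->node, root);
		return entry;
	}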
@@ -2917,6 +2918,7 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
struct btrfs_key found_key;
struct btrfs_key loc;
struct btrfs_dir_item *di;
+ struct orphan_dir_info *odi = NULL;
/*
* Don't try to rmdir the top/root subvolume dir.
@@ -2931,6 +2933,11 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
key.objectid = dir;
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = 0;
+
+ odi = get_orphan_dir_info(sctx, dir);
+ if (odi)
+ key.offset = odi->last_dir_index_offset;
+
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
@@ -2958,30 +2965,33 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
dm = get_waiting_dir_move(sctx, loc.objectid);
if (dm) {
- struct orphan_dir_info *odi;
-
odi = add_orphan_dir_info(sctx, dir);
if (IS_ERR(odi)) {
ret = PTR_ERR(odi);
goto out;
}
odi->gen = dir_gen;
+ odi->last_dir_index_offset = found_key.offset;
dm->rmdir_ino = dir;
ret = 0;
goto out;
}
if (loc.objectid > send_progress) {
- struct orphan_dir_info *odi;
-
- odi = get_orphan_dir_info(sctx, dir);
- free_orphan_dir_info(sctx, odi);
+ odi = add_orphan_dir_info(sctx, dir);
+ if (IS_ERR(odi)) {
+ ret = PTR_ERR(odi);
+ goto out;
+ }
+ odi->gen = dir_gen;
+ odi->last_dir_index_offset = found_key.offset;
ret = 0;
goto out;
}
path->slots[0]++;
}
+ free_orphan_dir_info(sctx, odi);
ret = 1;
@@ -3259,13 +3269,16 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
if (rmdir_ino) {
struct orphan_dir_info *odi;
+ u64 gen;
odi = get_orphan_dir_info(sctx, rmdir_ino);
if (!odi) {
/* already deleted */
goto finish;
}
- ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino);
+ gen = odi->gen;
+
+ ret = can_rmdir(sctx, rmdir_ino, gen, sctx->cur_ino);
if (ret < 0)
goto out;
if (!ret)
@@ -3276,13 +3289,12 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
ret = -ENOMEM;
goto out;
}
- ret = get_cur_path(sctx, rmdir_ino, odi->gen, name);
+ ret = get_cur_path(sctx, rmdir_ino, gen, name);
if (ret < 0)
goto out;
ret = send_rmdir(sctx, name);
if (ret < 0)
goto out;
- free_orphan_dir_info(sctx, odi);
}
finish:
@@ -5236,6 +5248,10 @@ static int send_write_or_clone(struct send_ctx *sctx,
len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
}
+ if (offset >= sctx->cur_inode_size) {
+ ret = 0;
+ goto out;
+ }
if (offset + len > sctx->cur_inode_size)
len = sctx->cur_inode_size - offset;
if (len == 0) {
@@ -6450,7 +6466,7 @@ static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
*/
if (root->send_in_progress < 0)
btrfs_err(root->fs_info,
- "send_in_progres unbalanced %d root %llu",
+ "send_in_progress unbalanced %d root %llu",
root->send_in_progress, root->root_key.objectid);
spin_unlock(&root->root_item_lock);
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0628092b0b1b..81107ad49f3a 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -323,6 +323,7 @@ enum {
Opt_ssd, Opt_nossd,
Opt_ssd_spread, Opt_nossd_spread,
Opt_subvol,
+ Opt_subvol_empty,
Opt_subvolid,
Opt_thread_pool,
Opt_treelog, Opt_notreelog,
@@ -388,6 +389,7 @@ static const match_table_t tokens = {
{Opt_ssd_spread, "ssd_spread"},
{Opt_nossd_spread, "nossd_spread"},
{Opt_subvol, "subvol=%s"},
+ {Opt_subvol_empty, "subvol="},
{Opt_subvolid, "subvolid=%s"},
{Opt_thread_pool, "thread_pool=%u"},
{Opt_treelog, "treelog"},
@@ -461,6 +463,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
btrfs_set_opt(info->mount_opt, DEGRADED);
break;
case Opt_subvol:
+ case Opt_subvol_empty:
case Opt_subvolid:
case Opt_subvolrootid:
case Opt_device:
@@ -1782,10 +1785,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
}
ret = btrfs_parse_options(fs_info, data, *flags);
- if (ret) {
- ret = -EINVAL;
+ if (ret)
goto restore;
- }
btrfs_remount_begin(fs_info, old_opts, *flags);
btrfs_resize_thread_pool(fs_info,
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 4848a4318fb5..4a4e960c7c66 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -210,12 +210,42 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
NULL
};
+/*
+ * Features which depend on feature bits and may differ between filesystems.
+ *
+ * /sys/fs/btrfs/features lists all available features of this kernel, while
+ * /sys/fs/btrfs/UUID/features shows the features of the given filesystem
+ * that are enabled or can be changed online.
+ */
static const struct attribute_group btrfs_feature_attr_group = {
.name = "features",
.is_visible = btrfs_feature_visible,
.attrs = btrfs_supported_feature_attrs,
};
+static ssize_t rmdir_subvol_show(struct kobject *kobj,
+ struct kobj_attribute *ka, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "0\n");
+}
+BTRFS_ATTR(static_feature, rmdir_subvol, rmdir_subvol_show);
+
+static struct attribute *btrfs_supported_static_feature_attrs[] = {
+ BTRFS_ATTR_PTR(static_feature, rmdir_subvol),
+ NULL
+};
+
+/*
+ * Features which only depend on kernel version.
+ *
+ * These are listed in /sys/fs/btrfs/features along with
+ * btrfs_feature_attr_group.
+ */
+static const struct attribute_group btrfs_static_feature_attr_group = {
+ .name = "features",
+ .attrs = btrfs_supported_static_feature_attrs,
+};
+
static ssize_t btrfs_show_u64(u64 *value_ptr, spinlock_t *lock, char *buf)
{
u64 val;
@@ -514,10 +544,11 @@ static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
}
#define NUM_FEATURE_BITS 64
-static char btrfs_unknown_feature_names[3][NUM_FEATURE_BITS][13];
-static struct btrfs_feature_attr btrfs_feature_attrs[3][NUM_FEATURE_BITS];
+#define BTRFS_FEATURE_NAME_MAX 13
+static char btrfs_unknown_feature_names[FEAT_MAX][NUM_FEATURE_BITS][BTRFS_FEATURE_NAME_MAX];
+static struct btrfs_feature_attr btrfs_feature_attrs[FEAT_MAX][NUM_FEATURE_BITS];
-static const u64 supported_feature_masks[3] = {
+static const u64 supported_feature_masks[FEAT_MAX] = {
[FEAT_COMPAT] = BTRFS_FEATURE_COMPAT_SUPP,
[FEAT_COMPAT_RO] = BTRFS_FEATURE_COMPAT_RO_SUPP,
[FEAT_INCOMPAT] = BTRFS_FEATURE_INCOMPAT_SUPP,
@@ -589,7 +620,7 @@ void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs)
return;
}
- list_for_each_entry(fs_devs, fs_uuids, list) {
+ list_for_each_entry(fs_devs, fs_uuids, fs_list) {
__btrfs_sysfs_remove_fsid(fs_devs);
}
}
@@ -609,7 +640,7 @@ void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info)
btrfs_sysfs_rm_device_link(fs_info->fs_devices, NULL);
}
-const char * const btrfs_feature_set_names[3] = {
+const char * const btrfs_feature_set_names[FEAT_MAX] = {
[FEAT_COMPAT] = "compat",
[FEAT_COMPAT_RO] = "compat_ro",
[FEAT_INCOMPAT] = "incompat",
@@ -673,7 +704,7 @@ static void init_feature_attrs(void)
if (fa->kobj_attr.attr.name)
continue;
- snprintf(name, 13, "%s:%u",
+ snprintf(name, BTRFS_FEATURE_NAME_MAX, "%s:%u",
btrfs_feature_set_names[set], i);
fa->kobj_attr.attr.name = name;
@@ -900,8 +931,15 @@ int __init btrfs_init_sysfs(void)
ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
if (ret)
goto out2;
+ ret = sysfs_merge_group(&btrfs_kset->kobj,
+ &btrfs_static_feature_attr_group);
+ if (ret)
+ goto out_remove_group;
return 0;
+
+out_remove_group:
+ sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
out2:
debugfs_remove_recursive(btrfs_debugfs_root_dentry);
out1:
@@ -912,6 +950,8 @@ out1:
void __cold btrfs_exit_sysfs(void)
{
+ sysfs_unmerge_group(&btrfs_kset->kobj,
+ &btrfs_static_feature_attr_group);
sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
kset_unregister(btrfs_kset);
debugfs_remove_recursive(btrfs_debugfs_root_dentry);
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index b567560d9aa9..c6ee600aff89 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -9,7 +9,7 @@
extern u64 btrfs_debugfs_test;
enum btrfs_feature_set {
- FEAT_COMPAT,
+ FEAT_COMPAT = 0,
FEAT_COMPAT_RO,
FEAT_INCOMPAT,
FEAT_MAX
@@ -77,7 +77,7 @@ attr_to_btrfs_feature_attr(struct attribute *attr)
}
char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
-extern const char * const btrfs_feature_set_names[3];
+extern const char * const btrfs_feature_set_names[FEAT_MAX];
extern struct kobj_type space_info_ktype;
extern struct kobj_type btrfs_raid_ktype;
int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices,
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 30ed438da2a9..db72b3b6209e 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -219,11 +219,13 @@ void btrfs_free_dummy_block_group(struct btrfs_block_group_cache *cache)
kfree(cache);
}
-void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans)
+void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info)
{
memset(trans, 0, sizeof(*trans));
trans->transid = 1;
trans->type = __TRANS_DUMMY;
+ trans->fs_info = fs_info;
}
int btrfs_run_sanity_tests(void)
diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h
index a5a0b9500d3e..70ff9f9d86a1 100644
--- a/fs/btrfs/tests/btrfs-tests.h
+++ b/fs/btrfs/tests/btrfs-tests.h
@@ -9,7 +9,8 @@
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_run_sanity_tests(void);
-#define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt, ##__VA_ARGS__)
+#define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__)
+#define test_err(fmt, ...) pr_err("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__)
struct btrfs_root;
struct btrfs_trans_handle;
@@ -28,7 +29,8 @@ void btrfs_free_dummy_root(struct btrfs_root *root);
struct btrfs_block_group_cache *
btrfs_alloc_dummy_block_group(struct btrfs_fs_info *fs_info, unsigned long length);
void btrfs_free_dummy_block_group(struct btrfs_block_group_cache *cache);
-void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans);
+void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info);
#else
static inline int btrfs_run_sanity_tests(void)
{
diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c
index 31e8a9ec228c..7d72eab6d32c 100644
--- a/fs/btrfs/tests/extent-buffer-tests.c
+++ b/fs/btrfs/tests/extent-buffer-tests.c
@@ -26,31 +26,31 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
u32 value_len = strlen(value);
int ret = 0;
- test_msg("Running btrfs_split_item tests\n");
+ test_msg("running btrfs_split_item tests");
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info) {
- test_msg("Could not allocate fs_info\n");
+ test_err("could not allocate fs_info");
return -ENOMEM;
}
root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(root)) {
- test_msg("Could not allocate root\n");
+ test_err("could not allocate root");
ret = PTR_ERR(root);
goto out;
}
path = btrfs_alloc_path();
if (!path) {
- test_msg("Could not allocate path\n");
+ test_err("could not allocate path");
ret = -ENOMEM;
goto out;
}
path->nodes[0] = eb = alloc_dummy_extent_buffer(fs_info, nodesize);
if (!eb) {
- test_msg("Could not allocate dummy buffer\n");
+ test_err("could not allocate dummy buffer");
ret = -ENOMEM;
goto out;
}
@@ -75,7 +75,7 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
*/
ret = btrfs_split_item(NULL, root, path, &key, 17);
if (ret) {
- test_msg("Split item failed %d\n", ret);
+ test_err("split item failed %d", ret);
goto out;
}
@@ -86,14 +86,14 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
btrfs_item_key_to_cpu(eb, &key, 0);
if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
key.offset != 0) {
- test_msg("Invalid key at slot 0\n");
+ test_err("invalid key at slot 0");
ret = -EINVAL;
goto out;
}
item = btrfs_item_nr(0);
if (btrfs_item_size(eb, item) != strlen(split1)) {
- test_msg("Invalid len in the first split\n");
+ test_err("invalid len in the first split");
ret = -EINVAL;
goto out;
}
@@ -101,8 +101,8 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 0),
strlen(split1));
if (memcmp(buf, split1, strlen(split1))) {
- test_msg("Data in the buffer doesn't match what it should "
- "in the first split have='%.*s' want '%s'\n",
+ test_err(
+"data in the buffer doesn't match what it should in the first split have='%.*s' want '%s'",
(int)strlen(split1), buf, split1);
ret = -EINVAL;
goto out;
@@ -111,14 +111,14 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
btrfs_item_key_to_cpu(eb, &key, 1);
if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
key.offset != 3) {
- test_msg("Invalid key at slot 1\n");
+ test_err("invalid key at slot 1");
ret = -EINVAL;
goto out;
}
item = btrfs_item_nr(1);
if (btrfs_item_size(eb, item) != strlen(split2)) {
- test_msg("Invalid len in the second split\n");
+ test_err("invalid len in the second split");
ret = -EINVAL;
goto out;
}
@@ -126,8 +126,8 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 1),
strlen(split2));
if (memcmp(buf, split2, strlen(split2))) {
- test_msg("Data in the buffer doesn't match what it should "
- "in the second split\n");
+ test_err(
+ "data in the buffer doesn't match what it should in the second split");
ret = -EINVAL;
goto out;
}
@@ -136,21 +136,21 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
/* Do it again so we test memmoving the other items in the leaf */
ret = btrfs_split_item(NULL, root, path, &key, 4);
if (ret) {
- test_msg("Second split item failed %d\n", ret);
+ test_err("second split item failed %d", ret);
goto out;
}
btrfs_item_key_to_cpu(eb, &key, 0);
if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
key.offset != 0) {
- test_msg("Invalid key at slot 0\n");
+ test_err("invalid key at slot 0");
ret = -EINVAL;
goto out;
}
item = btrfs_item_nr(0);
if (btrfs_item_size(eb, item) != strlen(split3)) {
- test_msg("Invalid len in the first split\n");
+ test_err("invalid len in the first split");
ret = -EINVAL;
goto out;
}
@@ -158,8 +158,8 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 0),
strlen(split3));
if (memcmp(buf, split3, strlen(split3))) {
- test_msg("Data in the buffer doesn't match what it should "
- "in the third split");
+ test_err(
+ "data in the buffer doesn't match what it should in the third split");
ret = -EINVAL;
goto out;
}
@@ -167,14 +167,14 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
btrfs_item_key_to_cpu(eb, &key, 1);
if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
key.offset != 1) {
- test_msg("Invalid key at slot 1\n");
+ test_err("invalid key at slot 1");
ret = -EINVAL;
goto out;
}
item = btrfs_item_nr(1);
if (btrfs_item_size(eb, item) != strlen(split4)) {
- test_msg("Invalid len in the second split\n");
+ test_err("invalid len in the second split");
ret = -EINVAL;
goto out;
}
@@ -182,8 +182,8 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 1),
strlen(split4));
if (memcmp(buf, split4, strlen(split4))) {
- test_msg("Data in the buffer doesn't match what it should "
- "in the fourth split\n");
+ test_err(
+ "data in the buffer doesn't match what it should in the fourth split");
ret = -EINVAL;
goto out;
}
@@ -191,14 +191,14 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
btrfs_item_key_to_cpu(eb, &key, 2);
if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
key.offset != 3) {
- test_msg("Invalid key at slot 2\n");
+ test_err("invalid key at slot 2");
ret = -EINVAL;
goto out;
}
item = btrfs_item_nr(2);
if (btrfs_item_size(eb, item) != strlen(split2)) {
- test_msg("Invalid len in the second split\n");
+ test_err("invalid len in the second split");
ret = -EINVAL;
goto out;
}
@@ -206,8 +206,8 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 2),
strlen(split2));
if (memcmp(buf, split2, strlen(split2))) {
- test_msg("Data in the buffer doesn't match what it should "
- "in the last chunk\n");
+ test_err(
+ "data in the buffer doesn't match what it should in the last chunk");
ret = -EINVAL;
goto out;
}
@@ -220,6 +220,6 @@ out:
int btrfs_test_extent_buffer_operations(u32 sectorsize, u32 nodesize)
{
- test_msg("Running extent buffer operation tests\n");
+ test_msg("running extent buffer operation tests");
return test_btrfs_split_item(sectorsize, nodesize);
}
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 76aa5a678a96..d9269a531a4d 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -46,7 +46,9 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end,
cond_resched();
loops++;
if (loops > 100000) {
- printk(KERN_ERR "stuck in a loop, start %Lu, end %Lu, nr_pages %lu, ret %d\n", start, end, nr_pages, ret);
+ printk(KERN_ERR
+ "stuck in a loop, start %llu, end %llu, nr_pages %lu, ret %d\n",
+ start, end, nr_pages, ret);
break;
}
}
@@ -66,11 +68,11 @@ static int test_find_delalloc(u32 sectorsize)
u64 found;
int ret = -EINVAL;
- test_msg("Running find delalloc tests\n");
+ test_msg("running find delalloc tests");
inode = btrfs_new_test_inode();
if (!inode) {
- test_msg("Failed to allocate test inode\n");
+ test_err("failed to allocate test inode");
return -ENOMEM;
}
@@ -84,7 +86,7 @@ static int test_find_delalloc(u32 sectorsize)
for (index = 0; index < (total_dirty >> PAGE_SHIFT); index++) {
page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
if (!page) {
- test_msg("Failed to allocate test page\n");
+ test_err("failed to allocate test page");
ret = -ENOMEM;
goto out;
}
@@ -107,11 +109,11 @@ static int test_find_delalloc(u32 sectorsize)
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
&end, max_bytes);
if (!found) {
- test_msg("Should have found at least one delalloc\n");
+ test_err("should have found at least one delalloc");
goto out_bits;
}
if (start != 0 || end != (sectorsize - 1)) {
- test_msg("Expected start 0 end %u, got start %llu end %llu\n",
+ test_err("expected start 0 end %u, got start %llu end %llu",
sectorsize - 1, start, end);
goto out_bits;
}
@@ -129,7 +131,7 @@ static int test_find_delalloc(u32 sectorsize)
locked_page = find_lock_page(inode->i_mapping,
test_start >> PAGE_SHIFT);
if (!locked_page) {
- test_msg("Couldn't find the locked page\n");
+ test_err("couldn't find the locked page");
goto out_bits;
}
set_extent_delalloc(&tmp, sectorsize, max_bytes - 1, 0, NULL);
@@ -138,17 +140,17 @@ static int test_find_delalloc(u32 sectorsize)
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
&end, max_bytes);
if (!found) {
- test_msg("Couldn't find delalloc in our range\n");
+ test_err("couldn't find delalloc in our range");
goto out_bits;
}
if (start != test_start || end != max_bytes - 1) {
- test_msg("Expected start %Lu end %Lu, got start %Lu, end "
- "%Lu\n", test_start, max_bytes - 1, start, end);
+ test_err("expected start %llu end %llu, got start %llu, end %llu",
+ test_start, max_bytes - 1, start, end);
goto out_bits;
}
if (process_page_range(inode, start, end,
PROCESS_TEST_LOCKED | PROCESS_UNLOCK)) {
- test_msg("There were unlocked pages in the range\n");
+ test_err("there were unlocked pages in the range");
goto out_bits;
}
unlock_extent(&tmp, start, end);
@@ -164,7 +166,7 @@ static int test_find_delalloc(u32 sectorsize)
locked_page = find_lock_page(inode->i_mapping, test_start >>
PAGE_SHIFT);
if (!locked_page) {
- test_msg("Couldn't find the locked page\n");
+ test_err("couldn't find the locked page");
goto out_bits;
}
start = test_start;
@@ -172,11 +174,11 @@ static int test_find_delalloc(u32 sectorsize)
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
&end, max_bytes);
if (found) {
- test_msg("Found range when we shouldn't have\n");
+ test_err("found range when we shouldn't have");
goto out_bits;
}
if (end != (u64)-1) {
- test_msg("Did not return the proper end offset\n");
+ test_err("did not return the proper end offset");
goto out_bits;
}
@@ -193,17 +195,17 @@ static int test_find_delalloc(u32 sectorsize)
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
&end, max_bytes);
if (!found) {
- test_msg("Didn't find our range\n");
+ test_err("didn't find our range");
goto out_bits;
}
if (start != test_start || end != total_dirty - 1) {
- test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n",
+ test_err("expected start %llu end %llu, got start %llu end %llu",
test_start, total_dirty - 1, start, end);
goto out_bits;
}
if (process_page_range(inode, start, end,
PROCESS_TEST_LOCKED | PROCESS_UNLOCK)) {
- test_msg("Pages in range were not all locked\n");
+ test_err("pages in range were not all locked");
goto out_bits;
}
unlock_extent(&tmp, start, end);
@@ -215,7 +217,7 @@ static int test_find_delalloc(u32 sectorsize)
page = find_get_page(inode->i_mapping,
(max_bytes + SZ_1M) >> PAGE_SHIFT);
if (!page) {
- test_msg("Couldn't find our page\n");
+ test_err("couldn't find our page");
goto out_bits;
}
ClearPageDirty(page);
@@ -234,18 +236,17 @@ static int test_find_delalloc(u32 sectorsize)
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
&end, max_bytes);
if (!found) {
- test_msg("Didn't find our range\n");
+ test_err("didn't find our range");
goto out_bits;
}
if (start != test_start && end != test_start + PAGE_SIZE - 1) {
- test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n",
- test_start, test_start + PAGE_SIZE - 1, start,
- end);
+ test_err("expected start %llu end %llu, got start %llu end %llu",
+ test_start, test_start + PAGE_SIZE - 1, start, end);
goto out_bits;
}
if (process_page_range(inode, start, end, PROCESS_TEST_LOCKED |
PROCESS_UNLOCK)) {
- test_msg("Pages in range were not all locked\n");
+ test_err("pages in range were not all locked");
goto out_bits;
}
ret = 0;
@@ -271,14 +272,14 @@ static int check_eb_bitmap(unsigned long *bitmap, struct extent_buffer *eb,
bit = !!test_bit(i, bitmap);
bit1 = !!extent_buffer_test_bit(eb, 0, i);
if (bit1 != bit) {
- test_msg("Bits do not match\n");
+ test_err("bits do not match");
return -EINVAL;
}
bit1 = !!extent_buffer_test_bit(eb, i / BITS_PER_BYTE,
i % BITS_PER_BYTE);
if (bit1 != bit) {
- test_msg("Offset bits do not match\n");
+ test_err("offset bits do not match");
return -EINVAL;
}
}
@@ -295,7 +296,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
memset(bitmap, 0, len);
memzero_extent_buffer(eb, 0, len);
if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
- test_msg("Bitmap was not zeroed\n");
+ test_err("bitmap was not zeroed");
return -EINVAL;
}
@@ -303,7 +304,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
ret = check_eb_bitmap(bitmap, eb, len);
if (ret) {
- test_msg("Setting all bits failed\n");
+ test_err("setting all bits failed");
return ret;
}
@@ -311,7 +312,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE);
ret = check_eb_bitmap(bitmap, eb, len);
if (ret) {
- test_msg("Clearing all bits failed\n");
+ test_err("clearing all bits failed");
return ret;
}
@@ -324,7 +325,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
sizeof(long) * BITS_PER_BYTE);
ret = check_eb_bitmap(bitmap, eb, len);
if (ret) {
- test_msg("Setting straddling pages failed\n");
+ test_err("setting straddling pages failed");
return ret;
}
@@ -337,7 +338,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
sizeof(long) * BITS_PER_BYTE);
ret = check_eb_bitmap(bitmap, eb, len);
if (ret) {
- test_msg("Clearing straddling pages failed\n");
+ test_err("clearing straddling pages failed");
return ret;
}
}
@@ -361,7 +362,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
ret = check_eb_bitmap(bitmap, eb, len);
if (ret) {
- test_msg("Random bit pattern failed\n");
+ test_err("random bit pattern failed");
return ret;
}
@@ -376,7 +377,7 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
struct extent_buffer *eb;
int ret;
- test_msg("Running extent buffer bitmap tests\n");
+ test_msg("running extent buffer bitmap tests");
/*
* In ppc64, sectorsize can be 64K, thus 4 * 64K will be larger than
@@ -389,13 +390,13 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
bitmap = kmalloc(len, GFP_KERNEL);
if (!bitmap) {
- test_msg("Couldn't allocate test bitmap\n");
+ test_err("couldn't allocate test bitmap");
return -ENOMEM;
}
eb = __alloc_dummy_extent_buffer(fs_info, 0, len);
if (!eb) {
- test_msg("Couldn't allocate test extent buffer\n");
+ test_err("couldn't allocate test extent buffer");
kfree(bitmap);
return -ENOMEM;
}
@@ -408,7 +409,7 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
free_extent_buffer(eb);
eb = __alloc_dummy_extent_buffer(NULL, nodesize / 2, len);
if (!eb) {
- test_msg("Couldn't allocate test extent buffer\n");
+ test_err("couldn't allocate test extent buffer");
kfree(bitmap);
return -ENOMEM;
}
@@ -424,7 +425,7 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
{
int ret;
- test_msg("Running extent I/O tests\n");
+ test_msg("running extent I/O tests");
ret = test_find_delalloc(sectorsize);
if (ret)
@@ -432,6 +433,6 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
ret = test_eb_bitmaps(sectorsize, nodesize);
out:
- test_msg("Extent I/O tests finished\n");
+ test_msg("extent I/O tests finished");
return ret;
}
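Beyond the test_msg()/test_err() conversion, the hunks above also replace the non-standard %Lu length modifier with %llu. In the kernel, u64 is unsigned long long on every supported architecture, so %llu matches it directly with no cast; a minimal sketch:

	u64 start = 0, end = SZ_1M - 1;

	/* preferred: standard C length modifier for u64 */
	pr_err("stuck in a loop, start %llu, end %llu\n", start, end);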
diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index 79e0a5f4d9c9..385a5316e4bf 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -19,8 +19,8 @@ static void free_extent_map_tree(struct extent_map_tree *em_tree)
#ifdef CONFIG_BTRFS_DEBUG
if (refcount_read(&em->refs) != 1) {
- test_msg(
-"em leak: em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx) refs %d\n",
+ test_err(
+"em leak: em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx) refs %d",
em->start, em->len, em->block_start,
em->block_len, refcount_read(&em->refs));
@@ -47,7 +47,8 @@ static void free_extent_map_tree(struct extent_map_tree *em_tree)
* ->add_extent_mapping(0, 16K)
* -> #handle -EEXIST
*/
-static void test_case_1(struct extent_map_tree *em_tree)
+static void test_case_1(struct btrfs_fs_info *fs_info,
+ struct extent_map_tree *em_tree)
{
struct extent_map *em;
u64 start = 0;
@@ -90,14 +91,14 @@ static void test_case_1(struct extent_map_tree *em_tree)
em->len = len;
em->block_start = start;
em->block_len = len;
- ret = btrfs_add_extent_mapping(em_tree, &em, em->start, em->len);
+ ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);
if (ret)
- test_msg("case1 [%llu %llu]: ret %d\n", start, start + len, ret);
+ test_err("case1 [%llu %llu]: ret %d", start, start + len, ret);
if (em &&
(em->start != 0 || extent_map_end(em) != SZ_16K ||
em->block_start != 0 || em->block_len != SZ_16K))
- test_msg(
-"case1 [%llu %llu]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu\n",
+ test_err(
+"case1 [%llu %llu]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu",
start, start + len, ret, em->start, em->len,
em->block_start, em->block_len);
free_extent_map(em);
@@ -112,7 +113,8 @@ out:
* Reading the inline ending up with EEXIST, ie. read an inline
* extent and discard page cache and read it again.
*/
-static void test_case_2(struct extent_map_tree *em_tree)
+static void test_case_2(struct btrfs_fs_info *fs_info,
+ struct extent_map_tree *em_tree)
{
struct extent_map *em;
int ret;
@@ -153,14 +155,14 @@ static void test_case_2(struct extent_map_tree *em_tree)
em->len = SZ_1K;
em->block_start = EXTENT_MAP_INLINE;
em->block_len = (u64)-1;
- ret = btrfs_add_extent_mapping(em_tree, &em, em->start, em->len);
+ ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);
if (ret)
- test_msg("case2 [0 1K]: ret %d\n", ret);
+ test_err("case2 [0 1K]: ret %d", ret);
if (em &&
(em->start != 0 || extent_map_end(em) != SZ_1K ||
em->block_start != EXTENT_MAP_INLINE || em->block_len != (u64)-1))
- test_msg(
-"case2 [0 1K]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu\n",
+ test_err(
+"case2 [0 1K]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu",
ret, em->start, em->len, em->block_start,
em->block_len);
free_extent_map(em);
@@ -169,7 +171,8 @@ out:
free_extent_map_tree(em_tree);
}
-static void __test_case_3(struct extent_map_tree *em_tree, u64 start)
+static void __test_case_3(struct btrfs_fs_info *fs_info,
+ struct extent_map_tree *em_tree, u64 start)
{
struct extent_map *em;
u64 len = SZ_4K;
@@ -198,9 +201,9 @@ static void __test_case_3(struct extent_map_tree *em_tree, u64 start)
em->len = SZ_16K;
em->block_start = 0;
em->block_len = SZ_16K;
- ret = btrfs_add_extent_mapping(em_tree, &em, start, len);
+ ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
if (ret)
- test_msg("case3 [0x%llx 0x%llx): ret %d\n",
+ test_err("case3 [0x%llx 0x%llx): ret %d",
start, start + len, ret);
/*
* Since bytes within em are contiguous, em->block_start is identical to
@@ -209,8 +212,8 @@ static void __test_case_3(struct extent_map_tree *em_tree, u64 start)
if (em &&
(start < em->start || start + len > extent_map_end(em) ||
em->start != em->block_start || em->len != em->block_len))
- test_msg(
-"case3 [0x%llx 0x%llx): ret %d em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)\n",
+ test_err(
+"case3 [0x%llx 0x%llx): ret %d em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)",
start, start + len, ret, em->start, em->len,
em->block_start, em->block_len);
free_extent_map(em);
@@ -235,14 +238,16 @@ out:
* -> add_extent_mapping()
* -> add_extent_mapping()
*/
-static void test_case_3(struct extent_map_tree *em_tree)
+static void test_case_3(struct btrfs_fs_info *fs_info,
+ struct extent_map_tree *em_tree)
{
- __test_case_3(em_tree, 0);
- __test_case_3(em_tree, SZ_8K);
- __test_case_3(em_tree, (12 * 1024ULL));
+ __test_case_3(fs_info, em_tree, 0);
+ __test_case_3(fs_info, em_tree, SZ_8K);
+ __test_case_3(fs_info, em_tree, (12 * 1024ULL));
}
-static void __test_case_4(struct extent_map_tree *em_tree, u64 start)
+static void __test_case_4(struct btrfs_fs_info *fs_info,
+ struct extent_map_tree *em_tree, u64 start)
{
struct extent_map *em;
u64 len = SZ_4K;
@@ -283,14 +288,14 @@ static void __test_case_4(struct extent_map_tree *em_tree, u64 start)
em->len = SZ_32K;
em->block_start = 0;
em->block_len = SZ_32K;
- ret = btrfs_add_extent_mapping(em_tree, &em, start, len);
+ ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
if (ret)
- test_msg("case4 [0x%llx 0x%llx): ret %d\n",
+ test_err("case4 [0x%llx 0x%llx): ret %d",
start, len, ret);
if (em &&
(start < em->start || start + len > extent_map_end(em)))
- test_msg(
-"case4 [0x%llx 0x%llx): ret %d, added wrong em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)\n",
+ test_err(
+"case4 [0x%llx 0x%llx): ret %d, added wrong em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)",
start, len, ret, em->start, em->len, em->block_start,
em->block_len);
free_extent_map(em);
@@ -324,30 +329,45 @@ out:
* # handle -EEXIST when adding
* # [0, 32K)
*/
-static void test_case_4(struct extent_map_tree *em_tree)
+static void test_case_4(struct btrfs_fs_info *fs_info,
+ struct extent_map_tree *em_tree)
{
- __test_case_4(em_tree, 0);
- __test_case_4(em_tree, SZ_4K);
+ __test_case_4(fs_info, em_tree, 0);
+ __test_case_4(fs_info, em_tree, SZ_4K);
}
int btrfs_test_extent_map(void)
{
+ struct btrfs_fs_info *fs_info = NULL;
struct extent_map_tree *em_tree;
- test_msg("Running extent_map tests\n");
+ test_msg("running extent_map tests");
+
+ /*
+ * Note: the fs_info is not set up completely; we only need
+ * fs_info::fsid for the tracepoint.
+ */
+ fs_info = btrfs_alloc_dummy_fs_info(PAGE_SIZE, PAGE_SIZE);
+ if (!fs_info) {
+ test_msg("Couldn't allocate dummy fs info");
+ return -ENOMEM;
+ }
em_tree = kzalloc(sizeof(*em_tree), GFP_KERNEL);
if (!em_tree)
/* Skip the test on error. */
- return 0;
+ goto out;
extent_map_tree_init(em_tree);
- test_case_1(em_tree);
- test_case_2(em_tree);
- test_case_3(em_tree);
- test_case_4(em_tree);
+ test_case_1(fs_info, em_tree);
+ test_case_2(fs_info, em_tree);
+ test_case_3(fs_info, em_tree);
+ test_case_4(fs_info, em_tree);
kfree(em_tree);
+out:
+ btrfs_free_dummy_fs_info(fs_info);
+
return 0;
}
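The extent-map hunks thread a struct btrfs_fs_info into every test case solely because btrfs_add_extent_mapping() now wants it for the EEXIST tracepoint, as the comment above notes. Condensed, the setup/teardown pattern btrfs_test_extent_map() now follows (names exactly as in the hunks; error paths shortened for illustration):

	struct btrfs_fs_info *fs_info;
	struct extent_map_tree *em_tree;

	/* partially initialized fs_info: only ::fsid is consumed here */
	fs_info = btrfs_alloc_dummy_fs_info(PAGE_SIZE, PAGE_SIZE);
	if (!fs_info)
		return -ENOMEM;

	em_tree = kzalloc(sizeof(*em_tree), GFP_KERNEL);
	if (em_tree) {
		extent_map_tree_init(em_tree);
		test_case_1(fs_info, em_tree);	/* each case takes fs_info now */
		kfree(em_tree);
	}
	btrfs_free_dummy_fs_info(fs_info);
	return 0;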
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c
index d3c9f8a59ba5..5c2f77e9439b 100644
--- a/fs/btrfs/tests/free-space-tests.c
+++ b/fs/btrfs/tests/free-space-tests.c
@@ -20,63 +20,63 @@ static int test_extents(struct btrfs_block_group_cache *cache)
{
int ret = 0;
- test_msg("Running extent only tests\n");
+ test_msg("running extent only tests");
/* First just make sure we can remove an entire entry */
ret = btrfs_add_free_space(cache, 0, SZ_4M);
if (ret) {
- test_msg("Error adding initial extents %d\n", ret);
+ test_err("error adding initial extents %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 0, SZ_4M);
if (ret) {
- test_msg("Error removing extent %d\n", ret);
+ test_err("error removing extent %d", ret);
return ret;
}
if (test_check_exists(cache, 0, SZ_4M)) {
- test_msg("Full remove left some lingering space\n");
+ test_err("full remove left some lingering space");
return -1;
}
/* Ok edge and middle cases now */
ret = btrfs_add_free_space(cache, 0, SZ_4M);
if (ret) {
- test_msg("Error adding half extent %d\n", ret);
+ test_err("error adding half extent %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 3 * SZ_1M, SZ_1M);
if (ret) {
- test_msg("Error removing tail end %d\n", ret);
+ test_err("error removing tail end %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 0, SZ_1M);
if (ret) {
- test_msg("Error removing front end %d\n", ret);
+ test_err("error removing front end %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, SZ_2M, 4096);
if (ret) {
- test_msg("Error removing middle piece %d\n", ret);
+ test_err("error removing middle piece %d", ret);
return ret;
}
if (test_check_exists(cache, 0, SZ_1M)) {
- test_msg("Still have space at the front\n");
+ test_err("still have space at the front");
return -1;
}
if (test_check_exists(cache, SZ_2M, 4096)) {
- test_msg("Still have space in the middle\n");
+ test_err("still have space in the middle");
return -1;
}
if (test_check_exists(cache, 3 * SZ_1M, SZ_1M)) {
- test_msg("Still have space at the end\n");
+ test_err("still have space at the end");
return -1;
}
@@ -92,34 +92,34 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache,
u64 next_bitmap_offset;
int ret;
- test_msg("Running bitmap only tests\n");
+ test_msg("running bitmap only tests");
ret = test_add_free_space_entry(cache, 0, SZ_4M, 1);
if (ret) {
- test_msg("Couldn't create a bitmap entry %d\n", ret);
+ test_err("couldn't create a bitmap entry %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 0, SZ_4M);
if (ret) {
- test_msg("Error removing bitmap full range %d\n", ret);
+ test_err("error removing bitmap full range %d", ret);
return ret;
}
if (test_check_exists(cache, 0, SZ_4M)) {
- test_msg("Left some space in bitmap\n");
+ test_err("left some space in bitmap");
return -1;
}
ret = test_add_free_space_entry(cache, 0, SZ_4M, 1);
if (ret) {
- test_msg("Couldn't add to our bitmap entry %d\n", ret);
+ test_err("couldn't add to our bitmap entry %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, SZ_1M, SZ_2M);
if (ret) {
- test_msg("Couldn't remove middle chunk %d\n", ret);
+ test_err("couldn't remove middle chunk %d", ret);
return ret;
}
@@ -133,19 +133,19 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache,
ret = test_add_free_space_entry(cache, next_bitmap_offset - SZ_2M,
SZ_4M, 1);
if (ret) {
- test_msg("Couldn't add space that straddles two bitmaps %d\n",
+ test_err("couldn't add space that straddles two bitmaps %d",
ret);
return ret;
}
ret = btrfs_remove_free_space(cache, next_bitmap_offset - SZ_1M, SZ_2M);
if (ret) {
- test_msg("Couldn't remove overlapping space %d\n", ret);
+ test_err("couldn't remove overlapping space %d", ret);
return ret;
}
if (test_check_exists(cache, next_bitmap_offset - SZ_1M, SZ_2M)) {
- test_msg("Left some space when removing overlapping\n");
+ test_err("left some space when removing overlapping");
return -1;
}
@@ -161,7 +161,7 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache,
u64 bitmap_offset = (u64)(BITS_PER_BITMAP * sectorsize);
int ret;
- test_msg("Running bitmap and extent tests\n");
+ test_msg("running bitmap and extent tests");
/*
* First let's do something simple, an extent at the same offset as the
@@ -170,42 +170,42 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache,
*/
ret = test_add_free_space_entry(cache, SZ_4M, SZ_1M, 1);
if (ret) {
- test_msg("Couldn't create bitmap entry %d\n", ret);
+ test_err("couldn't create bitmap entry %d", ret);
return ret;
}
ret = test_add_free_space_entry(cache, 0, SZ_1M, 0);
if (ret) {
- test_msg("Couldn't add extent entry %d\n", ret);
+ test_err("couldn't add extent entry %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 0, SZ_1M);
if (ret) {
- test_msg("Couldn't remove extent entry %d\n", ret);
+ test_err("couldn't remove extent entry %d", ret);
return ret;
}
if (test_check_exists(cache, 0, SZ_1M)) {
- test_msg("Left remnants after our remove\n");
+ test_err("left remnants after our remove");
return -1;
}
/* Now to add back the extent entry and remove from the bitmap */
ret = test_add_free_space_entry(cache, 0, SZ_1M, 0);
if (ret) {
- test_msg("Couldn't re-add extent entry %d\n", ret);
+ test_err("couldn't re-add extent entry %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, SZ_4M, SZ_1M);
if (ret) {
- test_msg("Couldn't remove from bitmap %d\n", ret);
+ test_err("couldn't remove from bitmap %d", ret);
return ret;
}
if (test_check_exists(cache, SZ_4M, SZ_1M)) {
- test_msg("Left remnants in the bitmap\n");
+ test_err("left remnants in the bitmap");
return -1;
}
@@ -215,18 +215,18 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache,
*/
ret = test_add_free_space_entry(cache, SZ_1M, SZ_4M, 1);
if (ret) {
- test_msg("Couldn't add to a bitmap %d\n", ret);
+ test_err("couldn't add to a bitmap %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, SZ_512K, 3 * SZ_1M);
if (ret) {
- test_msg("Couldn't remove overlapping space %d\n", ret);
+ test_err("couldn't remove overlapping space %d", ret);
return ret;
}
if (test_check_exists(cache, SZ_512K, 3 * SZ_1M)) {
- test_msg("Left over pieces after removing overlapping\n");
+ test_err("left over pieces after removing overlapping");
return -1;
}
@@ -235,24 +235,24 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache,
/* Now with the extent entry offset into the bitmap */
ret = test_add_free_space_entry(cache, SZ_4M, SZ_4M, 1);
if (ret) {
- test_msg("Couldn't add space to the bitmap %d\n", ret);
+ test_err("couldn't add space to the bitmap %d", ret);
return ret;
}
ret = test_add_free_space_entry(cache, SZ_2M, SZ_2M, 0);
if (ret) {
- test_msg("Couldn't add extent to the cache %d\n", ret);
+ test_err("couldn't add extent to the cache %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 3 * SZ_1M, SZ_4M);
if (ret) {
- test_msg("Problem removing overlapping space %d\n", ret);
+ test_err("problem removing overlapping space %d", ret);
return ret;
}
if (test_check_exists(cache, 3 * SZ_1M, SZ_4M)) {
- test_msg("Left something behind when removing space");
+ test_err("left something behind when removing space");
return -1;
}
@@ -269,25 +269,25 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache,
__btrfs_remove_free_space_cache(cache->free_space_ctl);
ret = test_add_free_space_entry(cache, bitmap_offset + SZ_4M, SZ_4M, 1);
if (ret) {
- test_msg("Couldn't add bitmap %d\n", ret);
+ test_err("couldn't add bitmap %d", ret);
return ret;
}
ret = test_add_free_space_entry(cache, bitmap_offset - SZ_1M,
5 * SZ_1M, 0);
if (ret) {
- test_msg("Couldn't add extent entry %d\n", ret);
+ test_err("couldn't add extent entry %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, bitmap_offset + SZ_1M, 5 * SZ_1M);
if (ret) {
- test_msg("Failed to free our space %d\n", ret);
+ test_err("failed to free our space %d", ret);
return ret;
}
if (test_check_exists(cache, bitmap_offset + SZ_1M, 5 * SZ_1M)) {
- test_msg("Left stuff over\n");
+ test_err("left stuff over");
return -1;
}
@@ -301,19 +301,19 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache,
*/
ret = test_add_free_space_entry(cache, SZ_1M, SZ_2M, 1);
if (ret) {
- test_msg("Couldn't add bitmap entry %d\n", ret);
+ test_err("couldn't add bitmap entry %d", ret);
return ret;
}
ret = test_add_free_space_entry(cache, 3 * SZ_1M, SZ_1M, 0);
if (ret) {
- test_msg("Couldn't add extent entry %d\n", ret);
+ test_err("couldn't add extent entry %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, SZ_1M, 3 * SZ_1M);
if (ret) {
- test_msg("Error removing bitmap and extent overlapping %d\n", ret);
+ test_err("error removing bitmap and extent overlapping %d", ret);
return ret;
}
@@ -335,12 +335,14 @@ check_num_extents_and_bitmaps(const struct btrfs_block_group_cache *cache,
const int num_bitmaps)
{
if (cache->free_space_ctl->free_extents != num_extents) {
- test_msg("Incorrect # of extent entries in the cache: %d, expected %d\n",
+ test_err(
+ "incorrect # of extent entries in the cache: %d, expected %d",
cache->free_space_ctl->free_extents, num_extents);
return -EINVAL;
}
if (cache->free_space_ctl->total_bitmaps != num_bitmaps) {
- test_msg("Incorrect # of extent entries in the cache: %d, expected %d\n",
+ test_err(
+ "incorrect # of extent entries in the cache: %d, expected %d",
cache->free_space_ctl->total_bitmaps, num_bitmaps);
return -EINVAL;
}
@@ -358,7 +360,7 @@ static int check_cache_empty(struct btrfs_block_group_cache *cache)
* allocate.
*/
if (cache->free_space_ctl->free_space != 0) {
- test_msg("Cache free space is not 0\n");
+ test_err("cache free space is not 0");
return -EINVAL;
}
@@ -366,7 +368,7 @@ static int check_cache_empty(struct btrfs_block_group_cache *cache)
offset = btrfs_find_space_for_alloc(cache, 0, 4096, 0,
&max_extent_size);
if (offset != 0) {
- test_msg("Space allocation did not fail, returned offset: %llu",
+ test_err("space allocation did not fail, returned offset: %llu",
offset);
return -EINVAL;
}
@@ -402,7 +404,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
};
const struct btrfs_free_space_op *orig_free_space_ops;
- test_msg("Running space stealing from bitmap to extent\n");
+ test_msg("running space stealing from bitmap to extent");
/*
* For this test, we want to ensure we end up with an extent entry
@@ -430,7 +432,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
*/
ret = test_add_free_space_entry(cache, SZ_128M - SZ_256K, SZ_128K, 0);
if (ret) {
- test_msg("Couldn't add extent entry %d\n", ret);
+ test_err("couldn't add extent entry %d", ret);
return ret;
}
@@ -438,7 +440,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
ret = test_add_free_space_entry(cache, SZ_128M + SZ_512K,
SZ_128M - SZ_512K, 1);
if (ret) {
- test_msg("Couldn't add bitmap entry %d\n", ret);
+ test_err("couldn't add bitmap entry %d", ret);
return ret;
}
@@ -457,17 +459,17 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
SZ_128M + 768 * SZ_1K,
SZ_128M - 768 * SZ_1K);
if (ret) {
- test_msg("Failed to free part of bitmap space %d\n", ret);
+ test_err("failed to free part of bitmap space %d", ret);
return ret;
}
/* Confirm that only those 2 ranges are marked as free. */
if (!test_check_exists(cache, SZ_128M - SZ_256K, SZ_128K)) {
- test_msg("Free space range missing\n");
+ test_err("free space range missing");
return -ENOENT;
}
if (!test_check_exists(cache, SZ_128M + SZ_512K, SZ_256K)) {
- test_msg("Free space range missing\n");
+ test_err("free space range missing");
return -ENOENT;
}
@@ -477,7 +479,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
*/
if (test_check_exists(cache, SZ_128M + 768 * SZ_1K,
SZ_128M - 768 * SZ_1K)) {
- test_msg("Bitmap region not removed from space cache\n");
+ test_err("bitmap region not removed from space cache");
return -EINVAL;
}
@@ -486,7 +488,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
* covered by the bitmap, isn't marked as free.
*/
if (test_check_exists(cache, SZ_128M + SZ_256K, SZ_256K)) {
- test_msg("Invalid bitmap region marked as free\n");
+ test_err("invalid bitmap region marked as free");
return -EINVAL;
}
@@ -495,7 +497,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
* by the bitmap too, isn't marked as free either.
*/
if (test_check_exists(cache, SZ_128M, SZ_256K)) {
- test_msg("Invalid bitmap region marked as free\n");
+ test_err("invalid bitmap region marked as free");
return -EINVAL;
}
@@ -506,12 +508,12 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
*/
ret = btrfs_add_free_space(cache, SZ_128M, SZ_512K);
if (ret) {
- test_msg("Error adding free space: %d\n", ret);
+ test_err("error adding free space: %d", ret);
return ret;
}
/* Confirm the region is marked as free. */
if (!test_check_exists(cache, SZ_128M, SZ_512K)) {
- test_msg("Bitmap region not marked as free\n");
+ test_err("bitmap region not marked as free");
return -ENOENT;
}
@@ -531,7 +533,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
*/
ret = btrfs_add_free_space(cache, SZ_128M + SZ_16M, sectorsize);
if (ret) {
- test_msg("Error adding free space: %d\n", ret);
+ test_err("error adding free space: %d", ret);
return ret;
}
@@ -550,12 +552,12 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
*/
ret = btrfs_add_free_space(cache, SZ_128M - SZ_128K, SZ_128K);
if (ret) {
- test_msg("Error adding free space: %d\n", ret);
+ test_err("error adding free space: %d", ret);
return ret;
}
/* Confirm the region is marked as free. */
if (!test_check_exists(cache, SZ_128M - SZ_128K, SZ_128K)) {
- test_msg("Extent region not marked as free\n");
+ test_err("extent region not marked as free");
return -ENOENT;
}
@@ -583,12 +585,12 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
* allocate the whole free space at once.
*/
if (!test_check_exists(cache, SZ_128M - SZ_256K, SZ_1M)) {
- test_msg("Expected region not marked as free\n");
+ test_err("expected region not marked as free");
return -ENOENT;
}
if (cache->free_space_ctl->free_space != (SZ_1M + sectorsize)) {
- test_msg("Cache free space is not 1Mb + %u\n", sectorsize);
+ test_err("cache free space is not 1Mb + %u", sectorsize);
return -EINVAL;
}
@@ -596,7 +598,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
0, SZ_1M, 0,
&max_extent_size);
if (offset != (SZ_128M - SZ_256K)) {
- test_msg("Failed to allocate 1Mb from space cache, returned offset is: %llu\n",
+ test_err(
+ "failed to allocate 1Mb from space cache, returned offset is: %llu",
offset);
return -EINVAL;
}
@@ -610,7 +613,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
return ret;
if (cache->free_space_ctl->free_space != sectorsize) {
- test_msg("Cache free space is not %u\n", sectorsize);
+ test_err("cache free space is not %u", sectorsize);
return -EINVAL;
}
@@ -618,7 +621,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
0, sectorsize, 0,
&max_extent_size);
if (offset != (SZ_128M + SZ_16M)) {
- test_msg("Failed to allocate %u, returned offset : %llu\n",
+ test_err("failed to allocate %u, returned offset : %llu",
sectorsize, offset);
return -EINVAL;
}
@@ -640,14 +643,14 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
*/
ret = test_add_free_space_entry(cache, SZ_128M + SZ_128K, SZ_128K, 0);
if (ret) {
- test_msg("Couldn't add extent entry %d\n", ret);
+ test_err("couldn't add extent entry %d", ret);
return ret;
}
/* Bitmap entry covering free space range [0, 128Mb - 512Kb[ */
ret = test_add_free_space_entry(cache, 0, SZ_128M - SZ_512K, 1);
if (ret) {
- test_msg("Couldn't add bitmap entry %d\n", ret);
+ test_err("couldn't add bitmap entry %d", ret);
return ret;
}
@@ -664,17 +667,17 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
*/
ret = btrfs_remove_free_space(cache, 0, SZ_128M - 768 * SZ_1K);
if (ret) {
- test_msg("Failed to free part of bitmap space %d\n", ret);
+ test_err("failed to free part of bitmap space %d", ret);
return ret;
}
/* Confirm that only those 2 ranges are marked as free. */
if (!test_check_exists(cache, SZ_128M + SZ_128K, SZ_128K)) {
- test_msg("Free space range missing\n");
+ test_err("free space range missing");
return -ENOENT;
}
if (!test_check_exists(cache, SZ_128M - 768 * SZ_1K, SZ_256K)) {
- test_msg("Free space range missing\n");
+ test_err("free space range missing");
return -ENOENT;
}
@@ -683,7 +686,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
* as free anymore.
*/
if (test_check_exists(cache, 0, SZ_128M - 768 * SZ_1K)) {
- test_msg("Bitmap region not removed from space cache\n");
+ test_err("bitmap region not removed from space cache");
return -EINVAL;
}
@@ -692,7 +695,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
* covered by the bitmap, isn't marked as free.
*/
if (test_check_exists(cache, SZ_128M - SZ_512K, SZ_512K)) {
- test_msg("Invalid bitmap region marked as free\n");
+ test_err("invalid bitmap region marked as free");
return -EINVAL;
}
@@ -703,12 +706,12 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
*/
ret = btrfs_add_free_space(cache, SZ_128M - SZ_512K, SZ_512K);
if (ret) {
- test_msg("Error adding free space: %d\n", ret);
+ test_err("error adding free space: %d", ret);
return ret;
}
/* Confirm the region is marked as free. */
if (!test_check_exists(cache, SZ_128M - SZ_512K, SZ_512K)) {
- test_msg("Bitmap region not marked as free\n");
+ test_err("bitmap region not marked as free");
return -ENOENT;
}
@@ -728,7 +731,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
*/
ret = btrfs_add_free_space(cache, SZ_32M, 2 * sectorsize);
if (ret) {
- test_msg("Error adding free space: %d\n", ret);
+ test_err("error adding free space: %d", ret);
return ret;
}
@@ -739,12 +742,12 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
*/
ret = btrfs_add_free_space(cache, SZ_128M, SZ_128K);
if (ret) {
- test_msg("Error adding free space: %d\n", ret);
+ test_err("error adding free space: %d", ret);
return ret;
}
/* Confirm the region is marked as free. */
if (!test_check_exists(cache, SZ_128M, SZ_128K)) {
- test_msg("Extent region not marked as free\n");
+ test_err("extent region not marked as free");
return -ENOENT;
}
@@ -772,19 +775,20 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
* allocate the whole free space at once.
*/
if (!test_check_exists(cache, SZ_128M - 768 * SZ_1K, SZ_1M)) {
- test_msg("Expected region not marked as free\n");
+ test_err("expected region not marked as free");
return -ENOENT;
}
if (cache->free_space_ctl->free_space != (SZ_1M + 2 * sectorsize)) {
- test_msg("Cache free space is not 1Mb + %u\n", 2 * sectorsize);
+ test_err("cache free space is not 1Mb + %u", 2 * sectorsize);
return -EINVAL;
}
offset = btrfs_find_space_for_alloc(cache, 0, SZ_1M, 0,
&max_extent_size);
if (offset != (SZ_128M - 768 * SZ_1K)) {
- test_msg("Failed to allocate 1Mb from space cache, returned offset is: %llu\n",
+ test_err(
+ "failed to allocate 1Mb from space cache, returned offset is: %llu",
offset);
return -EINVAL;
}
@@ -798,7 +802,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
return ret;
if (cache->free_space_ctl->free_space != 2 * sectorsize) {
- test_msg("Cache free space is not %u\n", 2 * sectorsize);
+ test_err("cache free space is not %u", 2 * sectorsize);
return -EINVAL;
}
@@ -806,9 +810,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
0, 2 * sectorsize, 0,
&max_extent_size);
if (offset != SZ_32M) {
- test_msg("Failed to allocate %u, offset: %llu\n",
- 2 * sectorsize,
- offset);
+ test_err("failed to allocate %u, offset: %llu",
+ 2 * sectorsize, offset);
return -EINVAL;
}
@@ -829,7 +832,7 @@ int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
struct btrfs_root *root = NULL;
int ret = -ENOMEM;
- test_msg("Running btrfs free space cache tests\n");
+ test_msg("running btrfs free space cache tests");
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info)
return -ENOMEM;
@@ -843,7 +846,7 @@ int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
cache = btrfs_alloc_dummy_block_group(fs_info,
BITS_PER_BITMAP * sectorsize + PAGE_SIZE);
if (!cache) {
- test_msg("Couldn't run the tests\n");
+ test_err("couldn't run the tests");
btrfs_free_dummy_fs_info(fs_info);
return 0;
}
@@ -871,6 +874,6 @@ out:
btrfs_free_dummy_block_group(cache);
btrfs_free_dummy_root(root);
btrfs_free_dummy_fs_info(fs_info);
- test_msg("Free space cache tests finished\n");
+ test_msg("free space cache tests finished");
return ret;
}
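Every case in free-space-tests.c above follows the same seed/mutate/verify idiom: add free space, carve some of it back out, then assert via test_check_exists() that exactly the expected ranges survive. Reduced to a skeleton (helpers as used above; the -1 return mirrors the file's existing convention):

	int ret;

	ret = btrfs_add_free_space(cache, 0, SZ_4M);	/* seed one 4M extent */
	if (ret)
		return ret;
	ret = btrfs_remove_free_space(cache, 0, SZ_4M);	/* carve it all out */
	if (ret)
		return ret;
	if (test_check_exists(cache, 0, SZ_4M)) {	/* nothing may remain */
		test_err("full remove left some lingering space");
		return -1;
	}
	return 0;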
diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c
index e1f9666c4974..89346da890cf 100644
--- a/fs/btrfs/tests/free-space-tree-tests.c
+++ b/fs/btrfs/tests/free-space-tree-tests.c
@@ -32,7 +32,7 @@ static int __check_free_space_extents(struct btrfs_trans_handle *trans,
info = search_free_space_info(trans, fs_info, cache, path, 0);
if (IS_ERR(info)) {
- test_msg("Could not find free space info\n");
+ test_err("could not find free space info");
ret = PTR_ERR(info);
goto out;
}
@@ -40,7 +40,7 @@ static int __check_free_space_extents(struct btrfs_trans_handle *trans,
extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
if (extent_count != num_extents) {
- test_msg("Extent count is wrong\n");
+ test_err("extent count is wrong");
ret = -EINVAL;
goto out;
}
@@ -99,7 +99,7 @@ out:
btrfs_release_path(path);
return ret;
invalid:
- test_msg("Free space tree is invalid\n");
+ test_err("free space tree is invalid");
ret = -EINVAL;
goto out;
}
@@ -117,7 +117,7 @@ static int check_free_space_extents(struct btrfs_trans_handle *trans,
info = search_free_space_info(trans, fs_info, cache, path, 0);
if (IS_ERR(info)) {
- test_msg("Could not find free space info\n");
+ test_err("could not find free space info");
btrfs_release_path(path);
return PTR_ERR(info);
}
@@ -131,15 +131,15 @@ static int check_free_space_extents(struct btrfs_trans_handle *trans,
/* Flip it to the other format and check that for good measure. */
if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
- ret = convert_free_space_to_extents(trans, fs_info, cache, path);
+ ret = convert_free_space_to_extents(trans, cache, path);
if (ret) {
- test_msg("Could not convert to extents\n");
+ test_err("could not convert to extents");
return ret;
}
} else {
- ret = convert_free_space_to_bitmaps(trans, fs_info, cache, path);
+ ret = convert_free_space_to_bitmaps(trans, cache, path);
if (ret) {
- test_msg("Could not convert to bitmaps\n");
+ test_err("could not convert to bitmaps");
return ret;
}
}
@@ -170,11 +170,11 @@ static int test_remove_all(struct btrfs_trans_handle *trans,
const struct free_space_extent extents[] = {};
int ret;
- ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ ret = __remove_from_free_space_tree(trans, cache, path,
cache->key.objectid,
cache->key.offset);
if (ret) {
- test_msg("Could not remove free space\n");
+ test_err("could not remove free space");
return ret;
}
@@ -194,10 +194,10 @@ static int test_remove_beginning(struct btrfs_trans_handle *trans,
};
int ret;
- ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ ret = __remove_from_free_space_tree(trans, cache, path,
cache->key.objectid, alignment);
if (ret) {
- test_msg("Could not remove free space\n");
+ test_err("could not remove free space");
return ret;
}
@@ -217,12 +217,12 @@ static int test_remove_end(struct btrfs_trans_handle *trans,
};
int ret;
- ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ ret = __remove_from_free_space_tree(trans, cache, path,
cache->key.objectid +
cache->key.offset - alignment,
alignment);
if (ret) {
- test_msg("Could not remove free space\n");
+ test_err("could not remove free space");
return ret;
}
@@ -243,11 +243,11 @@ static int test_remove_middle(struct btrfs_trans_handle *trans,
};
int ret;
- ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ ret = __remove_from_free_space_tree(trans, cache, path,
cache->key.objectid + alignment,
alignment);
if (ret) {
- test_msg("Could not remove free space\n");
+ test_err("could not remove free space");
return ret;
}
@@ -266,26 +266,26 @@ static int test_merge_left(struct btrfs_trans_handle *trans,
};
int ret;
- ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ ret = __remove_from_free_space_tree(trans, cache, path,
cache->key.objectid,
cache->key.offset);
if (ret) {
- test_msg("Could not remove free space\n");
+ test_err("could not remove free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, fs_info, cache, path,
- cache->key.objectid, alignment);
+ ret = __add_to_free_space_tree(trans, cache, path, cache->key.objectid,
+ alignment);
if (ret) {
- test_msg("Could not add free space\n");
+ test_err("could not add free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ ret = __add_to_free_space_tree(trans, cache, path,
cache->key.objectid + alignment,
alignment);
if (ret) {
- test_msg("Could not add free space\n");
+ test_err("could not add free space");
return ret;
}
@@ -304,27 +304,27 @@ static int test_merge_right(struct btrfs_trans_handle *trans,
};
int ret;
- ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ ret = __remove_from_free_space_tree(trans, cache, path,
cache->key.objectid,
cache->key.offset);
if (ret) {
- test_msg("Could not remove free space\n");
+ test_err("could not remove free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ ret = __add_to_free_space_tree(trans, cache, path,
cache->key.objectid + 2 * alignment,
alignment);
if (ret) {
- test_msg("Could not add free space\n");
+ test_err("could not add free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ ret = __add_to_free_space_tree(trans, cache, path,
cache->key.objectid + alignment,
alignment);
if (ret) {
- test_msg("Could not add free space\n");
+ test_err("could not add free space");
return ret;
}
@@ -343,34 +343,34 @@ static int test_merge_both(struct btrfs_trans_handle *trans,
};
int ret;
- ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ ret = __remove_from_free_space_tree(trans, cache, path,
cache->key.objectid,
cache->key.offset);
if (ret) {
- test_msg("Could not remove free space\n");
+ test_err("could not remove free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, fs_info, cache, path,
- cache->key.objectid, alignment);
+ ret = __add_to_free_space_tree(trans, cache, path, cache->key.objectid,
+ alignment);
if (ret) {
- test_msg("Could not add free space\n");
+ test_err("could not add free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ ret = __add_to_free_space_tree(trans, cache, path,
cache->key.objectid + 2 * alignment,
alignment);
if (ret) {
- test_msg("Could not add free space\n");
+ test_err("could not add free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ ret = __add_to_free_space_tree(trans, cache, path,
cache->key.objectid + alignment,
alignment);
if (ret) {
- test_msg("Could not add free space\n");
+ test_err("could not add free space");
return ret;
}
@@ -391,34 +391,34 @@ static int test_merge_none(struct btrfs_trans_handle *trans,
};
int ret;
- ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ ret = __remove_from_free_space_tree(trans, cache, path,
cache->key.objectid,
cache->key.offset);
if (ret) {
- test_msg("Could not remove free space\n");
+ test_err("could not remove free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, fs_info, cache, path,
- cache->key.objectid, alignment);
+ ret = __add_to_free_space_tree(trans, cache, path, cache->key.objectid,
+ alignment);
if (ret) {
- test_msg("Could not add free space\n");
+ test_err("could not add free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ ret = __add_to_free_space_tree(trans, cache, path,
cache->key.objectid + 4 * alignment,
alignment);
if (ret) {
- test_msg("Could not add free space\n");
+ test_err("could not add free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ ret = __add_to_free_space_tree(trans, cache, path,
cache->key.objectid + 2 * alignment,
alignment);
if (ret) {
- test_msg("Could not add free space\n");
+ test_err("could not add free space");
return ret;
}
@@ -444,14 +444,14 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info) {
- test_msg("Couldn't allocate dummy fs info\n");
+ test_err("couldn't allocate dummy fs info");
ret = -ENOMEM;
goto out;
}
root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(root)) {
- test_msg("Couldn't allocate dummy root\n");
+ test_err("couldn't allocate dummy root");
ret = PTR_ERR(root);
goto out;
}
@@ -463,7 +463,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
root->node = alloc_test_extent_buffer(root->fs_info, nodesize);
if (!root->node) {
- test_msg("Couldn't allocate dummy buffer\n");
+ test_err("couldn't allocate dummy buffer");
ret = -ENOMEM;
goto out;
}
@@ -473,7 +473,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
cache = btrfs_alloc_dummy_block_group(fs_info, 8 * alignment);
if (!cache) {
- test_msg("Couldn't allocate dummy block group cache\n");
+ test_err("couldn't allocate dummy block group cache");
ret = -ENOMEM;
goto out;
}
@@ -482,26 +482,25 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
cache->needs_free_space = 1;
cache->fs_info = root->fs_info;
- btrfs_init_dummy_trans(&trans);
+ btrfs_init_dummy_trans(&trans, root->fs_info);
path = btrfs_alloc_path();
if (!path) {
- test_msg("Couldn't allocate path\n");
+ test_err("couldn't allocate path");
ret = -ENOMEM;
goto out;
}
- ret = add_block_group_free_space(&trans, root->fs_info, cache);
+ ret = add_block_group_free_space(&trans, cache);
if (ret) {
- test_msg("Could not add block group free space\n");
+ test_err("could not add block group free space");
goto out;
}
if (bitmaps) {
- ret = convert_free_space_to_bitmaps(&trans, root->fs_info,
- cache, path);
+ ret = convert_free_space_to_bitmaps(&trans, cache, path);
if (ret) {
- test_msg("Could not convert block group to bitmaps\n");
+ test_err("could not convert block group to bitmaps");
goto out;
}
}
@@ -510,14 +509,14 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
if (ret)
goto out;
- ret = remove_block_group_free_space(&trans, root->fs_info, cache);
+ ret = remove_block_group_free_space(&trans, cache);
if (ret) {
- test_msg("Could not remove block group free space\n");
+ test_err("could not remove block group free space");
goto out;
}
if (btrfs_header_nritems(root->node) != 0) {
- test_msg("Free space tree has leftover items\n");
+ test_err("free space tree has leftover items");
ret = -EINVAL;
goto out;
}
@@ -539,14 +538,16 @@ static int run_test_both_formats(test_func_t test_func, u32 sectorsize,
ret = run_test(test_func, 0, sectorsize, nodesize, alignment);
if (ret) {
- test_msg("%pf failed with extents, sectorsize=%u, nodesize=%u, alignment=%u\n",
+ test_err(
+ "%pf failed with extents, sectorsize=%u, nodesize=%u, alignment=%u",
test_func, sectorsize, nodesize, alignment);
test_ret = ret;
}
ret = run_test(test_func, 1, sectorsize, nodesize, alignment);
if (ret) {
- test_msg("%pf failed with bitmaps, sectorsize=%u, nodesize=%u, alignment=%u\n",
+ test_err(
+ "%pf failed with bitmaps, sectorsize=%u, nodesize=%u, alignment=%u",
test_func, sectorsize, nodesize, alignment);
test_ret = ret;
}
@@ -577,7 +578,7 @@ int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize)
*/
bitmap_alignment = BTRFS_FREE_SPACE_BITMAP_BITS * PAGE_SIZE;
- test_msg("Running free space tree tests\n");
+ test_msg("running free space tree tests");
for (i = 0; i < ARRAY_SIZE(tests); i++) {
int ret;
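The free-space-tree helpers above (__add_to_free_space_tree(), __remove_from_free_space_tree(), convert_free_space_to_bitmaps()/extents(), add_block_group_free_space(), remove_block_group_free_space()) all shed their explicit fs_info parameter; they can read it from the transaction handle instead, which is why btrfs_init_dummy_trans() gains an fs_info argument. Its post-patch shape is plausibly along these lines (the real definition sits in the shared test header, outside this diff):

	static inline void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans,
						  struct btrfs_fs_info *fs_info)
	{
		memset(trans, 0, sizeof(*trans));
		trans->transid = 1;
		trans->type = __TRANS_DUMMY;
		trans->fs_info = fs_info;	/* callees use trans->fs_info */
	}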
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index e0ba799536b4..64043f028820 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -228,7 +228,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
inode = btrfs_new_test_inode();
if (!inode) {
- test_msg("Couldn't allocate inode\n");
+ test_err("couldn't allocate inode");
return ret;
}
@@ -238,19 +238,19 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info) {
- test_msg("Couldn't allocate dummy fs info\n");
+ test_err("couldn't allocate dummy fs info");
goto out;
}
root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(root)) {
- test_msg("Couldn't allocate root\n");
+ test_err("couldn't allocate root");
goto out;
}
root->node = alloc_dummy_extent_buffer(fs_info, nodesize);
if (!root->node) {
- test_msg("Couldn't allocate dummy buffer\n");
+ test_err("couldn't allocate dummy buffer");
goto out;
}
@@ -268,11 +268,11 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, sectorsize, 0);
if (IS_ERR(em)) {
em = NULL;
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start != EXTENT_MAP_HOLE) {
- test_msg("Expected a hole, got %llu\n", em->block_start);
+ test_err("expected a hole, got %llu", em->block_start);
goto out;
}
free_extent_map(em);
@@ -287,20 +287,21 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, (u64)-1, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start != EXTENT_MAP_HOLE) {
- test_msg("Expected a hole, got %llu\n", em->block_start);
+ test_err("expected a hole, got %llu", em->block_start);
goto out;
}
if (em->start != 0 || em->len != 5) {
- test_msg("Unexpected extent wanted start 0 len 5, got start "
- "%llu len %llu\n", em->start, em->len);
+ test_err(
+ "unexpected extent wanted start 0 len 5, got start %llu len %llu",
+ em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ test_err("unexpected flags set, want 0 have %lu", em->flags);
goto out;
}
offset = em->start + em->len;
@@ -308,21 +309,22 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start != EXTENT_MAP_INLINE) {
- test_msg("Expected an inline, got %llu\n", em->block_start);
+ test_err("expected an inline, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != (sectorsize - 5)) {
- test_msg("Unexpected extent wanted start %llu len 1, got start "
- "%llu len %llu\n", offset, em->start, em->len);
+ test_err(
+ "unexpected extent wanted start %llu len 1, got start %llu len %llu",
+ offset, em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ test_err("unexpected flags set, want 0 have %lu", em->flags);
goto out;
}
/*
@@ -335,20 +337,21 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start != EXTENT_MAP_HOLE) {
- test_msg("Expected a hole, got %llu\n", em->block_start);
+ test_err("expected a hole, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != 4) {
- test_msg("Unexpected extent wanted start %llu len 4, got start "
- "%llu len %llu\n", offset, em->start, em->len);
+ test_err(
+ "unexpected extent wanted start %llu len 4, got start %llu len %llu",
+ offset, em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ test_err("unexpected flags set, want 0 have %lu", em->flags);
goto out;
}
offset = em->start + em->len;
@@ -357,24 +360,25 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
/* Regular extent */
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != sectorsize - 1) {
- test_msg("Unexpected extent wanted start %llu len 4095, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ test_err(
+ "unexpected extent wanted start %llu len 4095, got start %llu len %llu",
+ offset, em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ test_err("unexpected flags set, want 0 have %lu", em->flags);
goto out;
}
if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ test_err("wrong orig offset, want %llu, have %llu", em->start,
em->orig_start);
goto out;
}
@@ -384,25 +388,25 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
/* The next 3 are split extents */
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != sectorsize) {
- test_msg("Unexpected extent start %llu len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent start %llu len %u, got start %llu len %llu",
offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ test_err("unexpected flags set, want 0 have %lu", em->flags);
goto out;
}
if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ test_err("wrong orig offset, want %llu, have %llu", em->start,
em->orig_start);
goto out;
}
@@ -413,21 +417,21 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start != EXTENT_MAP_HOLE) {
- test_msg("Expected a hole, got %llu\n", em->block_start);
+ test_err("expected a hole, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ test_err("unexpected flags set, want 0 have %lu", em->flags);
goto out;
}
offset = em->start + em->len;
@@ -435,31 +439,31 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != 2 * sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, 2 * sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ test_err("unexpected flags set, want 0 have %lu", em->flags);
goto out;
}
if (em->orig_start != orig_start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n",
+ test_err("wrong orig offset, want %llu, have %llu",
orig_start, em->orig_start);
goto out;
}
disk_bytenr += (em->start - orig_start);
if (em->block_start != disk_bytenr) {
- test_msg("Wrong block start, want %llu, have %llu\n",
+ test_err("wrong block start, want %llu, have %llu",
disk_bytenr, em->block_start);
goto out;
}
@@ -469,26 +473,26 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
/* Prealloc extent */
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != prealloc_only) {
- test_msg("Unexpected flags set, want %lu have %lu\n",
+ test_err("unexpected flags set, want %lu have %lu",
prealloc_only, em->flags);
goto out;
}
if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ test_err("wrong orig offset, want %llu, have %llu", em->start,
em->orig_start);
goto out;
}
@@ -498,26 +502,26 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
/* The next 3 are a half written prealloc extent */
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != prealloc_only) {
- test_msg("Unexpected flags set, want %lu have %lu\n",
+ test_err("unexpected flags set, want %lu have %lu",
prealloc_only, em->flags);
goto out;
}
if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ test_err("wrong orig offset, want %llu, have %llu", em->start,
em->orig_start);
goto out;
}
@@ -528,30 +532,30 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_HOLE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ test_err("unexpected flags set, want 0 have %lu", em->flags);
goto out;
}
if (em->orig_start != orig_start) {
- test_msg("Unexpected orig offset, wanted %llu, have %llu\n",
+ test_err("unexpected orig offset, wanted %llu, have %llu",
orig_start, em->orig_start);
goto out;
}
if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) {
- test_msg("Unexpected block start, wanted %llu, have %llu\n",
+ test_err("unexpected block start, wanted %llu, have %llu",
disk_bytenr + (em->start - em->orig_start),
em->block_start);
goto out;
@@ -561,31 +565,31 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != 2 * sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, 2 * sectorsize, em->start, em->len);
goto out;
}
if (em->flags != prealloc_only) {
- test_msg("Unexpected flags set, want %lu have %lu\n",
+ test_err("unexpected flags set, want %lu have %lu",
prealloc_only, em->flags);
goto out;
}
if (em->orig_start != orig_start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", orig_start,
+ test_err("wrong orig offset, want %llu, have %llu", orig_start,
em->orig_start);
goto out;
}
if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) {
- test_msg("Unexpected block start, wanted %llu, have %llu\n",
+ test_err("unexpected block start, wanted %llu, have %llu",
disk_bytenr + (em->start - em->orig_start),
em->block_start);
goto out;
@@ -596,31 +600,31 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
/* Now for the compressed extent */
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != 2 * sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u,"
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, 2 * sectorsize, em->start, em->len);
goto out;
}
if (em->flags != compressed_only) {
- test_msg("Unexpected flags set, want %lu have %lu\n",
+ test_err("unexpected flags set, want %lu have %lu",
compressed_only, em->flags);
goto out;
}
if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n",
+ test_err("wrong orig offset, want %llu, have %llu",
em->start, em->orig_start);
goto out;
}
if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
- test_msg("Unexpected compress type, wanted %d, got %d\n",
+ test_err("unexpected compress type, wanted %d, got %d",
BTRFS_COMPRESS_ZLIB, em->compress_type);
goto out;
}
@@ -630,31 +634,31 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
/* Split compressed extent */
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u,"
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != compressed_only) {
- test_msg("Unexpected flags set, want %lu have %lu\n",
+ test_err("unexpected flags set, want %lu have %lu",
compressed_only, em->flags);
goto out;
}
if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n",
+ test_err("wrong orig offset, want %llu, have %llu",
em->start, em->orig_start);
goto out;
}
if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
- test_msg("Unexpected compress type, wanted %d, got %d\n",
+ test_err("unexpected compress type, wanted %d, got %d",
BTRFS_COMPRESS_ZLIB, em->compress_type);
goto out;
}
@@ -665,25 +669,25 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ test_err("unexpected flags set, want 0 have %lu", em->flags);
goto out;
}
if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ test_err("wrong orig offset, want %llu, have %llu", em->start,
em->orig_start);
goto out;
}
@@ -692,32 +696,32 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start != disk_bytenr) {
- test_msg("Block start does not match, want %llu got %llu\n",
+ test_err("block start does not match, want %llu got %llu",
disk_bytenr, em->block_start);
goto out;
}
if (em->start != offset || em->len != 2 * sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, 2 * sectorsize, em->start, em->len);
goto out;
}
if (em->flags != compressed_only) {
- test_msg("Unexpected flags set, want %lu have %lu\n",
+ test_err("unexpected flags set, want %lu have %lu",
compressed_only, em->flags);
goto out;
}
if (em->orig_start != orig_start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n",
+ test_err("wrong orig offset, want %llu, have %llu",
orig_start, em->orig_start);
goto out;
}
if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
- test_msg("Unexpected compress type, wanted %d, got %d\n",
+ test_err("unexpected compress type, wanted %d, got %d",
BTRFS_COMPRESS_ZLIB, em->compress_type);
goto out;
}
@@ -728,25 +732,25 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset + 6,
sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ test_err("unexpected flags set, want 0 have %lu", em->flags);
goto out;
}
if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ test_err("wrong orig offset, want %llu, have %llu", em->start,
em->orig_start);
goto out;
}
@@ -755,11 +759,11 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, SZ_4M, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start != EXTENT_MAP_HOLE) {
- test_msg("Expected a hole extent, got %llu\n", em->block_start);
+ test_err("expected a hole extent, got %llu", em->block_start);
goto out;
}
/*
@@ -768,18 +772,18 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
* test.
*/
if (em->start != offset || em->len != 3 * sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, 3 * sectorsize, em->start, em->len);
goto out;
}
if (em->flags != vacancy_only) {
- test_msg("Unexpected flags set, want %lu have %lu\n",
+ test_err("unexpected flags set, want %lu have %lu",
vacancy_only, em->flags);
goto out;
}
if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ test_err("wrong orig offset, want %llu, have %llu", em->start,
em->orig_start);
goto out;
}
@@ -788,25 +792,25 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u,"
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ test_err("unexpected flags set, want 0 have %lu", em->flags);
goto out;
}
if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ test_err("wrong orig offset, want %llu, have %llu", em->start,
em->orig_start);
goto out;
}
@@ -830,7 +834,7 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
inode = btrfs_new_test_inode();
if (!inode) {
- test_msg("Couldn't allocate inode\n");
+ test_err("couldn't allocate inode");
return ret;
}
@@ -840,19 +844,19 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info) {
- test_msg("Couldn't allocate dummy fs info\n");
+ test_err("couldn't allocate dummy fs info");
goto out;
}
root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(root)) {
- test_msg("Couldn't allocate root\n");
+ test_err("couldn't allocate root");
goto out;
}
root->node = alloc_dummy_extent_buffer(fs_info, nodesize);
if (!root->node) {
- test_msg("Couldn't allocate dummy buffer\n");
+ test_err("couldn't allocate dummy buffer");
goto out;
}
@@ -871,21 +875,21 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
sectorsize, BTRFS_FILE_EXTENT_REG, 0, 1);
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, 2 * sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start != EXTENT_MAP_HOLE) {
- test_msg("Expected a hole, got %llu\n", em->block_start);
+ test_err("expected a hole, got %llu", em->block_start);
goto out;
}
if (em->start != 0 || em->len != sectorsize) {
- test_msg("Unexpected extent wanted start 0 len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start 0 len %u, got start %llu len %llu",
sectorsize, em->start, em->len);
goto out;
}
if (em->flags != vacancy_only) {
- test_msg("Wrong flags, wanted %lu, have %lu\n", vacancy_only,
+ test_err("wrong flags, wanted %lu, have %lu", vacancy_only,
em->flags);
goto out;
}
@@ -894,21 +898,21 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, sectorsize,
2 * sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start != sectorsize) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != sectorsize || em->len != sectorsize) {
- test_msg("Unexpected extent wanted start %u len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %u len %u, got start %llu len %llu",
sectorsize, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, wanted 0 got %lu\n",
+ test_err("unexpected flags set, wanted 0 got %lu",
em->flags);
goto out;
}
@@ -931,19 +935,19 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
inode = btrfs_new_test_inode();
if (!inode) {
- test_msg("Couldn't allocate inode\n");
+ test_err("couldn't allocate inode");
return ret;
}
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info) {
- test_msg("Couldn't allocate dummy fs info\n");
+ test_err("couldn't allocate dummy fs info");
goto out;
}
root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(root)) {
- test_msg("Couldn't allocate root\n");
+ test_err("couldn't allocate root");
goto out;
}
@@ -954,12 +958,12 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1, 0,
NULL, 0);
if (ret) {
- test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+ test_err("btrfs_set_extent_delalloc returned %d", ret);
goto out;
}
if (BTRFS_I(inode)->outstanding_extents != 1) {
ret = -EINVAL;
- test_msg("Miscount, wanted 1, got %u\n",
+ test_err("miscount, wanted 1, got %u",
BTRFS_I(inode)->outstanding_extents);
goto out;
}
@@ -969,12 +973,12 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
BTRFS_MAX_EXTENT_SIZE + sectorsize - 1,
0, NULL, 0);
if (ret) {
- test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+ test_err("btrfs_set_extent_delalloc returned %d", ret);
goto out;
}
if (BTRFS_I(inode)->outstanding_extents != 2) {
ret = -EINVAL;
- test_msg("Miscount, wanted 2, got %u\n",
+ test_err("miscount, wanted 2, got %u",
BTRFS_I(inode)->outstanding_extents);
goto out;
}
@@ -986,12 +990,12 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
EXTENT_DELALLOC | EXTENT_DIRTY |
EXTENT_UPTODATE, 0, 0, NULL);
if (ret) {
- test_msg("clear_extent_bit returned %d\n", ret);
+ test_err("clear_extent_bit returned %d", ret);
goto out;
}
if (BTRFS_I(inode)->outstanding_extents != 2) {
ret = -EINVAL;
- test_msg("Miscount, wanted 2, got %u\n",
+ test_err("miscount, wanted 2, got %u",
BTRFS_I(inode)->outstanding_extents);
goto out;
}
@@ -1002,12 +1006,12 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
+ sectorsize - 1,
0, NULL, 0);
if (ret) {
- test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+ test_err("btrfs_set_extent_delalloc returned %d", ret);
goto out;
}
if (BTRFS_I(inode)->outstanding_extents != 2) {
ret = -EINVAL;
- test_msg("Miscount, wanted 2, got %u\n",
+ test_err("miscount, wanted 2, got %u",
BTRFS_I(inode)->outstanding_extents);
goto out;
}
@@ -1020,12 +1024,12 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
(BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1,
0, NULL, 0);
if (ret) {
- test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+ test_err("btrfs_set_extent_delalloc returned %d", ret);
goto out;
}
if (BTRFS_I(inode)->outstanding_extents != 4) {
ret = -EINVAL;
- test_msg("Miscount, wanted 4, got %u\n",
+ test_err("miscount, wanted 4, got %u",
BTRFS_I(inode)->outstanding_extents);
goto out;
}
@@ -1037,12 +1041,12 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
BTRFS_MAX_EXTENT_SIZE + sectorsize,
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, 0, NULL, 0);
if (ret) {
- test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+ test_err("btrfs_set_extent_delalloc returned %d", ret);
goto out;
}
if (BTRFS_I(inode)->outstanding_extents != 3) {
ret = -EINVAL;
- test_msg("Miscount, wanted 3, got %u\n",
+ test_err("miscount, wanted 3, got %u",
BTRFS_I(inode)->outstanding_extents);
goto out;
}
@@ -1054,12 +1058,12 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_UPTODATE, 0, 0, NULL);
if (ret) {
- test_msg("clear_extent_bit returned %d\n", ret);
+ test_err("clear_extent_bit returned %d", ret);
goto out;
}
if (BTRFS_I(inode)->outstanding_extents != 4) {
ret = -EINVAL;
- test_msg("Miscount, wanted 4, got %u\n",
+ test_err("miscount, wanted 4, got %u",
BTRFS_I(inode)->outstanding_extents);
goto out;
}
@@ -1072,12 +1076,12 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
BTRFS_MAX_EXTENT_SIZE + sectorsize,
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, 0, NULL, 0);
if (ret) {
- test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+ test_err("btrfs_set_extent_delalloc returned %d", ret);
goto out;
}
if (BTRFS_I(inode)->outstanding_extents != 3) {
ret = -EINVAL;
- test_msg("Miscount, wanted 3, got %u\n",
+ test_err("miscount, wanted 3, got %u",
BTRFS_I(inode)->outstanding_extents);
goto out;
}
@@ -1087,12 +1091,12 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_UPTODATE, 0, 0, NULL);
if (ret) {
- test_msg("clear_extent_bit returned %d\n", ret);
+ test_err("clear_extent_bit returned %d", ret);
goto out;
}
if (BTRFS_I(inode)->outstanding_extents) {
ret = -EINVAL;
- test_msg("Miscount, wanted 0, got %u\n",
+ test_err("miscount, wanted 0, got %u",
BTRFS_I(inode)->outstanding_extents);
goto out;
}
@@ -1115,14 +1119,14 @@ int btrfs_test_inodes(u32 sectorsize, u32 nodesize)
set_bit(EXTENT_FLAG_COMPRESSED, &compressed_only);
set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only);
- test_msg("Running btrfs_get_extent tests\n");
+ test_msg("running btrfs_get_extent tests");
ret = test_btrfs_get_extent(sectorsize, nodesize);
if (ret)
return ret;
- test_msg("Running hole first btrfs_get_extent test\n");
+ test_msg("running hole first btrfs_get_extent test");
ret = test_hole_first(sectorsize, nodesize);
if (ret)
return ret;
- test_msg("Running outstanding_extents tests\n");
+ test_msg("running outstanding_extents tests");
return test_extent_accounting(sectorsize, nodesize);
}
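
The mechanical change running through these test files is the switch from
test_msg() to test_err() for failure reports, with the trailing "\n" dropped
from every format string. That matches logging helpers along the following
lines in fs/btrfs/tests/btrfs-tests.h (a sketch of their assumed shape; the
exact definitions live in that header, which this series also touches): the
newline and the common prefix move into the macros, and errors are raised to
the error log level so failed selftests stand out in dmesg.

    /* Sketch of the assumed helpers; see fs/btrfs/tests/btrfs-tests.h. */
    #define test_msg(fmt, ...) \
            pr_info("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__)
    #define test_err(fmt, ...) \
            pr_err("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__)
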
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index 39b95783f736..ace94db09d29 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -24,7 +24,7 @@ static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
u32 size = sizeof(*item) + sizeof(*iref) + sizeof(*block_info);
int ret;
- btrfs_init_dummy_trans(&trans);
+ btrfs_init_dummy_trans(&trans, NULL);
ins.objectid = bytenr;
ins.type = BTRFS_EXTENT_ITEM_KEY;
@@ -32,14 +32,14 @@ static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
path = btrfs_alloc_path();
if (!path) {
- test_msg("Couldn't allocate path\n");
+ test_err("couldn't allocate path");
return -ENOMEM;
}
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(&trans, root, path, &ins, size);
if (ret) {
- test_msg("Couldn't insert ref %d\n", ret);
+ test_err("couldn't insert ref %d", ret);
btrfs_free_path(path);
return ret;
}
@@ -74,7 +74,7 @@ static int add_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
u64 refs;
int ret;
- btrfs_init_dummy_trans(&trans);
+ btrfs_init_dummy_trans(&trans, NULL);
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
@@ -82,14 +82,14 @@ static int add_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
path = btrfs_alloc_path();
if (!path) {
- test_msg("Couldn't allocate path\n");
+ test_err("couldn't allocate path");
return -ENOMEM;
}
path->leave_spinning = 1;
ret = btrfs_search_slot(&trans, root, &key, path, 0, 1);
if (ret) {
- test_msg("Couldn't find extent ref\n");
+ test_err("couldn't find extent ref");
btrfs_free_path(path);
return ret;
}
@@ -111,7 +111,7 @@ static int add_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
ret = btrfs_insert_empty_item(&trans, root, path, &key, 0);
if (ret)
- test_msg("Failed to insert backref\n");
+ test_err("failed to insert backref");
btrfs_free_path(path);
return ret;
}
@@ -124,7 +124,7 @@ static int remove_extent_item(struct btrfs_root *root, u64 bytenr,
struct btrfs_path *path;
int ret;
- btrfs_init_dummy_trans(&trans);
+ btrfs_init_dummy_trans(&trans, NULL);
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
@@ -132,14 +132,14 @@ static int remove_extent_item(struct btrfs_root *root, u64 bytenr,
path = btrfs_alloc_path();
if (!path) {
- test_msg("Couldn't allocate path\n");
+ test_err("couldn't allocate path");
return -ENOMEM;
}
path->leave_spinning = 1;
ret = btrfs_search_slot(&trans, root, &key, path, -1, 1);
if (ret) {
- test_msg("Didn't find our key %d\n", ret);
+ test_err("didn't find our key %d", ret);
btrfs_free_path(path);
return ret;
}
@@ -158,7 +158,7 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
u64 refs;
int ret;
- btrfs_init_dummy_trans(&trans);
+ btrfs_init_dummy_trans(&trans, NULL);
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
@@ -166,14 +166,14 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
path = btrfs_alloc_path();
if (!path) {
- test_msg("Couldn't allocate path\n");
+ test_err("couldn't allocate path");
return -ENOMEM;
}
path->leave_spinning = 1;
ret = btrfs_search_slot(&trans, root, &key, path, 0, 1);
if (ret) {
- test_msg("Couldn't find extent ref\n");
+ test_err("couldn't find extent ref");
btrfs_free_path(path);
return ret;
}
@@ -195,7 +195,7 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
ret = btrfs_search_slot(&trans, root, &key, path, -1, 1);
if (ret) {
- test_msg("Couldn't find backref %d\n", ret);
+ test_err("couldn't find backref %d", ret);
btrfs_free_path(path);
return ret;
}
@@ -213,12 +213,12 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
struct ulist *new_roots = NULL;
int ret;
- btrfs_init_dummy_trans(&trans);
+ btrfs_init_dummy_trans(&trans, fs_info);
- test_msg("Qgroup basic add\n");
+ test_msg("qgroup basic add");
ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FS_TREE_OBJECTID);
if (ret) {
- test_msg("Couldn't create a qgroup %d\n", ret);
+ test_err("couldn't create a qgroup %d", ret);
return ret;
}
@@ -231,7 +231,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
false);
if (ret) {
ulist_free(old_roots);
- test_msg("Couldn't find old roots: %d\n", ret);
+ test_err("couldn't find old roots: %d", ret);
return ret;
}
@@ -245,20 +245,20 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
- test_msg("Couldn't find old roots: %d\n", ret);
+ test_err("couldn't find old roots: %d", ret);
return ret;
}
ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
nodesize, old_roots, new_roots);
if (ret) {
- test_msg("Couldn't account space for a qgroup %d\n", ret);
+ test_err("couldn't account space for a qgroup %d", ret);
return ret;
}
if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
nodesize, nodesize)) {
- test_msg("Qgroup counts didn't match expected values\n");
+ test_err("qgroup counts didn't match expected values");
return -EINVAL;
}
old_roots = NULL;
@@ -268,7 +268,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
false);
if (ret) {
ulist_free(old_roots);
- test_msg("Couldn't find old roots: %d\n", ret);
+ test_err("couldn't find old roots: %d", ret);
return ret;
}
@@ -281,19 +281,19 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
- test_msg("Couldn't find old roots: %d\n", ret);
+ test_err("couldn't find old roots: %d", ret);
return ret;
}
ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
nodesize, old_roots, new_roots);
if (ret) {
- test_msg("Couldn't account space for a qgroup %d\n", ret);
+ test_err("couldn't account space for a qgroup %d", ret);
return -EINVAL;
}
if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, 0, 0)) {
- test_msg("Qgroup counts didn't match expected values\n");
+ test_err("qgroup counts didn't match expected values");
return -EINVAL;
}
@@ -314,9 +314,9 @@ static int test_multiple_refs(struct btrfs_root *root,
struct ulist *new_roots = NULL;
int ret;
- btrfs_init_dummy_trans(&trans);
+ btrfs_init_dummy_trans(&trans, fs_info);
- test_msg("Qgroup multiple refs test\n");
+ test_msg("qgroup multiple refs test");
/*
* We have BTRFS_FS_TREE_OBJECTID created already from the
@@ -324,7 +324,7 @@ static int test_multiple_refs(struct btrfs_root *root,
*/
ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FIRST_FREE_OBJECTID);
if (ret) {
- test_msg("Couldn't create a qgroup %d\n", ret);
+ test_err("couldn't create a qgroup %d", ret);
return ret;
}
@@ -332,7 +332,7 @@ static int test_multiple_refs(struct btrfs_root *root,
false);
if (ret) {
ulist_free(old_roots);
- test_msg("Couldn't find old roots: %d\n", ret);
+ test_err("couldn't find old roots: %d", ret);
return ret;
}
@@ -346,20 +346,20 @@ static int test_multiple_refs(struct btrfs_root *root,
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
- test_msg("Couldn't find old roots: %d\n", ret);
+ test_err("couldn't find old roots: %d", ret);
return ret;
}
ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
nodesize, old_roots, new_roots);
if (ret) {
- test_msg("Couldn't account space for a qgroup %d\n", ret);
+ test_err("couldn't account space for a qgroup %d", ret);
return ret;
}
if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
nodesize, nodesize)) {
- test_msg("Qgroup counts didn't match expected values\n");
+ test_err("qgroup counts didn't match expected values");
return -EINVAL;
}
@@ -367,7 +367,7 @@ static int test_multiple_refs(struct btrfs_root *root,
false);
if (ret) {
ulist_free(old_roots);
- test_msg("Couldn't find old roots: %d\n", ret);
+ test_err("couldn't find old roots: %d", ret);
return ret;
}
@@ -381,26 +381,26 @@ static int test_multiple_refs(struct btrfs_root *root,
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
- test_msg("Couldn't find old roots: %d\n", ret);
+ test_err("couldn't find old roots: %d", ret);
return ret;
}
ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
nodesize, old_roots, new_roots);
if (ret) {
- test_msg("Couldn't account space for a qgroup %d\n", ret);
+ test_err("couldn't account space for a qgroup %d", ret);
return ret;
}
if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
nodesize, 0)) {
- test_msg("Qgroup counts didn't match expected values\n");
+ test_err("qgroup counts didn't match expected values");
return -EINVAL;
}
if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FIRST_FREE_OBJECTID,
nodesize, 0)) {
- test_msg("Qgroup counts didn't match expected values\n");
+ test_err("qgroup counts didn't match expected values");
return -EINVAL;
}
@@ -408,7 +408,7 @@ static int test_multiple_refs(struct btrfs_root *root,
false);
if (ret) {
ulist_free(old_roots);
- test_msg("Couldn't find old roots: %d\n", ret);
+ test_err("couldn't find old roots: %d", ret);
return ret;
}
@@ -422,26 +422,26 @@ static int test_multiple_refs(struct btrfs_root *root,
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
- test_msg("Couldn't find old roots: %d\n", ret);
+ test_err("couldn't find old roots: %d", ret);
return ret;
}
ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
nodesize, old_roots, new_roots);
if (ret) {
- test_msg("Couldn't account space for a qgroup %d\n", ret);
+ test_err("couldn't account space for a qgroup %d", ret);
return ret;
}
if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FIRST_FREE_OBJECTID,
0, 0)) {
- test_msg("Qgroup counts didn't match expected values\n");
+ test_err("qgroup counts didn't match expected values");
return -EINVAL;
}
if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
nodesize, nodesize)) {
- test_msg("Qgroup counts didn't match expected values\n");
+ test_err("qgroup counts didn't match expected values");
return -EINVAL;
}
@@ -457,13 +457,13 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info) {
- test_msg("Couldn't allocate dummy fs info\n");
+ test_err("couldn't allocate dummy fs info");
return -ENOMEM;
}
root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(root)) {
- test_msg("Couldn't allocate root\n");
+ test_err("couldn't allocate root");
ret = PTR_ERR(root);
goto out;
}
@@ -485,7 +485,7 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
*/
root->node = alloc_test_extent_buffer(root->fs_info, nodesize);
if (!root->node) {
- test_msg("Couldn't allocate dummy buffer\n");
+ test_err("couldn't allocate dummy buffer");
ret = -ENOMEM;
goto out;
}
@@ -495,7 +495,7 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
tmp_root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(tmp_root)) {
- test_msg("Couldn't allocate a fs root\n");
+ test_err("couldn't allocate a fs root");
ret = PTR_ERR(tmp_root);
goto out;
}
@@ -504,13 +504,13 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
root->fs_info->fs_root = tmp_root;
ret = btrfs_insert_fs_root(root->fs_info, tmp_root);
if (ret) {
- test_msg("Couldn't insert fs root %d\n", ret);
+ test_err("couldn't insert fs root %d", ret);
goto out;
}
tmp_root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(tmp_root)) {
- test_msg("Couldn't allocate a fs root\n");
+ test_err("couldn't allocate a fs root");
ret = PTR_ERR(tmp_root);
goto out;
}
@@ -518,11 +518,11 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
tmp_root->root_key.objectid = BTRFS_FIRST_FREE_OBJECTID;
ret = btrfs_insert_fs_root(root->fs_info, tmp_root);
if (ret) {
- test_msg("Couldn't insert fs root %d\n", ret);
+ test_err("couldn't insert fs root %d", ret);
goto out;
}
- test_msg("Running qgroup tests\n");
+ test_msg("running qgroup tests");
ret = test_no_shared_qgroup(root, sectorsize, nodesize);
if (ret)
goto out;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c944b4769e3c..4485eae41e88 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -877,12 +877,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
atomic_dec(&cur_trans->num_writers);
extwriter_counter_dec(cur_trans, trans->type);
- /*
- * Make sure counter is updated before we wake up waiters.
- */
- smp_mb();
- if (waitqueue_active(&cur_trans->writer_wait))
- wake_up(&cur_trans->writer_wait);
+ cond_wake_up(&cur_trans->writer_wait);
btrfs_put_transaction(cur_trans);
if (current->journal_info == trans)
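
For reference, cond_wake_up() and cond_wake_up_nomb() are small helpers
introduced elsewhere in this series; a sketch of their assumed shape, which
encapsulates the open-coded waitqueue_active() pattern being removed here:

    static inline void cond_wake_up(struct wait_queue_head *wq)
    {
            /*
             * wq_has_sleeper() implies a full memory barrier, so either the
             * waiter sees the updated condition or this CPU sees the waiter
             * on the queue.
             */
            if (wq_has_sleeper(wq))
                    wake_up(wq);
    }

    static inline void cond_wake_up_nomb(struct wait_queue_head *wq)
    {
            /*
             * No-barrier variant: the caller guarantees an implied barrier
             * before the call (e.g. atomic_dec_and_test() or mutex_unlock()),
             * so the plain waitqueue_active() check is safe.
             */
            if (waitqueue_active(wq))
                    wake_up(wq);
    }
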
@@ -1250,7 +1245,6 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
btrfs_free_log(trans, root);
btrfs_update_reloc_root(trans, root);
- btrfs_orphan_commit_root(trans, root);
btrfs_save_ino_cache(root, trans);
@@ -1640,15 +1634,14 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
btrfs_abort_transaction(trans, ret);
goto fail;
}
- ret = btrfs_uuid_tree_add(trans, fs_info, new_uuid.b,
- BTRFS_UUID_KEY_SUBVOL, objectid);
+ ret = btrfs_uuid_tree_add(trans, new_uuid.b, BTRFS_UUID_KEY_SUBVOL,
+ objectid);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
if (!btrfs_is_empty_uuid(new_root_item->received_uuid)) {
- ret = btrfs_uuid_tree_add(trans, fs_info,
- new_root_item->received_uuid,
+ ret = btrfs_uuid_tree_add(trans, new_root_item->received_uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
objectid);
if (ret && ret != -EEXIST) {
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index d8c0826bc2c7..94439482a0ec 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -139,7 +139,6 @@ struct btrfs_pending_snapshot {
struct btrfs_path *path;
/* block reservation for the operation */
struct btrfs_block_rsv block_rsv;
- u64 qgroup_reserved;
/* extra metadata reservation for relocation */
int error;
bool readonly;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 43758e30aa7a..f8220ec02036 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -222,11 +222,8 @@ int btrfs_pin_log_trans(struct btrfs_root *root)
void btrfs_end_log_trans(struct btrfs_root *root)
{
if (atomic_dec_and_test(&root->log_writers)) {
- /*
- * Implicit memory barrier after atomic_dec_and_test
- */
- if (waitqueue_active(&root->log_writer_wait))
- wake_up(&root->log_writer_wait);
+ /* atomic_dec_and_test implies a barrier */
+ cond_wake_up_nomb(&root->log_writer_wait);
}
}
@@ -2988,11 +2985,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
mutex_lock(&log_root_tree->log_mutex);
if (atomic_dec_and_test(&log_root_tree->log_writers)) {
- /*
- * Implicit memory barrier after atomic_dec_and_test
- */
- if (waitqueue_active(&log_root_tree->log_writer_wait))
- wake_up(&log_root_tree->log_writer_wait);
+ /* atomic_dec_and_test implies a barrier */
+ cond_wake_up_nomb(&log_root_tree->log_writer_wait);
}
if (ret) {
@@ -3116,10 +3110,11 @@ out_wake_log_root:
mutex_unlock(&log_root_tree->log_mutex);
/*
- * The barrier before waitqueue_active is implied by mutex_unlock
+ * The barrier before waitqueue_active (in cond_wake_up) is needed so
+ * that all the updates above are seen by the woken threads. It might
+ * not be necessary, but proving that is hard.
*/
- if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
- wake_up(&log_root_tree->log_commit_wait[index2]);
+ cond_wake_up(&log_root_tree->log_commit_wait[index2]);
out:
mutex_lock(&root->log_mutex);
btrfs_remove_all_log_ctxs(root, index1, ret);
@@ -3128,10 +3123,11 @@ out:
mutex_unlock(&root->log_mutex);
/*
- * The barrier before waitqueue_active is implied by mutex_unlock
+ * The barrier before waitqueue_active (in cond_wake_up) is needed so
+ * that all the updates above are seen by the woken threads. It might
+ * not be necessary, but proving that is hard.
*/
- if (waitqueue_active(&root->log_commit_wait[index1]))
- wake_up(&root->log_commit_wait[index1]);
+ cond_wake_up(&root->log_commit_wait[index1]);
return ret;
}
@@ -4320,6 +4316,110 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
return ret;
}
+/*
+ * Log all prealloc extents beyond the inode's i_size to make sure we do not
+ * lose them after doing a fast fsync and replaying the log. We scan the
+ * subvolume's root instead of iterating the inode's extent map tree because
+ * otherwise we can log incorrect extent items based on extent map conversion.
+ * That can happen because extent maps are merged when they are not in
+ * the extent map tree's list of modified extents.
+ */
+static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode,
+ struct btrfs_path *path)
+{
+ struct btrfs_root *root = inode->root;
+ struct btrfs_key key;
+ const u64 i_size = i_size_read(&inode->vfs_inode);
+ const u64 ino = btrfs_ino(inode);
+ struct btrfs_path *dst_path = NULL;
+ u64 last_extent = (u64)-1;
+ int ins_nr = 0;
+ int start_slot;
+ int ret;
+
+ if (!(inode->flags & BTRFS_INODE_PREALLOC))
+ return 0;
+
+ key.objectid = ino;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = i_size;
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+
+ while (true) {
+ struct extent_buffer *leaf = path->nodes[0];
+ int slot = path->slots[0];
+
+ if (slot >= btrfs_header_nritems(leaf)) {
+ if (ins_nr > 0) {
+ ret = copy_items(trans, inode, dst_path, path,
+ &last_extent, start_slot,
+ ins_nr, 1, 0);
+ if (ret < 0)
+ goto out;
+ ins_nr = 0;
+ }
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ goto out;
+ if (ret > 0) {
+ ret = 0;
+ break;
+ }
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid > ino)
+ break;
+ if (WARN_ON_ONCE(key.objectid < ino) ||
+ key.type < BTRFS_EXTENT_DATA_KEY ||
+ key.offset < i_size) {
+ path->slots[0]++;
+ continue;
+ }
+ if (last_extent == (u64)-1) {
+ last_extent = key.offset;
+ /*
+ * Avoid logging extent items logged in past fsync calls
+ * and leading to duplicate keys in the log tree.
+ */
+ do {
+ ret = btrfs_truncate_inode_items(trans,
+ root->log_root,
+ &inode->vfs_inode,
+ i_size,
+ BTRFS_EXTENT_DATA_KEY);
+ } while (ret == -EAGAIN);
+ if (ret)
+ goto out;
+ }
+ if (ins_nr == 0)
+ start_slot = slot;
+ ins_nr++;
+ path->slots[0]++;
+ if (!dst_path) {
+ dst_path = btrfs_alloc_path();
+ if (!dst_path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+ }
+ if (ins_nr > 0) {
+ ret = copy_items(trans, inode, dst_path, path, &last_extent,
+ start_slot, ins_nr, 1, 0);
+ if (ret > 0)
+ ret = 0;
+ }
+out:
+ btrfs_release_path(path);
+ btrfs_free_path(dst_path);
+ return ret;
+}
+
static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_inode *inode,
@@ -4362,6 +4462,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
if (em->generation <= test_gen)
continue;
+ /* We log prealloc extents beyond eof later. */
+ if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) &&
+ em->start >= i_size_read(&inode->vfs_inode))
+ continue;
+
if (em->start < logged_start)
logged_start = em->start;
if ((em->start + em->len - 1) > logged_end)
@@ -4374,31 +4479,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
num++;
}
- /*
- * Add all prealloc extents beyond the inode's i_size to make sure we
- * don't lose them after doing a fast fsync and replaying the log.
- */
- if (inode->flags & BTRFS_INODE_PREALLOC) {
- struct rb_node *node;
-
- for (node = rb_last(&tree->map); node; node = rb_prev(node)) {
- em = rb_entry(node, struct extent_map, rb_node);
- if (em->start < i_size_read(&inode->vfs_inode))
- break;
- if (!list_empty(&em->list))
- continue;
- /* Same as above loop. */
- if (++num > 32768) {
- list_del_init(&tree->modified_extents);
- ret = -EFBIG;
- goto process;
- }
- refcount_inc(&em->refs);
- set_bit(EXTENT_FLAG_LOGGING, &em->flags);
- list_add_tail(&em->list, &extents);
- }
- }
-
list_sort(NULL, &extents, extent_cmp);
btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end);
/*
@@ -4443,6 +4523,9 @@ process:
up_write(&inode->dio_sem);
btrfs_release_path(path);
+ if (!ret)
+ ret = btrfs_log_prealloc_extents(trans, inode, path);
+
return ret;
}
@@ -4827,6 +4910,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct extent_map_tree *em_tree = &inode->extent_tree;
u64 logged_isize = 0;
bool need_log_inode_item = true;
+ bool xattrs_logged = false;
path = btrfs_alloc_path();
if (!path)
@@ -5128,6 +5212,7 @@ next_key:
err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
if (err)
goto out_unlock;
+ xattrs_logged = true;
if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
btrfs_release_path(path);
btrfs_release_path(dst_path);
@@ -5140,6 +5225,11 @@ log_extents:
btrfs_release_path(dst_path);
if (need_log_inode_item) {
err = log_inode_item(trans, log, dst_path, inode);
+ if (!err && !xattrs_logged) {
+ err = btrfs_log_all_xattrs(trans, root, inode, path,
+ dst_path);
+ btrfs_release_path(path);
+ }
if (err)
goto out_unlock;
}
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index 1ba7ca2a4200..3b2ae342e649 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -79,10 +79,10 @@ out:
return ret;
}
-int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u8 *uuid, u8 type,
+int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
u64 subid_cpu)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *uuid_root = fs_info->uuid_root;
int ret;
struct btrfs_path *path = NULL;
@@ -144,10 +144,10 @@ out:
return ret;
}
-int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u8 *uuid, u8 type,
+int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
u64 subid)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *uuid_root = fs_info->uuid_root;
int ret;
struct btrfs_path *path = NULL;
@@ -239,7 +239,7 @@ static int btrfs_uuid_iter_rem(struct btrfs_root *uuid_root, u8 *uuid, u8 type,
goto out;
}
- ret = btrfs_uuid_tree_rem(trans, uuid_root->fs_info, uuid, type, subid);
+ ret = btrfs_uuid_tree_remove(trans, uuid, type, subid);
btrfs_end_transaction(trans);
out:
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 292266f6ab9c..e034ad9e23b4 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -40,6 +40,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
.tolerated_failures = 1,
.devs_increment = 2,
.ncopies = 2,
+ .raid_name = "raid10",
+ .bg_flag = BTRFS_BLOCK_GROUP_RAID10,
+ .mindev_error = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
},
[BTRFS_RAID_RAID1] = {
.sub_stripes = 1,
@@ -49,6 +52,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
.tolerated_failures = 1,
.devs_increment = 2,
.ncopies = 2,
+ .raid_name = "raid1",
+ .bg_flag = BTRFS_BLOCK_GROUP_RAID1,
+ .mindev_error = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
},
[BTRFS_RAID_DUP] = {
.sub_stripes = 1,
@@ -58,6 +64,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
.tolerated_failures = 0,
.devs_increment = 1,
.ncopies = 2,
+ .raid_name = "dup",
+ .bg_flag = BTRFS_BLOCK_GROUP_DUP,
+ .mindev_error = 0,
},
[BTRFS_RAID_RAID0] = {
.sub_stripes = 1,
@@ -67,6 +76,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
.tolerated_failures = 0,
.devs_increment = 1,
.ncopies = 1,
+ .raid_name = "raid0",
+ .bg_flag = BTRFS_BLOCK_GROUP_RAID0,
+ .mindev_error = 0,
},
[BTRFS_RAID_SINGLE] = {
.sub_stripes = 1,
@@ -76,6 +88,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
.tolerated_failures = 0,
.devs_increment = 1,
.ncopies = 1,
+ .raid_name = "single",
+ .bg_flag = 0,
+ .mindev_error = 0,
},
[BTRFS_RAID_RAID5] = {
.sub_stripes = 1,
@@ -85,6 +100,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
.tolerated_failures = 1,
.devs_increment = 1,
.ncopies = 2,
+ .raid_name = "raid5",
+ .bg_flag = BTRFS_BLOCK_GROUP_RAID5,
+ .mindev_error = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
},
[BTRFS_RAID_RAID6] = {
.sub_stripes = 1,
@@ -94,33 +112,19 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
.tolerated_failures = 2,
.devs_increment = 1,
.ncopies = 3,
+ .raid_name = "raid6",
+ .bg_flag = BTRFS_BLOCK_GROUP_RAID6,
+ .mindev_error = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
},
};
-const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
- [BTRFS_RAID_RAID10] = BTRFS_BLOCK_GROUP_RAID10,
- [BTRFS_RAID_RAID1] = BTRFS_BLOCK_GROUP_RAID1,
- [BTRFS_RAID_DUP] = BTRFS_BLOCK_GROUP_DUP,
- [BTRFS_RAID_RAID0] = BTRFS_BLOCK_GROUP_RAID0,
- [BTRFS_RAID_SINGLE] = 0,
- [BTRFS_RAID_RAID5] = BTRFS_BLOCK_GROUP_RAID5,
- [BTRFS_RAID_RAID6] = BTRFS_BLOCK_GROUP_RAID6,
-};
+const char *get_raid_name(enum btrfs_raid_types type)
+{
+ if (type >= BTRFS_NR_RAID_TYPES)
+ return NULL;
-/*
- * Table to convert BTRFS_RAID_* to the error code if minimum number of devices
- * condition is not met. Zero means there's no corresponding
- * BTRFS_ERROR_DEV_*_NOT_MET value.
- */
-const int btrfs_raid_mindev_error[BTRFS_NR_RAID_TYPES] = {
- [BTRFS_RAID_RAID10] = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
- [BTRFS_RAID_RAID1] = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
- [BTRFS_RAID_DUP] = 0,
- [BTRFS_RAID_RAID0] = 0,
- [BTRFS_RAID_SINGLE] = 0,
- [BTRFS_RAID_RAID5] = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
- [BTRFS_RAID_RAID6] = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
-};
+ return btrfs_raid_array[type].raid_name;
+}
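
The new raid_name field gives callers a printable profile name via
get_raid_name(). A hypothetical use in an error path (the message text and
surrounding variables here are illustrative, not taken from this patch):

    /* Illustrative only: report a profile by name rather than a raw flag. */
    if (num_devices < btrfs_raid_array[index].devs_min)
            btrfs_err(fs_info, "not enough devices for raid profile %s",
                      get_raid_name(index));
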
static int init_first_rw_device(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
@@ -167,12 +171,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
* may be used to exclude some operations from running concurrently without any
* modifications to the list (see write_all_supers)
*
- * volume_mutex
- * ------------
- * coarse lock owned by a mounted filesystem; used to exclude some operations
- * that cannot run in parallel and affect the higher-level properties of the
- * filesystem like: device add/deleting/resize/replace, or balance
- *
* balance_mutex
* -------------
* protects balance structures (status, state) and context accessed from
@@ -197,6 +195,41 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
* device_list_mutex
* chunk_mutex
* balance_mutex
+ *
+ *
+ * Exclusive operations, BTRFS_FS_EXCL_OP
+ * ======================================
+ *
+ * Maintains the exclusivity of the following operations that apply to the
+ * whole filesystem and cannot run in parallel.
+ *
+ * - Balance (*)
+ * - Device add
+ * - Device remove
+ * - Device replace (*)
+ * - Resize
+ *
+ * The device operations (as above) can be in one of the following states:
+ *
+ * - Running state
+ * - Paused state
+ * - Completed state
+ *
+ * Only device operations marked with (*) can go into the Paused state for the
+ * following reasons:
+ *
+ * - ioctl (only Balance can be Paused through ioctl)
+ * - filesystem remounted as read-only
+ * - filesystem unmounted and mounted as read-only
+ * - system power-cycle and filesystem mounted as read-only
+ * - filesystem or device errors leading to forced read-only
+ *
+ * The BTRFS_FS_EXCL_OP flag is set and cleared using atomic operations and
+ * remains set for as long as an operation is in the Paused state.
+ * A device operation in the Paused or Running state can be canceled or
+ * resumed either by ioctl (Balance only) or when the filesystem is
+ * remounted read-write. The flag is cleared when the operation is canceled
+ * or completes.
*/
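
In code, the exclusivity described above comes down to a single atomic bit on
fs_info->flags. A sketch of the assumed claim/release pattern around one of
the listed operations (the surrounding ioctl plumbing is elided, and the
device-remove call is just an example of an exclusive operation):

    /* Claim the exclusive-op slot; fail if another op is running/paused. */
    if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
            return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;

    ret = btrfs_rm_device(fs_info, device_path, devid);

    /* Completed or canceled: allow the next exclusive operation. */
    clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
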
DEFINE_MUTEX(uuid_mutex);
@@ -227,14 +260,14 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid)
INIT_LIST_HEAD(&fs_devs->devices);
INIT_LIST_HEAD(&fs_devs->resized_devices);
INIT_LIST_HEAD(&fs_devs->alloc_list);
- INIT_LIST_HEAD(&fs_devs->list);
+ INIT_LIST_HEAD(&fs_devs->fs_list);
if (fsid)
memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);
return fs_devs;
}
-static void free_device(struct btrfs_device *device)
+void btrfs_free_device(struct btrfs_device *device)
{
rcu_string_free(device->name);
bio_put(device->flush_bio);
@@ -249,7 +282,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
device = list_entry(fs_devices->devices.next,
struct btrfs_device, dev_list);
list_del(&device->dev_list);
- free_device(device);
+ btrfs_free_device(device);
}
kfree(fs_devices);
}
@@ -273,8 +306,8 @@ void __exit btrfs_cleanup_fs_uuids(void)
while (!list_empty(&fs_uuids)) {
fs_devices = list_entry(fs_uuids.next,
- struct btrfs_fs_devices, list);
- list_del(&fs_devices->list);
+ struct btrfs_fs_devices, fs_list);
+ list_del(&fs_devices->fs_list);
free_fs_devices(fs_devices);
}
}
@@ -282,7 +315,7 @@ void __exit btrfs_cleanup_fs_uuids(void)
/*
* Returns a pointer to a new btrfs_device on success; ERR_PTR() on error.
* Returned struct is not linked onto any lists and must be destroyed using
- * free_device.
+ * btrfs_free_device.
*/
static struct btrfs_device *__alloc_device(void)
{
@@ -327,10 +360,9 @@ static struct btrfs_device *__alloc_device(void)
static struct btrfs_device *find_device(struct btrfs_fs_devices *fs_devices,
u64 devid, const u8 *uuid)
{
- struct list_head *head = &fs_devices->devices;
struct btrfs_device *dev;
- list_for_each_entry(dev, head, dev_list) {
+ list_for_each_entry(dev, &fs_devices->devices, dev_list) {
if (dev->devid == devid &&
(!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
return dev;
@@ -343,7 +375,7 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
{
struct btrfs_fs_devices *fs_devices;
- list_for_each_entry(fs_devices, &fs_uuids, list) {
+ list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
return fs_devices;
}
@@ -607,7 +639,7 @@ static void btrfs_free_stale_devices(const char *path,
struct btrfs_fs_devices *fs_devs, *tmp_fs_devs;
struct btrfs_device *dev, *tmp_dev;
- list_for_each_entry_safe(fs_devs, tmp_fs_devs, &fs_uuids, list) {
+ list_for_each_entry_safe(fs_devs, tmp_fs_devs, &fs_uuids, fs_list) {
if (fs_devs->opened)
continue;
@@ -632,13 +664,13 @@ static void btrfs_free_stale_devices(const char *path,
/* delete the stale device */
if (fs_devs->num_devices == 1) {
btrfs_sysfs_remove_fsid(fs_devs);
- list_del(&fs_devs->list);
+ list_del(&fs_devs->fs_list);
free_fs_devices(fs_devs);
break;
} else {
fs_devs->num_devices--;
list_del(&dev->dev_list);
- free_device(dev);
+ btrfs_free_device(dev);
}
}
}
@@ -732,7 +764,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
if (IS_ERR(fs_devices))
return ERR_CAST(fs_devices);
- list_add(&fs_devices->list, &fs_uuids);
+ list_add(&fs_devices->fs_list, &fs_uuids);
device = NULL;
} else {
@@ -753,7 +785,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
name = rcu_string_strdup(path, GFP_NOFS);
if (!name) {
- free_device(device);
+ btrfs_free_device(device);
return ERR_PTR(-ENOMEM);
}
rcu_assign_pointer(device->name, name);
@@ -866,7 +898,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
name = rcu_string_strdup(orig_dev->name->str,
GFP_KERNEL);
if (!name) {
- free_device(device);
+ btrfs_free_device(device);
goto error;
}
rcu_assign_pointer(device->name, name);
@@ -938,7 +970,7 @@ again:
}
list_del_init(&device->dev_list);
fs_devices->num_devices--;
- free_device(device);
+ btrfs_free_device(device);
}
if (fs_devices->seed) {
@@ -956,7 +988,7 @@ static void free_device_rcu(struct rcu_head *head)
struct btrfs_device *device;
device = container_of(head, struct btrfs_device, rcu);
- free_device(device);
+ btrfs_free_device(device);
}
static void btrfs_close_bdev(struct btrfs_device *device)
@@ -1005,7 +1037,7 @@ static void btrfs_prepare_close_one_device(struct btrfs_device *device)
new_device->fs_devices = device->fs_devices;
}
-static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
+static int close_fs_devices(struct btrfs_fs_devices *fs_devices)
{
struct btrfs_device *device, *tmp;
struct list_head pending_put;
@@ -1050,7 +1082,7 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
int ret;
mutex_lock(&uuid_mutex);
- ret = __btrfs_close_devices(fs_devices);
+ ret = close_fs_devices(fs_devices);
if (!fs_devices->opened) {
seed_devices = fs_devices->seed;
fs_devices->seed = NULL;
@@ -1060,23 +1092,22 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
while (seed_devices) {
fs_devices = seed_devices;
seed_devices = fs_devices->seed;
- __btrfs_close_devices(fs_devices);
+ close_fs_devices(fs_devices);
free_fs_devices(fs_devices);
}
return ret;
}
-static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
+static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
fmode_t flags, void *holder)
{
- struct list_head *head = &fs_devices->devices;
struct btrfs_device *device;
struct btrfs_device *latest_dev = NULL;
int ret = 0;
flags |= FMODE_EXCL;
- list_for_each_entry(device, head, dev_list) {
+ list_for_each_entry(device, &fs_devices->devices, dev_list) {
/* Just open everything we can; ignore failures here */
if (btrfs_open_one_device(fs_devices, device, flags, holder))
continue;
@@ -1115,15 +1146,16 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
{
int ret;
- mutex_lock(&uuid_mutex);
+ mutex_lock(&fs_devices->device_list_mutex);
if (fs_devices->opened) {
fs_devices->opened++;
ret = 0;
} else {
list_sort(NULL, &fs_devices->devices, devid_cmp);
- ret = __btrfs_open_devices(fs_devices, flags, holder);
+ ret = open_fs_devices(fs_devices, flags, holder);
}
- mutex_unlock(&uuid_mutex);
+ mutex_unlock(&fs_devices->device_list_mutex);
+
return ret;
}
@@ -1201,31 +1233,29 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
*/
bytenr = btrfs_sb_offset(0);
flags |= FMODE_EXCL;
- mutex_lock(&uuid_mutex);
bdev = blkdev_get_by_path(path, flags, holder);
- if (IS_ERR(bdev)) {
- ret = PTR_ERR(bdev);
- goto error;
- }
+ if (IS_ERR(bdev))
+ return PTR_ERR(bdev);
if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) {
ret = -EINVAL;
goto error_bdev_put;
}
+ mutex_lock(&uuid_mutex);
device = device_list_add(path, disk_super);
if (IS_ERR(device))
ret = PTR_ERR(device);
else
*fs_devices_ret = device->fs_devices;
+ mutex_unlock(&uuid_mutex);
btrfs_release_disk_super(page);
error_bdev_put:
blkdev_put(bdev, flags);
-error:
- mutex_unlock(&uuid_mutex);
+
return ret;
}
@@ -1857,11 +1887,11 @@ static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info,
} while (read_seqretry(&fs_info->profiles_lock, seq));
for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
- if (!(all_avail & btrfs_raid_group[i]))
+ if (!(all_avail & btrfs_raid_array[i].bg_flag))
continue;
if (num_devices < btrfs_raid_array[i].devs_min) {
- int ret = btrfs_raid_mindev_error[i];
+ int ret = btrfs_raid_array[i].mindev_error;
if (ret)
return ret;
@@ -1917,13 +1947,13 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
{
struct btrfs_device *device;
struct btrfs_fs_devices *cur_devices;
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
u64 num_devices;
int ret = 0;
- mutex_lock(&fs_info->volume_mutex);
mutex_lock(&uuid_mutex);
- num_devices = fs_info->fs_devices->num_devices;
+ num_devices = fs_devices->num_devices;
btrfs_dev_replace_read_lock(&fs_info->dev_replace);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
WARN_ON(num_devices < 1);
@@ -1986,27 +2016,32 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
* (super_copy) should hold the device list mutex.
*/
+ /*
+ * Normally cur_devices == fs_devices. But when deleting a seed
+ * device, cur_devices points to the seed's own fs_devices, which is
+ * listed under fs_devices->seed.
+ */
cur_devices = device->fs_devices;
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ mutex_lock(&fs_devices->device_list_mutex);
list_del_rcu(&device->dev_list);
- device->fs_devices->num_devices--;
- device->fs_devices->total_devices--;
+ cur_devices->num_devices--;
+ cur_devices->total_devices--;
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
- device->fs_devices->missing_devices--;
+ cur_devices->missing_devices--;
btrfs_assign_next_active_device(fs_info, device, NULL);
if (device->bdev) {
- device->fs_devices->open_devices--;
+ cur_devices->open_devices--;
/* remove sysfs entry */
- btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
+ btrfs_sysfs_rm_device_link(fs_devices, device);
}
num_devices = btrfs_super_num_devices(fs_info->super_copy) - 1;
btrfs_set_super_num_devices(fs_info->super_copy, num_devices);
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ mutex_unlock(&fs_devices->device_list_mutex);
/*
* at this point, the device is zero sized and detached from
@@ -2020,8 +2055,6 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
call_rcu(&device->rcu, free_device_rcu);
if (cur_devices->open_devices == 0) {
- struct btrfs_fs_devices *fs_devices;
- fs_devices = fs_info->fs_devices;
while (fs_devices) {
if (fs_devices->seed == cur_devices) {
fs_devices->seed = cur_devices->seed;
@@ -2030,20 +2063,19 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
fs_devices = fs_devices->seed;
}
cur_devices->seed = NULL;
- __btrfs_close_devices(cur_devices);
+ close_fs_devices(cur_devices);
free_fs_devices(cur_devices);
}
out:
mutex_unlock(&uuid_mutex);
- mutex_unlock(&fs_info->volume_mutex);
return ret;
error_undo:
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
mutex_lock(&fs_info->chunk_mutex);
list_add(&device->dev_alloc_list,
- &fs_info->fs_devices->alloc_list);
+ &fs_devices->alloc_list);
device->fs_devices->rw_devices++;
mutex_unlock(&fs_info->chunk_mutex);
}
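The open_devices == 0 branch above walks the seed chain, a singly linked list hanging off fs_devices->seed. A standalone sketch of just that unlink step, mirroring the loop in the hunk (struct and helper names are hypothetical):

	struct fs_devs {
		struct fs_devs *seed;
	};

	static void unlink_seed(struct fs_devs *fs_devices, struct fs_devs *cur)
	{
		while (fs_devices) {
			if (fs_devices->seed == cur) {
				fs_devices->seed = cur->seed; /* splice out */
				break;
			}
			fs_devices = fs_devices->seed;
		}
		/* cur is detached; close_fs_devices() and free follow */
		cur->seed = NULL;
	}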
@@ -2112,7 +2144,7 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
tmp_fs_devices = tmp_fs_devices->seed;
}
fs_devices->seed = NULL;
- __btrfs_close_devices(fs_devices);
+ close_fs_devices(fs_devices);
free_fs_devices(fs_devices);
}
}
@@ -2120,23 +2152,23 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
struct btrfs_device *tgtdev)
{
- mutex_lock(&uuid_mutex);
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+
WARN_ON(!tgtdev);
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ mutex_lock(&fs_devices->device_list_mutex);
- btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev);
+ btrfs_sysfs_rm_device_link(fs_devices, tgtdev);
if (tgtdev->bdev)
- fs_info->fs_devices->open_devices--;
+ fs_devices->open_devices--;
- fs_info->fs_devices->num_devices--;
+ fs_devices->num_devices--;
btrfs_assign_next_active_device(fs_info, tgtdev, NULL);
list_del_rcu(&tgtdev->dev_list);
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- mutex_unlock(&uuid_mutex);
+ mutex_unlock(&fs_devices->device_list_mutex);
/*
* The update_dev_time() within btrfs_scratch_superblocks()
@@ -2188,10 +2220,6 @@ int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info,
struct btrfs_device *tmp;
devices = &fs_info->fs_devices->devices;
- /*
- * It is safe to read the devices since the volume_mutex
- * is held by the caller.
- */
list_for_each_entry(tmp, devices, dev_list) {
if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
&tmp->dev_state) && !tmp->bdev) {
@@ -2259,7 +2287,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
return PTR_ERR(old_devices);
}
- list_add(&old_devices->list, &fs_uuids);
+ list_add(&old_devices->fs_list, &fs_uuids);
memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
seed_devices->opened = 1;
@@ -2570,7 +2598,7 @@ error_trans:
if (trans)
btrfs_end_transaction(trans);
error_free_device:
- free_device(device);
+ btrfs_free_device(device);
error:
blkdev_put(bdev, FMODE_EXCL);
if (seeding_dev && !unlocked) {
@@ -2580,99 +2608,6 @@ error:
return ret;
}
-int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
- const char *device_path,
- struct btrfs_device *srcdev,
- struct btrfs_device **device_out)
-{
- struct btrfs_device *device;
- struct block_device *bdev;
- struct list_head *devices;
- struct rcu_string *name;
- u64 devid = BTRFS_DEV_REPLACE_DEVID;
- int ret = 0;
-
- *device_out = NULL;
- if (fs_info->fs_devices->seeding) {
- btrfs_err(fs_info, "the filesystem is a seed filesystem!");
- return -EINVAL;
- }
-
- bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
- fs_info->bdev_holder);
- if (IS_ERR(bdev)) {
- btrfs_err(fs_info, "target device %s is invalid!", device_path);
- return PTR_ERR(bdev);
- }
-
- filemap_write_and_wait(bdev->bd_inode->i_mapping);
-
- devices = &fs_info->fs_devices->devices;
- list_for_each_entry(device, devices, dev_list) {
- if (device->bdev == bdev) {
- btrfs_err(fs_info,
- "target device is in the filesystem!");
- ret = -EEXIST;
- goto error;
- }
- }
-
-
- if (i_size_read(bdev->bd_inode) <
- btrfs_device_get_total_bytes(srcdev)) {
- btrfs_err(fs_info,
- "target device is smaller than source device!");
- ret = -EINVAL;
- goto error;
- }
-
-
- device = btrfs_alloc_device(NULL, &devid, NULL);
- if (IS_ERR(device)) {
- ret = PTR_ERR(device);
- goto error;
- }
-
- name = rcu_string_strdup(device_path, GFP_KERNEL);
- if (!name) {
- free_device(device);
- ret = -ENOMEM;
- goto error;
- }
- rcu_assign_pointer(device->name, name);
-
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
- set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
- device->generation = 0;
- device->io_width = fs_info->sectorsize;
- device->io_align = fs_info->sectorsize;
- device->sector_size = fs_info->sectorsize;
- device->total_bytes = btrfs_device_get_total_bytes(srcdev);
- device->disk_total_bytes = btrfs_device_get_disk_total_bytes(srcdev);
- device->bytes_used = btrfs_device_get_bytes_used(srcdev);
- device->commit_total_bytes = srcdev->commit_total_bytes;
- device->commit_bytes_used = device->bytes_used;
- device->fs_info = fs_info;
- device->bdev = bdev;
- set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
- set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
- device->mode = FMODE_EXCL;
- device->dev_stats_valid = 1;
- set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
- device->fs_devices = fs_info->fs_devices;
- list_add(&device->dev_list, &fs_info->fs_devices->devices);
- fs_info->fs_devices->num_devices++;
- fs_info->fs_devices->open_devices++;
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
-
- *device_out = device;
- return ret;
-
-error:
- blkdev_put(bdev, FMODE_EXCL);
- return ret;
-}
-
static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
struct btrfs_device *device)
{
@@ -3273,24 +3208,12 @@ static void update_balance_args(struct btrfs_balance_control *bctl)
}
/*
- * Should be called with both balance and volume mutexes held to
- * serialize other volume operations (add_dev/rm_dev/resize) with
- * restriper. Same goes for unset_balance_control.
+ * Clear the balance status in fs_info and delete the balance item from disk.
*/
-static void set_balance_control(struct btrfs_balance_control *bctl)
-{
- struct btrfs_fs_info *fs_info = bctl->fs_info;
-
- BUG_ON(fs_info->balance_ctl);
-
- spin_lock(&fs_info->balance_lock);
- fs_info->balance_ctl = bctl;
- spin_unlock(&fs_info->balance_lock);
-}
-
-static void unset_balance_control(struct btrfs_fs_info *fs_info)
+static void reset_balance_state(struct btrfs_fs_info *fs_info)
{
struct btrfs_balance_control *bctl = fs_info->balance_ctl;
+ int ret;
BUG_ON(!fs_info->balance_ctl);
@@ -3299,6 +3222,9 @@ static void unset_balance_control(struct btrfs_fs_info *fs_info)
spin_unlock(&fs_info->balance_lock);
kfree(bctl);
+ ret = del_balance_item(fs_info);
+ if (ret)
+ btrfs_handle_fs_error(fs_info, ret, NULL);
}
/*
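reset_balance_state() absorbs both the old unset_balance_control() and the del_balance_item() call that previously lived in __cancel_balance(). Reassembled from the hunks above (the spin-locked pointer clear comes from the removed unset_balance_control()), the merged helper reads roughly:

	static void reset_balance_state(struct btrfs_fs_info *fs_info)
	{
		struct btrfs_balance_control *bctl = fs_info->balance_ctl;
		int ret;

		BUG_ON(!fs_info->balance_ctl);

		/* clear the in-memory state first ... */
		spin_lock(&fs_info->balance_lock);
		fs_info->balance_ctl = NULL;
		spin_unlock(&fs_info->balance_lock);
		kfree(bctl);

		/* ... then drop the on-disk balance item */
		ret = del_balance_item(fs_info);
		if (ret)
			btrfs_handle_fs_error(fs_info, ret, NULL);
	}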
@@ -3835,18 +3761,6 @@ static inline int balance_need_close(struct btrfs_fs_info *fs_info)
atomic_read(&fs_info->balance_cancel_req) == 0);
}
-static void __cancel_balance(struct btrfs_fs_info *fs_info)
-{
- int ret;
-
- unset_balance_control(fs_info);
- ret = del_balance_item(fs_info);
- if (ret)
- btrfs_handle_fs_error(fs_info, ret, NULL);
-
- clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
-}
-
/* Non-zero return value signifies invalidity */
static inline int validate_convert_profile(struct btrfs_balance_args *bctl_arg,
u64 allowed)
@@ -3857,12 +3771,12 @@ static inline int validate_convert_profile(struct btrfs_balance_args *bctl_arg,
}
/*
- * Should be called with both balance and volume mutexes held
+ * Should be called with the balance mutex held
*/
-int btrfs_balance(struct btrfs_balance_control *bctl,
+int btrfs_balance(struct btrfs_fs_info *fs_info,
+ struct btrfs_balance_control *bctl,
struct btrfs_ioctl_balance_args *bargs)
{
- struct btrfs_fs_info *fs_info = bctl->fs_info;
u64 meta_target, data_target;
u64 allowed;
int mixed = 0;
@@ -3891,7 +3805,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
!(bctl->flags & BTRFS_BALANCE_METADATA) ||
memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
btrfs_err(fs_info,
- "with mixed groups data and metadata balance options must be the same");
+ "balance: mixed groups data and metadata options must be the same");
ret = -EINVAL;
goto out;
}
@@ -3913,23 +3827,29 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
allowed |= (BTRFS_BLOCK_GROUP_RAID10 |
BTRFS_BLOCK_GROUP_RAID6);
if (validate_convert_profile(&bctl->data, allowed)) {
+ int index = btrfs_bg_flags_to_raid_index(bctl->data.target);
+
btrfs_err(fs_info,
- "unable to start balance with target data profile %llu",
- bctl->data.target);
+ "balance: invalid convert data profile %s",
+ get_raid_name(index));
ret = -EINVAL;
goto out;
}
if (validate_convert_profile(&bctl->meta, allowed)) {
+ int index = btrfs_bg_flags_to_raid_index(bctl->meta.target);
+
btrfs_err(fs_info,
- "unable to start balance with target metadata profile %llu",
- bctl->meta.target);
+ "balance: invalid convert metadata profile %s",
+ get_raid_name(index));
ret = -EINVAL;
goto out;
}
if (validate_convert_profile(&bctl->sys, allowed)) {
+ int index = btrfs_bg_flags_to_raid_index(bctl->sys.target);
+
btrfs_err(fs_info,
- "unable to start balance with target system profile %llu",
- bctl->sys.target);
+ "balance: invalid convert system profile %s",
+ get_raid_name(index));
ret = -EINVAL;
goto out;
}
@@ -3950,10 +3870,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
!(bctl->meta.target & allowed))) {
if (bctl->flags & BTRFS_BALANCE_FORCE) {
btrfs_info(fs_info,
- "force reducing metadata integrity");
+ "balance: force reducing metadata integrity");
} else {
btrfs_err(fs_info,
- "balance will reduce metadata integrity, use force if you want this");
+ "balance: reduces metadata integrity, use --force if you want this");
ret = -EINVAL;
goto out;
}
@@ -3967,9 +3887,12 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
bctl->data.target : fs_info->avail_data_alloc_bits;
if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) <
btrfs_get_num_tolerated_disk_barrier_failures(data_target)) {
+ int meta_index = btrfs_bg_flags_to_raid_index(meta_target);
+ int data_index = btrfs_bg_flags_to_raid_index(data_target);
+
btrfs_warn(fs_info,
- "metadata profile 0x%llx has lower redundancy than data profile 0x%llx",
- meta_target, data_target);
+ "balance: metadata profile %s has lower redundancy than data profile %s",
+ get_raid_name(meta_index), get_raid_name(data_index));
}
ret = insert_balance_item(fs_info, bctl);
@@ -3978,7 +3901,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
BUG_ON(ret == -EEXIST);
- set_balance_control(bctl);
+ BUG_ON(fs_info->balance_ctl);
+ spin_lock(&fs_info->balance_lock);
+ fs_info->balance_ctl = bctl;
+ spin_unlock(&fs_info->balance_lock);
} else {
BUG_ON(ret != -EEXIST);
spin_lock(&fs_info->balance_lock);
@@ -3986,22 +3912,24 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
spin_unlock(&fs_info->balance_lock);
}
- atomic_inc(&fs_info->balance_running);
+ ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
+ set_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
mutex_unlock(&fs_info->balance_mutex);
ret = __btrfs_balance(fs_info);
mutex_lock(&fs_info->balance_mutex);
- atomic_dec(&fs_info->balance_running);
+ clear_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
if (bargs) {
memset(bargs, 0, sizeof(*bargs));
- update_ioctl_balance_args(fs_info, 0, bargs);
+ btrfs_update_ioctl_balance_args(fs_info, bargs);
}
if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
balance_need_close(fs_info)) {
- __cancel_balance(fs_info);
+ reset_balance_state(fs_info);
+ clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
}
wake_up(&fs_info->balance_wait_q);
@@ -4009,11 +3937,11 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
return ret;
out:
if (bctl->flags & BTRFS_BALANCE_RESUME)
- __cancel_balance(fs_info);
- else {
+ reset_balance_state(fs_info);
+ else
kfree(bctl);
- clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
- }
+ clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
+
return ret;
}
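The atomic balance_running counter becomes the BTRFS_FS_BALANCE_RUNNING bit in fs_info->flags: only one balance can run, so a single bit plus the existing waitqueue expresses the run/wait handshake. A condensed sketch of the pattern as used here and in the pause/cancel paths below (helper names are hypothetical):

	/* Runner side: flag up, run unlocked, flag down, wake waiters. */
	static int run_balance(struct btrfs_fs_info *fs_info)
	{
		int ret;

		ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
		set_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
		mutex_unlock(&fs_info->balance_mutex);

		ret = __btrfs_balance(fs_info);

		mutex_lock(&fs_info->balance_mutex);
		clear_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
		wake_up(&fs_info->balance_wait_q);
		return ret;
	}

	/* Pause/cancel side: sleep until the bit drops. */
	static void wait_for_balance(struct btrfs_fs_info *fs_info)
	{
		wait_event(fs_info->balance_wait_q,
			   !test_bit(BTRFS_FS_BALANCE_RUNNING,
				     &fs_info->flags));
	}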
@@ -4022,16 +3950,12 @@ static int balance_kthread(void *data)
struct btrfs_fs_info *fs_info = data;
int ret = 0;
- mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
-
if (fs_info->balance_ctl) {
- btrfs_info(fs_info, "continuing balance");
- ret = btrfs_balance(fs_info->balance_ctl, NULL);
+ btrfs_info(fs_info, "balance: resuming");
+ ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL);
}
-
mutex_unlock(&fs_info->balance_mutex);
- mutex_unlock(&fs_info->volume_mutex);
return ret;
}
@@ -4040,18 +3964,27 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
{
struct task_struct *tsk;
- spin_lock(&fs_info->balance_lock);
+ mutex_lock(&fs_info->balance_mutex);
if (!fs_info->balance_ctl) {
- spin_unlock(&fs_info->balance_lock);
+ mutex_unlock(&fs_info->balance_mutex);
return 0;
}
- spin_unlock(&fs_info->balance_lock);
+ mutex_unlock(&fs_info->balance_mutex);
if (btrfs_test_opt(fs_info, SKIP_BALANCE)) {
- btrfs_info(fs_info, "force skipping balance");
+ btrfs_info(fs_info, "balance: resume skipped");
return 0;
}
+ /*
+ * A ro->rw remount sequence should continue with the paused balance
+ * regardless of who paused it (the system or the user), so set
+ * the resume flag.
+ */
+ spin_lock(&fs_info->balance_lock);
+ fs_info->balance_ctl->flags |= BTRFS_BALANCE_RESUME;
+ spin_unlock(&fs_info->balance_lock);
+
tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
return PTR_ERR_OR_ZERO(tsk);
}
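kthread_run() returns an ERR_PTR() rather than NULL on failure, so PTR_ERR_OR_ZERO() collapses the task pointer into the int the caller expects; a minimal sketch of that pairing (the helper name is hypothetical):

	#include <linux/kthread.h>
	#include <linux/err.h>

	static int spawn_balance_resume(struct btrfs_fs_info *fs_info)
	{
		struct task_struct *tsk;

		tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
		return PTR_ERR_OR_ZERO(tsk); /* 0 on success, else -errno */
	}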
@@ -4091,7 +4024,6 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
- bctl->fs_info = fs_info;
bctl->flags = btrfs_balance_flags(leaf, item);
bctl->flags |= BTRFS_BALANCE_RESUME;
@@ -4102,15 +4034,26 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
btrfs_balance_sys(leaf, item, &disk_bargs);
btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
- WARN_ON(test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
+ /*
+ * This should never happen, as the paused balance state is recovered
+ * during mount, with no chance for other exclusive ops to collide.
+ *
+ * This gives the exclusive op status to balance and keeps in paused
+ * state until user intervention (cancel or umount). If the ownership
+ * cannot be assigned, show a message but do not fail. The balance
+ * is in a paused state and must have fs_info::balance_ctl properly
+ * set up.
+ */
+ if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
+ btrfs_warn(fs_info,
+ "balance: cannot set exclusive op status, resume manually");
- mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
-
- set_balance_control(bctl);
-
+ BUG_ON(fs_info->balance_ctl);
+ spin_lock(&fs_info->balance_lock);
+ fs_info->balance_ctl = bctl;
+ spin_unlock(&fs_info->balance_lock);
mutex_unlock(&fs_info->balance_mutex);
- mutex_unlock(&fs_info->volume_mutex);
out:
btrfs_free_path(path);
return ret;
@@ -4126,16 +4069,16 @@ int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
return -ENOTCONN;
}
- if (atomic_read(&fs_info->balance_running)) {
+ if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
atomic_inc(&fs_info->balance_pause_req);
mutex_unlock(&fs_info->balance_mutex);
wait_event(fs_info->balance_wait_q,
- atomic_read(&fs_info->balance_running) == 0);
+ !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
mutex_lock(&fs_info->balance_mutex);
/* we are good with balance_ctl ripped off from under us */
- BUG_ON(atomic_read(&fs_info->balance_running));
+ BUG_ON(test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
atomic_dec(&fs_info->balance_pause_req);
} else {
ret = -ENOTCONN;
@@ -4147,38 +4090,49 @@ int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
{
- if (sb_rdonly(fs_info->sb))
- return -EROFS;
-
mutex_lock(&fs_info->balance_mutex);
if (!fs_info->balance_ctl) {
mutex_unlock(&fs_info->balance_mutex);
return -ENOTCONN;
}
+ /*
+ * A paused balance with the item stored on disk can be resumed at
+ * mount time if the mount is read-write. Otherwise it's still paused
+ * and we must not allow cancelling as it deletes the item.
+ */
+ if (sb_rdonly(fs_info->sb)) {
+ mutex_unlock(&fs_info->balance_mutex);
+ return -EROFS;
+ }
+
atomic_inc(&fs_info->balance_cancel_req);
/*
* if we are running just wait and return, balance item is
* deleted in btrfs_balance in this case
*/
- if (atomic_read(&fs_info->balance_running)) {
+ if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
mutex_unlock(&fs_info->balance_mutex);
wait_event(fs_info->balance_wait_q,
- atomic_read(&fs_info->balance_running) == 0);
+ !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
mutex_lock(&fs_info->balance_mutex);
} else {
- /* __cancel_balance needs volume_mutex */
mutex_unlock(&fs_info->balance_mutex);
- mutex_lock(&fs_info->volume_mutex);
+ /*
+ * Lock released to allow other waiters to continue; we'll
+ * reexamine the status.
+ */
mutex_lock(&fs_info->balance_mutex);
- if (fs_info->balance_ctl)
- __cancel_balance(fs_info);
-
- mutex_unlock(&fs_info->volume_mutex);
+ if (fs_info->balance_ctl) {
+ reset_balance_state(fs_info);
+ clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
+ btrfs_info(fs_info, "balance: canceled");
+ }
}
- BUG_ON(fs_info->balance_ctl || atomic_read(&fs_info->balance_running));
+ BUG_ON(fs_info->balance_ctl ||
+ test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
atomic_dec(&fs_info->balance_cancel_req);
mutex_unlock(&fs_info->balance_mutex);
return 0;
@@ -4255,8 +4209,7 @@ static int btrfs_uuid_scan_kthread(void *data)
}
update_tree:
if (!btrfs_is_empty_uuid(root_item.uuid)) {
- ret = btrfs_uuid_tree_add(trans, fs_info,
- root_item.uuid,
+ ret = btrfs_uuid_tree_add(trans, root_item.uuid,
BTRFS_UUID_KEY_SUBVOL,
key.objectid);
if (ret < 0) {
@@ -4267,7 +4220,7 @@ update_tree:
}
if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
- ret = btrfs_uuid_tree_add(trans, fs_info,
+ ret = btrfs_uuid_tree_add(trans,
root_item.received_uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
key.objectid);
@@ -4473,7 +4426,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
if (!path)
return -ENOMEM;
- path->reada = READA_FORWARD;
+ path->reada = READA_BACK;
mutex_lock(&fs_info->chunk_mutex);
@@ -6034,9 +5987,8 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1);
}
-int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
- u64 chunk_start, u64 physical, u64 devid,
- u64 **logical, int *naddrs, int *stripe_len)
+int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
+ u64 physical, u64 **logical, int *naddrs, int *stripe_len)
{
struct extent_map *em;
struct map_lookup *map;
@@ -6068,8 +6020,6 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
BUG_ON(!buf); /* -ENOMEM */
for (i = 0; i < map->num_stripes; i++) {
- if (devid && map->stripes[i].dev->devid != devid)
- continue;
if (map->stripes[i].physical > physical ||
map->stripes[i].physical + length <= physical)
continue;
@@ -6401,7 +6351,7 @@ static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices,
*
* Return: a pointer to a new &struct btrfs_device on success; ERR_PTR()
* on error. Returned struct is not linked onto any lists and must be
- * destroyed with free_device.
+ * destroyed with btrfs_free_device.
*/
struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
const u64 *devid,
@@ -6424,7 +6374,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
ret = find_next_devid(fs_info, &tmp);
if (ret) {
- free_device(dev);
+ btrfs_free_device(dev);
return ERR_PTR(ret);
}
}
@@ -6675,8 +6625,7 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info,
if (IS_ERR(fs_devices))
return fs_devices;
- ret = __btrfs_open_devices(fs_devices, FMODE_READ,
- fs_info->bdev_holder);
+ ret = open_fs_devices(fs_devices, FMODE_READ, fs_info->bdev_holder);
if (ret) {
free_fs_devices(fs_devices);
fs_devices = ERR_PTR(ret);
@@ -6684,7 +6633,7 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info,
}
if (!fs_devices->seeding) {
- __btrfs_close_devices(fs_devices);
+ close_fs_devices(fs_devices);
free_fs_devices(fs_devices);
fs_devices = ERR_PTR(-EINVAL);
goto out;
@@ -6993,6 +6942,10 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
if (!path)
return -ENOMEM;
+ /*
+ * uuid_mutex is needed only when we are mounting a sprout FS;
+ * otherwise we don't need it.
+ */
mutex_lock(&uuid_mutex);
mutex_lock(&fs_info->chunk_mutex);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 79096884654f..5139ec8daf4c 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -208,6 +208,7 @@ BTRFS_DEVICE_GETSET_FUNCS(bytes_used);
struct btrfs_fs_devices {
u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
+ struct list_head fs_list;
u64 num_devices;
u64 open_devices;
@@ -229,7 +230,6 @@ struct btrfs_fs_devices {
struct list_head resized_devices;
/* devices not currently being allocated */
struct list_head alloc_list;
- struct list_head list;
struct btrfs_fs_devices *seed;
int seeding;
@@ -329,11 +329,12 @@ struct btrfs_raid_attr {
int tolerated_failures; /* max tolerated fail devs */
int devs_increment; /* ndevs has to be a multiple of this */
int ncopies; /* how many copies of the data */
+ int mindev_error; /* error code if the minimum device count is not met */
+ const char raid_name[8]; /* name of the raid profile */
+ u64 bg_flag; /* block group flag of the raid profile */
};
extern const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES];
-extern const int btrfs_raid_mindev_error[BTRFS_NR_RAID_TYPES];
-extern const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES];
struct map_lookup {
u64 type;
@@ -351,8 +352,6 @@ struct map_lookup {
struct btrfs_balance_args;
struct btrfs_balance_progress;
struct btrfs_balance_control {
- struct btrfs_fs_info *fs_info;
-
struct btrfs_balance_args data;
struct btrfs_balance_args meta;
struct btrfs_balance_args sys;
@@ -393,9 +392,8 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret);
-int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
- u64 chunk_start, u64 physical, u64 devid,
- u64 **logical, int *naddrs, int *stripe_len);
+int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
+ u64 physical, u64 **logical, int *naddrs, int *stripe_len);
int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
@@ -421,6 +419,7 @@ int btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info, u64 devid,
struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
const u64 *devid,
const u8 *uuid);
+void btrfs_free_device(struct btrfs_device *device);
int btrfs_rm_device(struct btrfs_fs_info *fs_info,
const char *device_path, u64 devid);
void __exit btrfs_cleanup_fs_uuids(void);
@@ -431,11 +430,8 @@ struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
u8 *uuid, u8 *fsid);
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path);
-int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
- const char *device_path,
- struct btrfs_device *srcdev,
- struct btrfs_device **device_out);
-int btrfs_balance(struct btrfs_balance_control *bctl,
+int btrfs_balance(struct btrfs_fs_info *fs_info,
+ struct btrfs_balance_control *bctl,
struct btrfs_ioctl_balance_args *bargs);
int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
@@ -553,6 +549,8 @@ static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
}
+const char *get_raid_name(enum btrfs_raid_types type);
+
void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info);
void btrfs_update_commit_device_bytes_used(struct btrfs_transaction *trans);
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 0daa1e3fe0df..ab0bbe93b398 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -572,6 +572,11 @@ lookup_again:
if (ret < 0)
goto create_error;
+ if (unlikely(d_unhashed(next))) {
+ dput(next);
+ inode_unlock(d_inode(dir));
+ goto lookup_again;
+ }
ASSERT(d_backing_inode(next));
_debug("mkdir -> %p{%p{ino=%lu}}",
@@ -764,6 +769,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
/* search the current directory for the element name */
inode_lock(d_inode(dir));
+retry:
start = jiffies;
subdir = lookup_one_len(dirname, dir, strlen(dirname));
cachefiles_hist(cachefiles_lookup_histogram, start);
@@ -793,6 +799,10 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
if (ret < 0)
goto mkdir_error;
+ if (unlikely(d_unhashed(subdir))) {
+ dput(subdir);
+ goto retry;
+ }
ASSERT(d_backing_inode(subdir));
_debug("mkdir -> %p{%p{ino=%lu}}",
diff --git a/fs/cachefiles/proc.c b/fs/cachefiles/proc.c
index 125b90f6c796..0ce1aa56b67f 100644
--- a/fs/cachefiles/proc.c
+++ b/fs/cachefiles/proc.c
@@ -85,21 +85,6 @@ static const struct seq_operations cachefiles_histogram_ops = {
};
/*
- * open "/proc/fs/cachefiles/XXX" which provide statistics summaries
- */
-static int cachefiles_histogram_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &cachefiles_histogram_ops);
-}
-
-static const struct file_operations cachefiles_histogram_fops = {
- .open = cachefiles_histogram_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-/*
* initialise the /proc/fs/cachefiles/ directory
*/
int __init cachefiles_proc_init(void)
@@ -109,8 +94,8 @@ int __init cachefiles_proc_init(void)
if (!proc_mkdir("fs/cachefiles", NULL))
goto error_dir;
- if (!proc_create("fs/cachefiles/histogram", S_IFREG | 0444, NULL,
- &cachefiles_histogram_fops))
+ if (!proc_create_seq("fs/cachefiles/histogram", S_IFREG | 0444, NULL,
+ &cachefiles_histogram_ops))
goto error_histogram;
_leave(" = 0");
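proc_create_seq() registers a seq_file-backed entry directly, which is what makes the open handler and file_operations boilerplate above removable. A minimal sketch of the pattern, assuming an existing seq_operations table:

	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	extern const struct seq_operations cachefiles_histogram_ops;

	static int __init register_histogram(void)
	{
		/* read-only seq_file entry; no custom fops needed */
		if (!proc_create_seq("fs/cachefiles/histogram",
				     S_IFREG | 0444, NULL,
				     &cachefiles_histogram_ops))
			return -ENOMEM;
		return 0;
	}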
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index f85040d73e3d..cf0e45b10121 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -70,69 +70,104 @@ static __le32 ceph_flags_sys2wire(u32 flags)
*/
/*
- * Calculate the length sum of direct io vectors that can
- * be combined into one page vector.
+ * How many pages to get in one call to iov_iter_get_pages(). This
+ * determines the size of the on-stack array used as a buffer.
*/
-static size_t dio_get_pagev_size(const struct iov_iter *it)
+#define ITER_GET_BVECS_PAGES 64
+
+static ssize_t __iter_get_bvecs(struct iov_iter *iter, size_t maxsize,
+ struct bio_vec *bvecs)
{
- const struct iovec *iov = it->iov;
- const struct iovec *iovend = iov + it->nr_segs;
- size_t size;
-
- size = iov->iov_len - it->iov_offset;
- /*
- * An iov can be page vectored when both the current tail
- * and the next base are page aligned.
- */
- while (PAGE_ALIGNED((iov->iov_base + iov->iov_len)) &&
- (++iov < iovend && PAGE_ALIGNED((iov->iov_base)))) {
- size += iov->iov_len;
- }
- dout("dio_get_pagevlen len = %zu\n", size);
- return size;
+ size_t size = 0;
+ int bvec_idx = 0;
+
+ if (maxsize > iov_iter_count(iter))
+ maxsize = iov_iter_count(iter);
+
+ while (size < maxsize) {
+ struct page *pages[ITER_GET_BVECS_PAGES];
+ ssize_t bytes;
+ size_t start;
+ int idx = 0;
+
+ bytes = iov_iter_get_pages(iter, pages, maxsize - size,
+ ITER_GET_BVECS_PAGES, &start);
+ if (bytes < 0)
+ return size ?: bytes;
+
+ iov_iter_advance(iter, bytes);
+ size += bytes;
+
+ for ( ; bytes; idx++, bvec_idx++) {
+ struct bio_vec bv = {
+ .bv_page = pages[idx],
+ .bv_len = min_t(int, bytes, PAGE_SIZE - start),
+ .bv_offset = start,
+ };
+
+ bvecs[bvec_idx] = bv;
+ bytes -= bv.bv_len;
+ start = 0;
+ }
+ }
+
+ return size;
}
/*
- * Allocate a page vector based on (@it, @nbytes).
- * The return value is the tuple describing a page vector,
- * that is (@pages, @page_align, @num_pages).
+ * iov_iter_get_pages() only considers one iov_iter segment, no matter
+ * what maxsize or maxpages are given. For ITER_BVEC that is a single
+ * page.
+ *
+ * Attempt to get up to @maxsize bytes worth of pages from @iter.
+ * Return the number of bytes in the created bio_vec array, or an error.
*/
-static struct page **
-dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes,
- size_t *page_align, int *num_pages)
+static ssize_t iter_get_bvecs_alloc(struct iov_iter *iter, size_t maxsize,
+ struct bio_vec **bvecs, int *num_bvecs)
{
- struct iov_iter tmp_it = *it;
- size_t align;
- struct page **pages;
- int ret = 0, idx, npages;
+ struct bio_vec *bv;
+ size_t orig_count = iov_iter_count(iter);
+ ssize_t bytes;
+ int npages;
- align = (unsigned long)(it->iov->iov_base + it->iov_offset) &
- (PAGE_SIZE - 1);
- npages = calc_pages_for(align, nbytes);
- pages = kvmalloc(sizeof(*pages) * npages, GFP_KERNEL);
- if (!pages)
- return ERR_PTR(-ENOMEM);
+ iov_iter_truncate(iter, maxsize);
+ npages = iov_iter_npages(iter, INT_MAX);
+ iov_iter_reexpand(iter, orig_count);
- for (idx = 0; idx < npages; ) {
- size_t start;
- ret = iov_iter_get_pages(&tmp_it, pages + idx, nbytes,
- npages - idx, &start);
- if (ret < 0)
- goto fail;
+ /*
+ * __iter_get_bvecs() may populate only part of the array -- zero it
+ * out.
+ */
+ bv = kvmalloc_array(npages, sizeof(*bv), GFP_KERNEL | __GFP_ZERO);
+ if (!bv)
+ return -ENOMEM;
- iov_iter_advance(&tmp_it, ret);
- nbytes -= ret;
- idx += (ret + start + PAGE_SIZE - 1) / PAGE_SIZE;
+ bytes = __iter_get_bvecs(iter, maxsize, bv);
+ if (bytes < 0) {
+ /*
+ * No pages were pinned -- just free the array.
+ */
+ kvfree(bv);
+ return bytes;
}
- BUG_ON(nbytes != 0);
- *num_pages = npages;
- *page_align = align;
- dout("dio_get_pages_alloc: got %d pages align %zu\n", npages, align);
- return pages;
-fail:
- ceph_put_page_vector(pages, idx, false);
- return ERR_PTR(ret);
+ *bvecs = bv;
+ *num_bvecs = npages;
+ return bytes;
+}
+
+static void put_bvecs(struct bio_vec *bvecs, int num_bvecs, bool should_dirty)
+{
+ int i;
+
+ for (i = 0; i < num_bvecs; i++) {
+ if (bvecs[i].bv_page) {
+ if (should_dirty)
+ set_page_dirty_lock(bvecs[i].bv_page);
+ put_page(bvecs[i].bv_page);
+ }
+ }
+ kvfree(bvecs);
}
/*
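Taken together, the two helpers replace the old page-vector path: iter_get_bvecs_alloc() pins up to maxsize bytes of the iterator's pages into a freshly allocated bio_vec array (advancing the iterator as a side effect), and put_bvecs() later unpins, optionally dirties, and frees. A hedged usage sketch; the caller below is hypothetical:

	static ssize_t dio_demo(struct iov_iter *iter, size_t maxsize,
				bool write)
	{
		struct bio_vec *bvecs;
		int num_bvecs;
		ssize_t len;

		len = iter_get_bvecs_alloc(iter, maxsize, &bvecs, &num_bvecs);
		if (len < 0)
			return len; /* nothing pinned, nothing to free */

		/* ... attach (bvecs, num_bvecs, len) to the OSD request ... */

		/* dirty the pages only when a read landed data in them */
		put_bvecs(bvecs, num_bvecs, /* should_dirty = */ !write);
		return len;
	}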
@@ -746,11 +781,12 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
struct inode *inode = req->r_inode;
struct ceph_aio_request *aio_req = req->r_priv;
struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
- int num_pages = calc_pages_for((u64)osd_data->alignment,
- osd_data->length);
- dout("ceph_aio_complete_req %p rc %d bytes %llu\n",
- inode, rc, osd_data->length);
+ BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_BVECS);
+ BUG_ON(!osd_data->num_bvecs);
+
+ dout("ceph_aio_complete_req %p rc %d bytes %u\n",
+ inode, rc, osd_data->bvec_pos.iter.bi_size);
if (rc == -EOLDSNAPC) {
struct ceph_aio_work *aio_work;
@@ -768,9 +804,10 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
} else if (!aio_req->write) {
if (rc == -ENOENT)
rc = 0;
- if (rc >= 0 && osd_data->length > rc) {
- int zoff = osd_data->alignment + rc;
- int zlen = osd_data->length - rc;
+ if (rc >= 0 && osd_data->bvec_pos.iter.bi_size > rc) {
+ struct iov_iter i;
+ int zlen = osd_data->bvec_pos.iter.bi_size - rc;
+
/*
* If read is satisfied by single OSD request,
* it can pass EOF. Otherwise read is within
@@ -785,13 +822,16 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
aio_req->total_len = rc + zlen;
}
- if (zlen > 0)
- ceph_zero_page_vector_range(zoff, zlen,
- osd_data->pages);
+ iov_iter_bvec(&i, ITER_BVEC, osd_data->bvec_pos.bvecs,
+ osd_data->num_bvecs,
+ osd_data->bvec_pos.iter.bi_size);
+ iov_iter_advance(&i, rc);
+ iov_iter_zero(zlen, &i);
}
}
- ceph_put_page_vector(osd_data->pages, num_pages, aio_req->should_dirty);
+ put_bvecs(osd_data->bvec_pos.bvecs, osd_data->num_bvecs,
+ aio_req->should_dirty);
ceph_osdc_put_request(req);
if (rc < 0)
@@ -879,7 +919,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_vino vino;
struct ceph_osd_request *req;
- struct page **pages;
+ struct bio_vec *bvecs;
struct ceph_aio_request *aio_req = NULL;
int num_pages = 0;
int flags;
@@ -914,10 +954,14 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
}
while (iov_iter_count(iter) > 0) {
- u64 size = dio_get_pagev_size(iter);
- size_t start = 0;
+ u64 size = iov_iter_count(iter);
ssize_t len;
+ if (write)
+ size = min_t(u64, size, fsc->mount_options->wsize);
+ else
+ size = min_t(u64, size, fsc->mount_options->rsize);
+
vino = ceph_vino(inode);
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
vino, pos, &size, 0,
@@ -933,18 +977,14 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
break;
}
- if (write)
- size = min_t(u64, size, fsc->mount_options->wsize);
- else
- size = min_t(u64, size, fsc->mount_options->rsize);
-
- len = size;
- pages = dio_get_pages_alloc(iter, len, &start, &num_pages);
- if (IS_ERR(pages)) {
+ len = iter_get_bvecs_alloc(iter, size, &bvecs, &num_pages);
+ if (len < 0) {
ceph_osdc_put_request(req);
- ret = PTR_ERR(pages);
+ ret = len;
break;
}
+ if (len != size)
+ osd_req_op_extent_update(req, 0, len);
/*
* To simplify error handling, allow AIO when IO within i_size
@@ -977,8 +1017,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
req->r_mtime = mtime;
}
- osd_req_op_extent_osd_data_pages(req, 0, pages, len, start,
- false, false);
+ osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len);
if (aio_req) {
aio_req->total_len += len;
@@ -991,7 +1030,6 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
list_add_tail(&req->r_unsafe_item, &aio_req->osd_reqs);
pos += len;
- iov_iter_advance(iter, len);
continue;
}
@@ -1004,25 +1042,26 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
if (ret == -ENOENT)
ret = 0;
if (ret >= 0 && ret < len && pos + ret < size) {
+ struct iov_iter i;
int zlen = min_t(size_t, len - ret,
size - pos - ret);
- ceph_zero_page_vector_range(start + ret, zlen,
- pages);
+
+ iov_iter_bvec(&i, ITER_BVEC, bvecs, num_pages,
+ len);
+ iov_iter_advance(&i, ret);
+ iov_iter_zero(zlen, &i);
ret += zlen;
}
if (ret >= 0)
len = ret;
}
- ceph_put_page_vector(pages, num_pages, should_dirty);
-
+ put_bvecs(bvecs, num_pages, should_dirty);
ceph_osdc_put_request(req);
if (ret < 0)
break;
pos += len;
- iov_iter_advance(iter, len);
-
if (!write && pos >= size)
break;
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 741749a98614..5f132d59dfc2 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -197,7 +197,7 @@ config CIFS_SMB311
config CIFS_SMB_DIRECT
bool "SMB Direct support (Experimental)"
- depends on CIFS=m && INFINIBAND || CIFS=y && INFINIBAND=y
+ depends on CIFS=m && INFINIBAND && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y
help
Enables SMB Direct experimental support for SMB 3.0, 3.02 and 3.1.1.
SMB Direct allows transferring SMB packets over RDMA. If unsure,
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 7e4a1e2f0696..85817991ee68 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -1,11 +1,12 @@
# SPDX-License-Identifier: GPL-2.0
#
-# Makefile for Linux CIFS VFS client
+# Makefile for Linux CIFS/SMB2/SMB3 VFS client
#
+ccflags-y += -I$(src) # needed for trace events
obj-$(CONFIG_CIFS) += cifs.o
-cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
- link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \
+cifs-y := trace.o cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o \
+ inode.o link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \
cifs_unicode.o nterr.o cifsencrypt.o \
readdir.o ioctl.o sess.o export.o smb1ops.o winucase.o \
smb2ops.o smb2maperror.o smb2transport.o \
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 9d69ea433330..116146022aa1 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -42,7 +42,7 @@ cifs_dump_mem(char *label, void *data, int length)
data, length, true);
}
-void cifs_dump_detail(void *buf)
+void cifs_dump_detail(void *buf, struct TCP_Server_Info *server)
{
#ifdef CONFIG_CIFS_DEBUG2
struct smb_hdr *smb = (struct smb_hdr *)buf;
@@ -50,7 +50,8 @@ void cifs_dump_detail(void *buf)
cifs_dbg(VFS, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d\n",
smb->Command, smb->Status.CifsError,
smb->Flags, smb->Flags2, smb->Mid, smb->Pid);
- cifs_dbg(VFS, "smb buf %p len %u\n", smb, smbCalcSize(smb));
+ cifs_dbg(VFS, "smb buf %p len %u\n", smb,
+ server->ops->calc_smb_size(smb, server));
#endif /* CONFIG_CIFS_DEBUG2 */
}
@@ -83,7 +84,7 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
cifs_dbg(VFS, "IsMult: %d IsEnd: %d\n",
mid_entry->multiRsp, mid_entry->multiEnd);
if (mid_entry->resp_buf) {
- cifs_dump_detail(mid_entry->resp_buf);
+ cifs_dump_detail(mid_entry->resp_buf, server);
cifs_dump_mem("existing buf: ",
mid_entry->resp_buf, 62);
}
@@ -113,6 +114,8 @@ static void cifs_debug_tcon(struct seq_file *m, struct cifs_tcon *tcon)
seq_printf(m, " type: %d ", dev_type);
if (tcon->seal)
seq_printf(m, " Encrypted");
+ if (tcon->nocase)
+ seq_printf(m, " nocase");
if (tcon->unix_ext)
seq_printf(m, " POSIX Extensions");
if (tcon->ses->server->ops->dump_share_caps)
@@ -237,6 +240,10 @@ skip_rdma:
server->credits, server->dialect);
if (server->sign)
seq_printf(m, " signed");
+#ifdef CONFIG_CIFS_SMB311
+ if (server->posix_ext_supported)
+ seq_printf(m, " posix");
+#endif /* 3.1.1 */
i++;
list_for_each(tmp2, &server->smb_ses_list) {
ses = list_entry(tmp2, struct cifs_ses,
@@ -314,18 +321,6 @@ skip_rdma:
return 0;
}
-static int cifs_debug_data_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, cifs_debug_data_proc_show, NULL);
-}
-
-static const struct file_operations cifs_debug_data_proc_fops = {
- .open = cifs_debug_data_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
#ifdef CONFIG_CIFS_STATS
static ssize_t cifs_stats_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *ppos)
@@ -497,35 +492,36 @@ cifs_proc_init(void)
if (proc_fs_cifs == NULL)
return;
- proc_create("DebugData", 0, proc_fs_cifs, &cifs_debug_data_proc_fops);
+ proc_create_single("DebugData", 0, proc_fs_cifs,
+ cifs_debug_data_proc_show);
#ifdef CONFIG_CIFS_STATS
- proc_create("Stats", 0, proc_fs_cifs, &cifs_stats_proc_fops);
+ proc_create("Stats", 0644, proc_fs_cifs, &cifs_stats_proc_fops);
#endif /* STATS */
- proc_create("cifsFYI", 0, proc_fs_cifs, &cifsFYI_proc_fops);
- proc_create("traceSMB", 0, proc_fs_cifs, &traceSMB_proc_fops);
- proc_create("LinuxExtensionsEnabled", 0, proc_fs_cifs,
+ proc_create("cifsFYI", 0644, proc_fs_cifs, &cifsFYI_proc_fops);
+ proc_create("traceSMB", 0644, proc_fs_cifs, &traceSMB_proc_fops);
+ proc_create("LinuxExtensionsEnabled", 0644, proc_fs_cifs,
&cifs_linux_ext_proc_fops);
- proc_create("SecurityFlags", 0, proc_fs_cifs,
+ proc_create("SecurityFlags", 0644, proc_fs_cifs,
&cifs_security_flags_proc_fops);
- proc_create("LookupCacheEnabled", 0, proc_fs_cifs,
+ proc_create("LookupCacheEnabled", 0644, proc_fs_cifs,
&cifs_lookup_cache_proc_fops);
#ifdef CONFIG_CIFS_SMB_DIRECT
- proc_create("rdma_readwrite_threshold", 0, proc_fs_cifs,
+ proc_create("rdma_readwrite_threshold", 0644, proc_fs_cifs,
&cifs_rdma_readwrite_threshold_proc_fops);
- proc_create("smbd_max_frmr_depth", 0, proc_fs_cifs,
+ proc_create("smbd_max_frmr_depth", 0644, proc_fs_cifs,
&cifs_smbd_max_frmr_depth_proc_fops);
- proc_create("smbd_keep_alive_interval", 0, proc_fs_cifs,
+ proc_create("smbd_keep_alive_interval", 0644, proc_fs_cifs,
&cifs_smbd_keep_alive_interval_proc_fops);
- proc_create("smbd_max_receive_size", 0, proc_fs_cifs,
+ proc_create("smbd_max_receive_size", 0644, proc_fs_cifs,
&cifs_smbd_max_receive_size_proc_fops);
- proc_create("smbd_max_fragmented_recv_size", 0, proc_fs_cifs,
+ proc_create("smbd_max_fragmented_recv_size", 0644, proc_fs_cifs,
&cifs_smbd_max_fragmented_recv_size_proc_fops);
- proc_create("smbd_max_send_size", 0, proc_fs_cifs,
+ proc_create("smbd_max_send_size", 0644, proc_fs_cifs,
&cifs_smbd_max_send_size_proc_fops);
- proc_create("smbd_send_credit_target", 0, proc_fs_cifs,
+ proc_create("smbd_send_credit_target", 0644, proc_fs_cifs,
&cifs_smbd_send_credit_target_proc_fops);
- proc_create("smbd_receive_credit_max", 0, proc_fs_cifs,
+ proc_create("smbd_receive_credit_max", 0644, proc_fs_cifs,
&cifs_smbd_receive_credit_max_proc_fops);
#endif
}
@@ -583,6 +579,8 @@ static ssize_t cifsFYI_proc_write(struct file *file, const char __user *buffer,
cifsFYI = bv;
else if ((c[0] > '1') && (c[0] <= '9'))
cifsFYI = (int) (c[0] - '0'); /* see cifs_debug.h for meanings */
+ else
+ return -EINVAL;
return count;
}
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index 0e74690d11bc..f4f3f0853c6e 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -23,7 +23,7 @@
#define _H_CIFS_DEBUG
void cifs_dump_mem(char *label, void *data, int length);
-void cifs_dump_detail(void *);
+void cifs_dump_detail(void *buf, struct TCP_Server_Info *ptcp_info);
void cifs_dump_mids(struct TCP_Server_Info *);
extern bool traceSMB; /* flag which enables the function below */
void dump_smb(void *, int);
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 350fa55a1bf7..9731d0d891e7 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -50,6 +50,7 @@
* root mountable
*/
#define CIFS_MOUNT_UID_FROM_ACL 0x2000000 /* try to get UID via special SID */
+#define CIFS_MOUNT_NO_HANDLE_CACHE 0x4000000 /* disable caching dir handles */
struct cifs_sb_info {
struct rb_root tlink_tree;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index f715609b13f3..eb7b6573f322 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -58,13 +58,15 @@ bool traceSMB;
bool enable_oplocks = true;
bool linuxExtEnabled = true;
bool lookupCacheEnabled = true;
+bool disable_legacy_dialects; /* false by default */
unsigned int global_secflags = CIFSSEC_DEF;
/* unsigned int ntlmv2_support = 0; */
unsigned int sign_CIFS_PDUs = 1;
static const struct super_operations cifs_super_ops;
unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE;
module_param(CIFSMaxBufSize, uint, 0444);
-MODULE_PARM_DESC(CIFSMaxBufSize, "Network buffer size (not including header). "
+MODULE_PARM_DESC(CIFSMaxBufSize, "Network buffer size (not including header) "
+ "for CIFS requests. "
"Default: 16384 Range: 8192 to 130048");
unsigned int cifs_min_rcv = CIFS_MIN_RCV_POOL;
module_param(cifs_min_rcv, uint, 0444);
@@ -76,11 +78,21 @@ MODULE_PARM_DESC(cifs_min_small, "Small network buffers in pool. Default: 30 "
"Range: 2 to 256");
unsigned int cifs_max_pending = CIFS_MAX_REQ;
module_param(cifs_max_pending, uint, 0444);
-MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. "
+MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server for "
+ "CIFS/SMB1 dialect (N/A for SMB3) "
"Default: 32767 Range: 2 to 32767.");
module_param(enable_oplocks, bool, 0644);
MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks. Default: y/Y/1");
+module_param(disable_legacy_dialects, bool, 0644);
+MODULE_PARM_DESC(disable_legacy_dialects, "To improve security it may be "
+ "helpful to restrict the ability to "
+ "override the default dialects (SMB2.1, "
+ "SMB3 and SMB3.02) on mount with old "
+ "dialects (CIFS/SMB1 and SMB2) since "
+ "vers=1.0 (CIFS/SMB1) and vers=2.0 are weaker"
+ " and less secure. Default: n/N/0");
+
extern mempool_t *cifs_sm_req_poolp;
extern mempool_t *cifs_req_poolp;
extern mempool_t *cifs_mid_poolp;
@@ -469,10 +481,20 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_puts(s, ",persistenthandles");
else if (tcon->use_resilient)
seq_puts(s, ",resilienthandles");
+
+#ifdef CONFIG_CIFS_SMB311
+ if (tcon->posix_extensions)
+ seq_puts(s, ",posix");
+ else if (tcon->unix_ext)
+ seq_puts(s, ",unix");
+ else
+ seq_puts(s, ",nounix");
+#else
if (tcon->unix_ext)
seq_puts(s, ",unix");
else
seq_puts(s, ",nounix");
+#endif /* SMB311 */
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
seq_puts(s, ",posixpaths");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)
@@ -495,6 +517,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_puts(s, ",sfu");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
seq_puts(s, ",nobrl");
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_HANDLE_CACHE)
+ seq_puts(s, ",nohandlecache");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
seq_puts(s, ",cifsacl");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
@@ -897,6 +921,17 @@ struct file_system_type cifs_fs_type = {
/* .fs_flags */
};
MODULE_ALIAS_FS("cifs");
+
+static struct file_system_type smb3_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "smb3",
+ .mount = cifs_do_mount,
+ .kill_sb = cifs_kill_sb,
+ /* .fs_flags */
+};
+MODULE_ALIAS_FS("smb3");
+MODULE_ALIAS("smb3");
+
const struct inode_operations cifs_dir_inode_ops = {
.create = cifs_create,
.atomic_open = cifs_atomic_open,
@@ -1047,6 +1082,18 @@ out:
return rc;
}
+/*
+ * Directory operations under CIFS/SMB2/SMB3 are synchronous, so fsync()
+ * is a dummy operation.
+ */
+static int cifs_dir_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+ cifs_dbg(FYI, "Sync directory - name: %pD datasync: 0x%x\n",
+ file, datasync);
+
+ return 0;
+}
+
static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off,
struct file *dst_file, loff_t destoff,
size_t len, unsigned int flags)
@@ -1181,6 +1228,7 @@ const struct file_operations cifs_dir_ops = {
.copy_file_range = cifs_copy_file_range,
.clone_file_range = cifs_clone_file_range,
.llseek = generic_file_llseek,
+ .fsync = cifs_dir_fsync,
};
static void
@@ -1422,6 +1470,12 @@ init_cifs(void)
if (rc)
goto out_init_cifs_idmap;
+ rc = register_filesystem(&smb3_fs_type);
+ if (rc) {
+ unregister_filesystem(&cifs_fs_type);
+ goto out_init_cifs_idmap;
+ }
+
return 0;
out_init_cifs_idmap:
@@ -1452,8 +1506,9 @@ out_clean_proc:
static void __exit
exit_cifs(void)
{
- cifs_dbg(NOISY, "exit_cifs\n");
+ cifs_dbg(NOISY, "exit_smb3\n");
unregister_filesystem(&cifs_fs_type);
+ unregister_filesystem(&smb3_fs_type);
cifs_dfs_release_automount_timer();
#ifdef CONFIG_CIFS_ACL
exit_cifs_idmap();
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 013ba2aed8d9..5f0231803431 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -149,5 +149,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.11"
+#define CIFS_VERSION "2.12"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index cb950a5fa078..08d1cdd96701 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -176,6 +176,7 @@ struct smb_rqst {
struct kvec *rq_iov; /* array of kvecs */
unsigned int rq_nvec; /* number of kvecs in array */
struct page **rq_pages; /* pointer to array of page ptrs */
+ unsigned int rq_offset; /* the offset to the 1st page */
unsigned int rq_npages; /* number pages in array */
unsigned int rq_pagesz; /* page size to use */
unsigned int rq_tailsz; /* length of last page */
@@ -244,7 +245,7 @@ struct smb_version_operations {
int (*map_error)(char *, bool);
/* find mid corresponding to the response message */
struct mid_q_entry * (*find_mid)(struct TCP_Server_Info *, char *);
- void (*dump_detail)(void *);
+ void (*dump_detail)(void *buf, struct TCP_Server_Info *ptcp_info);
void (*clear_stats)(struct cifs_tcon *);
void (*print_stats)(struct seq_file *m, struct cifs_tcon *);
void (*dump_share_caps)(struct seq_file *, struct cifs_tcon *);
@@ -372,7 +373,7 @@ struct smb_version_operations {
int (*close_dir)(const unsigned int, struct cifs_tcon *,
struct cifs_fid *);
/* calculate a size of SMB message */
- unsigned int (*calc_smb_size)(void *);
+ unsigned int (*calc_smb_size)(void *buf, struct TCP_Server_Info *ptcpi);
/* check for STATUS_PENDING and process it in a positive case */
bool (*is_status_pending)(char *, struct TCP_Server_Info *, int);
/* check for STATUS_NETWORK_SESSION_EXPIRED */
@@ -417,7 +418,7 @@ struct smb_version_operations {
/* create lease context buffer for CREATE request */
char * (*create_lease_buf)(u8 *, u8);
/* parse lease context buffer and return oplock/epoch info */
- __u8 (*parse_lease_buf)(void *, unsigned int *);
+ __u8 (*parse_lease_buf)(void *buf, unsigned int *epoch, char *lkey);
ssize_t (*copychunk_range)(const unsigned int,
struct cifsFileInfo *src_file,
struct cifsFileInfo *target_file,
@@ -457,7 +458,7 @@ struct smb_version_operations {
struct mid_q_entry **);
enum securityEnum (*select_sectype)(struct TCP_Server_Info *,
enum securityEnum);
-
+ int (*next_header)(char *);
};
struct smb_version_values {
@@ -521,10 +522,12 @@ struct smb_vol {
bool sfu_remap:1; /* remap seven reserved chars ala SFU */
bool posix_paths:1; /* unset to not ask for posix pathnames. */
bool no_linux_ext:1;
+ bool linux_ext:1;
bool sfu_emul:1;
bool nullauth:1; /* attempt to authenticate with null user */
bool nocase:1; /* request case insensitive filenames */
bool nobrl:1; /* disable sending byte range locks to srv */
+ bool nohandlecache:1; /* disable caching dir handles if the server has problems */
bool mand_lock:1; /* send mandatory not posix byte range lock reqs */
bool seal:1; /* request transport encryption on share */
bool nodfs:1; /* Do not request DFS, even if available */
@@ -630,7 +633,7 @@ struct TCP_Server_Info {
bool oplocks:1; /* enable oplocks */
unsigned int maxReq; /* Clients should submit no more */
/* than maxReq distinct unanswered SMBs to the server when using */
- /* multiplexed reads or writes */
+ /* multiplexed reads or writes (for SMB1/CIFS only, not SMB2/SMB3) */
unsigned int maxBuf; /* maxBuf specifies the maximum */
/* message size the server can send or receive for non-raw SMBs */
/* maxBuf is returned by SMB NegotiateProtocol so maxBuf is only 0 */
@@ -681,6 +684,7 @@ struct TCP_Server_Info {
__le16 cipher_type;
/* save initital negprot hash */
__u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE];
+ bool posix_ext_supported;
#endif /* 3.1.1 */
struct delayed_work reconnect; /* reconnect workqueue job */
struct mutex reconnect_mutex; /* prevent simultaneous reconnects */
@@ -953,9 +957,13 @@ struct cifs_tcon {
bool print:1; /* set if connection to printer share */
bool retry:1;
bool nocase:1;
+ bool nohandlecache:1; /* can turn off if the server has resource problems */
bool seal:1; /* transport encryption for this mounted share */
bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol
for this mount even if server would support */
+#ifdef CONFIG_CIFS_SMB311
+ bool posix_extensions; /* if true SMB3.11 posix extensions enabled */
+#endif /* CIFS_311 */
bool local_lease:1; /* check leases (only) on local system not remote */
bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */
bool broken_sparse_sup; /* if server or share does not support sparse */
@@ -979,6 +987,9 @@ struct cifs_tcon {
struct fscache_cookie *fscache; /* cookie for share */
#endif
struct list_head pending_opens; /* list of incomplete opens */
+ bool valid_root_fid:1; /* Do we have a usable root fid */
+ struct mutex prfid_mutex; /* prevents reopen race after dead ses */
+ struct cifs_fid *prfid; /* handle to the directory at top of share */
/* BB add field for back pointer to sb struct(s)? */
};
@@ -1071,6 +1082,7 @@ struct cifs_open_parms {
int create_options;
const char *path;
struct cifs_fid *fid;
+ umode_t mode;
bool reconnect:1;
};
@@ -1169,10 +1181,11 @@ struct cifs_readdata {
struct smbd_mr *mr;
#endif
unsigned int pagesz;
+ unsigned int page_offset;
unsigned int tailsz;
unsigned int credits;
unsigned int nr_pages;
- struct page *pages[];
+ struct page **pages;
};
struct cifs_writedata;
@@ -1194,10 +1207,11 @@ struct cifs_writedata {
struct smbd_mr *mr;
#endif
unsigned int pagesz;
+ unsigned int page_offset;
unsigned int tailsz;
unsigned int credits;
unsigned int nr_pages;
- struct page *pages[];
+ struct page **pages;
};
/*
@@ -1692,16 +1706,17 @@ GLOBAL_EXTERN atomic_t smBufAllocCount;
GLOBAL_EXTERN atomic_t midCount;
/* Misc globals */
-GLOBAL_EXTERN bool enable_oplocks; /* enable or disable oplocks */
-GLOBAL_EXTERN bool lookupCacheEnabled;
-GLOBAL_EXTERN unsigned int global_secflags; /* if on, session setup sent
+extern bool enable_oplocks; /* enable or disable oplocks */
+extern bool lookupCacheEnabled;
+extern unsigned int global_secflags; /* if on, session setup sent
with more secure ntlmssp2 challenge/resp */
-GLOBAL_EXTERN unsigned int sign_CIFS_PDUs; /* enable smb packet signing */
-GLOBAL_EXTERN bool linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/
-GLOBAL_EXTERN unsigned int CIFSMaxBufSize; /* max size not including hdr */
-GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */
-GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */
-GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/
+extern unsigned int sign_CIFS_PDUs; /* enable smb packet signing */
+extern bool linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/
+extern unsigned int CIFSMaxBufSize; /* max size not including hdr */
+extern unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */
+extern unsigned int cifs_min_small; /* min size of small buf pool */
+extern unsigned int cifs_max_pending; /* MAX requests at once to server*/
+extern bool disable_legacy_dialects; /* forbid vers=1.0 and vers=2.0 mounts */
#ifdef CONFIG_CIFS_ACL
GLOBAL_EXTERN struct rb_root uidtree;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 365a414a75e9..7933c5f9c076 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -21,6 +21,7 @@
#ifndef _CIFSPROTO_H
#define _CIFSPROTO_H
#include <linux/nls.h>
+#include "trace.h"
struct statfs;
struct smb_vol;
@@ -47,6 +48,7 @@ extern void _free_xid(unsigned int);
cifs_dbg(FYI, "CIFS VFS: in %s as Xid: %u with uid: %d\n", \
__func__, __xid, \
from_kuid(&init_user_ns, current_fsuid())); \
+ trace_smb3_enter(__xid, __func__); \
__xid; \
})
@@ -54,7 +56,11 @@ extern void _free_xid(unsigned int);
do { \
_free_xid(curr_xid); \
cifs_dbg(FYI, "CIFS VFS: leaving %s (xid = %u) rc = %d\n", \
- __func__, curr_xid, (int)rc); \
+ __func__, curr_xid, (int)rc); \
+ if (rc) \
+ trace_smb3_exit_err(curr_xid, __func__, (int)rc); \
+ else \
+ trace_smb3_exit_done(curr_xid, __func__); \
} while (0)
extern int init_cifs_idmap(void);
extern void exit_cifs_idmap(void);
@@ -124,7 +130,7 @@ extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
unsigned int bytes_written);
extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
-extern unsigned int smbCalcSize(void *buf);
+extern unsigned int smbCalcSize(void *buf, struct TCP_Server_Info *server);
extern int decode_negTokenInit(unsigned char *security_blob, int length,
struct TCP_Server_Info *server);
extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
@@ -197,7 +203,9 @@ extern void dequeue_mid(struct mid_q_entry *mid, bool malformed);
extern int cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
unsigned int to_read);
extern int cifs_read_page_from_socket(struct TCP_Server_Info *server,
- struct page *page, unsigned int to_read);
+ struct page *page,
+ unsigned int page_offset,
+ unsigned int to_read);
extern int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
struct cifs_sb_info *cifs_sb);
extern int cifs_match_super(struct super_block *, void *);
@@ -525,6 +533,8 @@ int cifs_async_writev(struct cifs_writedata *wdata,
void cifs_writev_complete(struct work_struct *work);
struct cifs_writedata *cifs_writedata_alloc(unsigned int nr_pages,
work_func_t complete);
+struct cifs_writedata *cifs_writedata_direct_alloc(struct page **pages,
+ work_func_t complete);
void cifs_writedata_release(struct kref *refcount);
int cifs_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 1529a088383d..5aca336642c0 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -106,6 +106,12 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
open_file->oplock_break_cancelled = true;
}
spin_unlock(&tcon->open_file_lock);
+
+ mutex_lock(&tcon->prfid_mutex);
+ tcon->valid_root_fid = false;
+ memset(tcon->prfid, 0, sizeof(struct cifs_fid));
+ mutex_unlock(&tcon->prfid_mutex);
+
/*
* BB Add call to invalidate_inodes(sb) for all superblocks mounted
* to this tcon.
@@ -1946,6 +1952,7 @@ cifs_writedata_release(struct kref *refcount)
if (wdata->cfile)
cifsFileInfo_put(wdata->cfile);
+ kvfree(wdata->pages);
kfree(wdata);
}
@@ -2069,12 +2076,22 @@ cifs_writev_complete(struct work_struct *work)
struct cifs_writedata *
cifs_writedata_alloc(unsigned int nr_pages, work_func_t complete)
{
+ struct page **pages =
+ kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
+ if (pages)
+ return cifs_writedata_direct_alloc(pages, complete);
+
+ return NULL;
+}
+
+struct cifs_writedata *
+cifs_writedata_direct_alloc(struct page **pages, work_func_t complete)
+{
struct cifs_writedata *wdata;
- /* writedata + number of page pointers */
- wdata = kzalloc(sizeof(*wdata) +
- sizeof(struct page *) * nr_pages, GFP_NOFS);
+ wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
if (wdata != NULL) {
+ wdata->pages = pages;
kref_init(&wdata->refcount);
INIT_LIST_HEAD(&wdata->list);
init_completion(&wdata->done);
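
The cifssmb.c hunks split the allocation in two: cifs_writedata_direct_alloc() takes a caller-supplied page-pointer array (so future direct-I/O callers can pass their own pages), and cifs_writedata_alloc() becomes a wrapper that allocates the array itself and frees it again if the second allocation fails, mirroring the readdata variant in file.c further down. A compilable sketch of the pattern with stand-in types:

#include <stdlib.h>

struct page;                        /* opaque stand-in */
typedef void (*work_func_t)(void *);

struct wdata_model {
	struct page **pages;        /* a pointer now, not a flexible tail */
	work_func_t complete;
};

/* takes ownership of @pages on success */
static struct wdata_model *
wdata_direct_alloc(struct page **pages, work_func_t complete)
{
	struct wdata_model *wdata = calloc(1, sizeof(*wdata));

	if (wdata) {
		wdata->pages = pages;
		wdata->complete = complete;
	}
	return wdata;
}

static struct wdata_model *
wdata_alloc(unsigned int nr_pages, work_func_t complete)
{
	struct page **pages = calloc(nr_pages, sizeof(*pages));
	struct wdata_model *wdata = NULL;

	if (pages) {
		wdata = wdata_direct_alloc(pages, complete);
		if (!wdata)
			free(pages); /* don't leak the array on failure */
	}
	return wdata;
}

int main(void)
{
	struct wdata_model *w = wdata_alloc(4, NULL);

	if (w) {
		free(w->pages);
		free(w);
	}
	return 0;
}
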
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a5aa158d535a..e5a2fe7f0dd4 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -61,6 +61,7 @@
#define RFC1001_PORT 139
extern mempool_t *cifs_req_poolp;
+extern bool disable_legacy_dialects;
/* FIXME: should these be tunable? */
#define TLINK_ERROR_EXPIRE (1 * HZ)
@@ -76,9 +77,10 @@ enum {
Opt_mapposix, Opt_nomapposix,
Opt_mapchars, Opt_nomapchars, Opt_sfu,
Opt_nosfu, Opt_nodfs, Opt_posixpaths,
- Opt_noposixpaths, Opt_nounix,
+ Opt_noposixpaths, Opt_nounix, Opt_unix,
Opt_nocase,
Opt_brl, Opt_nobrl,
+ Opt_handlecache, Opt_nohandlecache,
Opt_forcemandatorylock, Opt_setuidfromacl, Opt_setuids,
Opt_nosetuids, Opt_dynperm, Opt_nodynperm,
Opt_nohard, Opt_nosoft,
@@ -144,10 +146,16 @@ static const match_table_t cifs_mount_option_tokens = {
{ Opt_noposixpaths, "noposixpaths" },
{ Opt_nounix, "nounix" },
{ Opt_nounix, "nolinux" },
+ { Opt_nounix, "noposix" },
+ { Opt_unix, "unix" },
+ { Opt_unix, "linux" },
+ { Opt_unix, "posix" },
{ Opt_nocase, "nocase" },
{ Opt_nocase, "ignorecase" },
{ Opt_brl, "brl" },
{ Opt_nobrl, "nobrl" },
+ { Opt_handlecache, "handlecache" },
+ { Opt_nohandlecache, "nohandlecache" },
{ Opt_nobrl, "nolock" },
{ Opt_forcemandatorylock, "forcemandatorylock" },
{ Opt_forcemandatorylock, "forcemand" },
@@ -591,10 +599,11 @@ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
int
cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page,
- unsigned int to_read)
+ unsigned int page_offset, unsigned int to_read)
{
struct msghdr smb_msg;
- struct bio_vec bv = {.bv_page = page, .bv_len = to_read};
+ struct bio_vec bv = {
+ .bv_page = page, .bv_len = to_read, .bv_offset = page_offset};
iov_iter_bvec(&smb_msg.msg_iter, READ | ITER_BVEC, &bv, 1, to_read);
return cifs_readv_from_socket(server, &smb_msg);
}
@@ -848,6 +857,7 @@ cifs_demultiplex_thread(void *p)
int length;
struct TCP_Server_Info *server = p;
unsigned int pdu_length;
+ unsigned int next_offset;
char *buf = NULL;
struct task_struct *task_to_wake = NULL;
struct mid_q_entry *mid_entry;
@@ -874,24 +884,29 @@ cifs_demultiplex_thread(void *p)
length = cifs_read_from_socket(server, buf, pdu_length);
if (length < 0)
continue;
- server->total_read = length;
+
+ if (server->vals->header_preamble_size == 0)
+ server->total_read = 0;
+ else
+ server->total_read = length;
/*
* The right amount was read from socket - 4 bytes,
* so we can now interpret the length field.
*/
pdu_length = get_rfc1002_length(buf);
- server->pdu_size = pdu_length;
cifs_dbg(FYI, "RFC1002 header 0x%x\n", pdu_length);
if (!is_smb_response(server, buf[0]))
continue;
+next_pdu:
+ server->pdu_size = pdu_length;
/* make sure we have enough to get to the MID */
- if (pdu_length < HEADER_SIZE(server) - 1 -
+ if (server->pdu_size < HEADER_SIZE(server) - 1 -
server->vals->header_preamble_size) {
cifs_dbg(VFS, "SMB response too short (%u bytes)\n",
- pdu_length);
+ server->pdu_size);
cifs_reconnect(server);
wake_up(&server->response_q);
continue;
@@ -906,6 +921,12 @@ cifs_demultiplex_thread(void *p)
continue;
server->total_read += length;
+ if (server->ops->next_header) {
+ next_offset = server->ops->next_header(buf);
+ if (next_offset)
+ server->pdu_size = next_offset;
+ }
+
if (server->ops->is_transform_hdr &&
server->ops->receive_transform &&
server->ops->is_transform_hdr(buf)) {
@@ -948,10 +969,18 @@ cifs_demultiplex_thread(void *p)
HEADER_SIZE(server));
#ifdef CONFIG_CIFS_DEBUG2
if (server->ops->dump_detail)
- server->ops->dump_detail(buf);
+ server->ops->dump_detail(buf, server);
cifs_dump_mids(server);
#endif /* CIFS_DEBUG2 */
-
+ }
+ if (pdu_length > server->pdu_size) {
+ if (!allocate_buffers(server))
+ continue;
+ pdu_length -= server->pdu_size;
+ server->total_read = 0;
+ server->large_buf = false;
+ buf = server->smallbuf;
+ goto next_pdu;
}
} /* end while !EXITING */
@@ -1143,10 +1172,18 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol)
switch (match_token(value, cifs_smb_version_tokens, args)) {
case Smb_1:
+ if (disable_legacy_dialects) {
+ cifs_dbg(VFS, "mount with legacy dialect disabled\n");
+ return 1;
+ }
vol->ops = &smb1_operations;
vol->vals = &smb1_values;
break;
case Smb_20:
+ if (disable_legacy_dialects) {
+ cifs_dbg(VFS, "mount with legacy dialect disabled\n");
+ return 1;
+ }
vol->ops = &smb20_operations;
vol->vals = &smb20_values;
break;
@@ -1426,8 +1463,17 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
vol->posix_paths = 0;
break;
case Opt_nounix:
+ if (vol->linux_ext)
+ cifs_dbg(VFS,
+ "conflicting unix mount options\n");
vol->no_linux_ext = 1;
break;
+ case Opt_unix:
+ if (vol->no_linux_ext)
+ cifs_dbg(VFS,
+ "conflicting unix mount options\n");
+ vol->linux_ext = 1;
+ break;
case Opt_nocase:
vol->nocase = 1;
break;
@@ -1445,6 +1491,12 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
(S_IALLUGO & ~(S_ISUID | S_IXGRP)))
vol->file_mode = S_IALLUGO;
break;
+ case Opt_nohandlecache:
+ vol->nohandlecache = 1;
+ break;
+ case Opt_handlecache:
+ vol->nohandlecache = 0;
+ break;
case Opt_forcemandatorylock:
vol->mand_lock = 1;
break;
@@ -1977,14 +2029,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
goto cifs_parse_mount_err;
}
-#ifdef CONFIG_CIFS_SMB_DIRECT
- if (vol->rdma && vol->sign) {
- cifs_dbg(VFS, "Currently SMB direct doesn't support signing."
- " This is being fixed\n");
- goto cifs_parse_mount_err;
- }
-#endif
-
#ifndef CONFIG_KEYS
/* Multiuser mounts require CONFIG_KEYS support */
if (vol->multiuser) {
@@ -2975,6 +3019,13 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
}
}
+#ifdef CONFIG_CIFS_SMB311
+ if ((volume_info->linux_ext) && (ses->server->posix_ext_supported)) {
+ if (ses->server->vals->protocol_id == SMB311_PROT_ID)
+ tcon->posix_extensions = true;
+ }
+#endif /* 311 */
+
/*
* BB Do we need to wrap session_mutex around this TCon call and Unix
* SetFS as we do on SessSetup and reconnect?
@@ -3030,6 +3081,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
*/
tcon->retry = volume_info->retry;
tcon->nocase = volume_info->nocase;
+ tcon->nohandlecache = volume_info->nohandlecache;
tcon->local_lease = volume_info->local_lease;
INIT_LIST_HEAD(&tcon->pending_opens);
@@ -3588,6 +3640,8 @@ int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL;
if (pvolume_info->nobrl)
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL;
+ if (pvolume_info->nohandlecache)
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_HANDLE_CACHE;
if (pvolume_info->nostrictsync)
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOSSYNC;
if (pvolume_info->mand_lock)
@@ -3930,6 +3984,12 @@ try_mount_again:
goto remote_path_check;
}
+#ifdef CONFIG_CIFS_SMB311
+ /* if new SMB3.11 POSIX extensions are supported do not remap / and \ */
+ if (tcon->posix_extensions)
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS;
+#endif /* SMB3.11 */
+
/* tell server which Unix caps we support */
if (cap_unix(tcon->ses)) {
/* reset of caps checks mount to see if unix extensions
@@ -4361,6 +4421,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
vol_info->UNC = master_tcon->treeName;
vol_info->retry = master_tcon->retry;
vol_info->nocase = master_tcon->nocase;
+ vol_info->nohandlecache = master_tcon->nohandlecache;
vol_info->local_lease = master_tcon->local_lease;
vol_info->no_linux_ext = !master_tcon->unix_ext;
vol_info->sectype = master_tcon->ses->sectype;
@@ -4390,8 +4451,14 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
goto out;
}
+#ifdef CONFIG_CIFS_SMB311
+ /* if new SMB3.11 POSIX extensions are supported do not remap / and \ */
+ if (tcon->posix_extensions)
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS;
+#endif /* SMB3.11 */
if (cap_unix(ses))
reset_cifs_unix_caps(0, tcon, NULL, vol_info);
+
out:
kfree(vol_info->username);
kzfree(vol_info->password);
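
The demultiplex-thread changes in connect.c teach the receive loop about compounded responses: when ->next_header() reports a non-zero NextCommand offset, the buffer holds several chained PDUs, and the loop re-enters at the next_pdu label once per PDU until the remaining length is consumed. A userspace sketch of that splitting logic (the struct layout here is illustrative, not the real wire format):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct pdu_hdr { uint32_t next_command; /* offset of next PDU, 0 = last */ };

static void handle_pdu(const unsigned char *p, size_t len)
{
	printf("pdu at offset-relative %p, %zu bytes\n", (const void *)p, len);
}

static void split_compound(const unsigned char *buf, size_t total)
{
	size_t off = 0;

	while (off < total) {
		struct pdu_hdr hdr;
		size_t this_len;

		memcpy(&hdr, buf + off, sizeof(hdr));
		this_len = hdr.next_command ? hdr.next_command : total - off;
		handle_pdu(buf + off, this_len);        /* one PDU */
		if (!hdr.next_command)
			break;                          /* last in chain */
		off += this_len;                        /* goto next_pdu */
	}
}

int main(void)
{
	/* two chained 16-byte PDUs followed by a final one */
	unsigned char buf[48] = { 0 };
	struct pdu_hdr h = { 16 };

	memcpy(buf, &h, sizeof(h));
	memcpy(buf + 16, &h, sizeof(h));
	split_compound(buf, sizeof(buf));
	return 0;
}
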
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 925844343038..ddae52bd1993 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -369,7 +369,7 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
oparms.path = full_path;
oparms.fid = fid;
oparms.reconnect = false;
-
+ oparms.mode = mode;
rc = server->ops->open(xid, &oparms, oplock, buf);
if (rc) {
cifs_dbg(FYI, "cifs_create returned 0x%x\n", rc);
@@ -780,21 +780,25 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink)) {
free_xid(xid);
- return (struct dentry *)tlink;
+ return ERR_CAST(tlink);
}
pTcon = tlink_tcon(tlink);
rc = check_name(direntry, pTcon);
- if (rc)
- goto lookup_out;
+ if (unlikely(rc)) {
+ cifs_put_tlink(tlink);
+ free_xid(xid);
+ return ERR_PTR(rc);
+ }
/* can not grab the rename sem here since it would
deadlock in the cases (beginning of sys_rename itself)
in which we already have the sb rename sem */
full_path = build_path_from_dentry(direntry);
if (full_path == NULL) {
- rc = -ENOMEM;
- goto lookup_out;
+ cifs_put_tlink(tlink);
+ free_xid(xid);
+ return ERR_PTR(-ENOMEM);
}
if (d_really_is_positive(direntry)) {
@@ -813,29 +817,25 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
parent_dir_inode->i_sb, xid, NULL);
}
- if ((rc == 0) && (newInode != NULL)) {
- d_add(direntry, newInode);
+ if (rc == 0) {
/* since paths are not looked up by component - the parent
directories are presumed to be good here */
renew_parental_timestamps(direntry);
-
} else if (rc == -ENOENT) {
- rc = 0;
cifs_set_time(direntry, jiffies);
- d_add(direntry, NULL);
- /* if it was once a directory (but how can we tell?) we could do
- shrink_dcache_parent(direntry); */
- } else if (rc != -EACCES) {
- cifs_dbg(FYI, "Unexpected lookup error %d\n", rc);
- /* We special case check for Access Denied - since that
- is a common return code */
+ newInode = NULL;
+ } else {
+ if (rc != -EACCES) {
+ cifs_dbg(FYI, "Unexpected lookup error %d\n", rc);
+ /* We special case check for Access Denied - since that
+ is a common return code */
+ }
+ newInode = ERR_PTR(rc);
}
-
-lookup_out:
kfree(full_path);
cifs_put_tlink(tlink);
free_xid(xid);
- return ERR_PTR(rc);
+ return d_splice_alias(newInode, direntry);
}
static int
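
The cifs_lookup() rewrite above drops the hand-rolled d_add()/ERR_PTR() plumbing in favor of the d_splice_alias() convention: hand it the inode on success, NULL on -ENOENT (negative dentry), or an ERR_PTR for other failures, and let it attach the dentry or propagate the error. A toy model of that contract (the real d_splice_alias() also handles directory aliasing, elided here):

#include <stdio.h>
#include <stddef.h>

struct inode { int ino; };
struct dentry { struct inode *d_inode; };

#define ERR_PTR(e) ((void *)(long)(e))
#define IS_ERR(p)  ((unsigned long)(p) >= (unsigned long)-4095)

/* toy d_splice_alias(): attach the inode, propagate errors, NULL = negative */
static struct dentry *splice_alias(struct inode *inode, struct dentry *d)
{
	if (IS_ERR(inode))
		return (struct dentry *)inode;  /* lookup error travels back */
	d->d_inode = inode;                     /* NULL => negative dentry */
	return NULL;                            /* no preexisting alias */
}

int main(void)
{
	struct dentry d = { NULL };
	struct inode i = { 42 };

	splice_alias(&i, &d);                   /* rc == 0: positive entry */
	printf("ino %d\n", d.d_inode->ino);
	splice_alias(NULL, &d);                 /* rc == -ENOENT case */
	splice_alias(ERR_PTR(-13), &d);         /* rc == -EACCES case */
	return 0;
}
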
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 23fd430fe74a..87eece6fbd48 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2880,13 +2880,13 @@ out:
}
static struct cifs_readdata *
-cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
+cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
{
struct cifs_readdata *rdata;
- rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
- GFP_KERNEL);
+ rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
if (rdata != NULL) {
+ rdata->pages = pages;
kref_init(&rdata->refcount);
INIT_LIST_HEAD(&rdata->list);
init_completion(&rdata->done);
@@ -2896,6 +2896,22 @@ cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
return rdata;
}
+static struct cifs_readdata *
+cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
+{
+ struct page **pages =
+ kzalloc(sizeof(struct page *) * nr_pages, GFP_KERNEL);
+ struct cifs_readdata *ret = NULL;
+
+ if (pages) {
+ ret = cifs_readdata_direct_alloc(pages, complete);
+ if (!ret)
+ kfree(pages);
+ }
+
+ return ret;
+}
+
void
cifs_readdata_release(struct kref *refcount)
{
@@ -2910,6 +2926,7 @@ cifs_readdata_release(struct kref *refcount)
if (rdata->cfile)
cifsFileInfo_put(rdata->cfile);
+ kvfree(rdata->pages);
kfree(rdata);
}
@@ -3009,12 +3026,20 @@ uncached_fill_pages(struct TCP_Server_Info *server,
int result = 0;
unsigned int i;
unsigned int nr_pages = rdata->nr_pages;
+ unsigned int page_offset = rdata->page_offset;
rdata->got_bytes = 0;
rdata->tailsz = PAGE_SIZE;
for (i = 0; i < nr_pages; i++) {
struct page *page = rdata->pages[i];
size_t n;
+ unsigned int segment_size = rdata->pagesz;
+
+ if (i == 0)
+ segment_size -= page_offset;
+ else
+ page_offset = 0;
+
if (len <= 0) {
/* no need to hold page hostage */
@@ -3023,24 +3048,25 @@ uncached_fill_pages(struct TCP_Server_Info *server,
put_page(page);
continue;
}
+
n = len;
- if (len >= PAGE_SIZE) {
+ if (len >= segment_size)
/* enough data to fill the page */
- n = PAGE_SIZE;
- len -= n;
- } else {
- zero_user(page, len, PAGE_SIZE - len);
+ n = segment_size;
+ else
rdata->tailsz = len;
- len = 0;
- }
+ len -= n;
+
if (iter)
- result = copy_page_from_iter(page, 0, n, iter);
+ result = copy_page_from_iter(
+ page, page_offset, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
else if (rdata->mr)
result = n;
#endif
else
- result = cifs_read_page_from_socket(server, page, n);
+ result = cifs_read_page_from_socket(
+ server, page, page_offset, n);
if (result < 0)
break;
@@ -3113,6 +3139,7 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
rdata->bytes = cur_len;
rdata->pid = pid;
rdata->pagesz = PAGE_SIZE;
+ rdata->tailsz = PAGE_SIZE;
rdata->read_into_pages = cifs_uncached_read_into_pages;
rdata->copy_into_pages = cifs_uncached_copy_into_pages;
rdata->credits = credits;
@@ -3557,6 +3584,7 @@ readpages_fill_pages(struct TCP_Server_Info *server,
u64 eof;
pgoff_t eof_index;
unsigned int nr_pages = rdata->nr_pages;
+ unsigned int page_offset = rdata->page_offset;
/* determine the eof that the server (probably) has */
eof = CIFS_I(rdata->mapping->host)->server_eof;
@@ -3567,13 +3595,21 @@ readpages_fill_pages(struct TCP_Server_Info *server,
rdata->tailsz = PAGE_SIZE;
for (i = 0; i < nr_pages; i++) {
struct page *page = rdata->pages[i];
- size_t n = PAGE_SIZE;
+ unsigned int to_read = rdata->pagesz;
+ size_t n;
+
+ if (i == 0)
+ to_read -= page_offset;
+ else
+ page_offset = 0;
+
+ n = to_read;
- if (len >= PAGE_SIZE) {
- len -= PAGE_SIZE;
+ if (len >= to_read) {
+ len -= to_read;
} else if (len > 0) {
/* enough for partial page, fill and zero the rest */
- zero_user(page, len, PAGE_SIZE - len);
+ zero_user(page, len + page_offset, to_read - len);
n = rdata->tailsz = len;
len = 0;
} else if (page->index > eof_index) {
@@ -3605,13 +3641,15 @@ readpages_fill_pages(struct TCP_Server_Info *server,
}
if (iter)
- result = copy_page_from_iter(page, 0, n, iter);
+ result = copy_page_from_iter(
+ page, page_offset, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
else if (rdata->mr)
result = n;
#endif
else
- result = cifs_read_page_from_socket(server, page, n);
+ result = cifs_read_page_from_socket(
+ server, page, page_offset, n);
if (result < 0)
break;
@@ -3790,6 +3828,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
rdata->bytes = bytes;
rdata->pid = pid;
rdata->pagesz = PAGE_SIZE;
+ rdata->tailsz = PAGE_SIZE;
rdata->read_into_pages = cifs_readpages_read_into_pages;
rdata->copy_into_pages = cifs_readpages_copy_into_pages;
rdata->credits = credits;
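
The page_offset plumbing added to both fill loops means only the first page of a transfer may start mid-page; every later page starts at offset 0, and the final segment may be short (tracked as tailsz). A small sketch of the arithmetic, assuming 4K pages:

#include <stdio.h>

#define PAGE_SIZE_MODEL 4096u

static void fill_segments(unsigned int page_offset, unsigned int nr_pages,
			  size_t len)
{
	for (unsigned int i = 0; i < nr_pages && len; i++) {
		unsigned int seg = PAGE_SIZE_MODEL - page_offset;
		size_t n = len < seg ? len : seg; /* tail may be short */

		printf("page %u: offset %u, %zu bytes\n", i, page_offset, n);
		len -= n;
		page_offset = 0;  /* only the first page starts mid-page */
	}
}

int main(void)
{
	/* 10000 bytes landing at offset 1000 of the first page */
	fill_segments(1000, 4, 10000);
	return 0;
}
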
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 3c371f7f5963..745fd7fe8d0e 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -746,7 +746,8 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
cifs_dbg(FYI, "Getting info on %s\n", full_path);
if ((data == NULL) && (*inode != NULL)) {
- if (CIFS_CACHE_READ(CIFS_I(*inode))) {
+ if (CIFS_CACHE_READ(CIFS_I(*inode)) &&
+ CIFS_I(*inode)->time != 0) {
cifs_dbg(FYI, "No need to revalidate cached inode sizes\n");
goto cgii_exit;
}
@@ -1857,15 +1858,15 @@ cifs_inode_needs_reval(struct inode *inode)
struct cifsInodeInfo *cifs_i = CIFS_I(inode);
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ if (cifs_i->time == 0)
+ return true;
+
if (CIFS_CACHE_READ(cifs_i))
return false;
if (!lookupCacheEnabled)
return true;
- if (cifs_i->time == 0)
- return true;
-
if (!cifs_sb->actimeo)
return true;
@@ -2104,10 +2105,14 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from)
static void cifs_setsize(struct inode *inode, loff_t offset)
{
+ struct cifsInodeInfo *cifs_i = CIFS_I(inode);
+
spin_lock(&inode->i_lock);
i_size_write(inode, offset);
spin_unlock(&inode->i_lock);
+ /* Cached inode must be refreshed on truncate */
+ cifs_i->time = 0;
truncate_pagecache(inode, offset);
}
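
Reordering cifs_inode_needs_reval() makes time == 0 an unconditional invalidation marker: once cifs_setsize() (or a failed revalidation) zeroes it, even a cached read oplock no longer short-circuits the attribute refresh. Sketch of the resulting decision order:

#include <stdbool.h>
#include <stdio.h>

struct cifs_inode_model {
	unsigned long time; /* 0 => cached attributes are invalid */
	bool cache_read;    /* holds a read oplock/lease */
};

static bool needs_reval(const struct cifs_inode_model *ci)
{
	if (ci->time == 0)
		return true;   /* e.g. just truncated via cifs_setsize() */
	if (ci->cache_read)
		return false;  /* oplock only trusted for valid entries */
	return true;           /* actimeo and other checks elided */
}

int main(void)
{
	struct cifs_inode_model ci = { .time = 0, .cache_read = true };

	printf("%d\n", needs_reval(&ci)); /* 1: time==0 wins over oplock */
	return 0;
}
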
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 460084a8eac5..aba3fc3058da 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -117,6 +117,8 @@ tconInfoAlloc(void)
INIT_LIST_HEAD(&ret_buf->openFileList);
INIT_LIST_HEAD(&ret_buf->tcon_list);
spin_lock_init(&ret_buf->open_file_lock);
+ mutex_init(&ret_buf->prfid_mutex);
+ ret_buf->prfid = kzalloc(sizeof(struct cifs_fid), GFP_KERNEL);
#ifdef CONFIG_CIFS_STATS
spin_lock_init(&ret_buf->stat_lock);
#endif
@@ -134,6 +136,7 @@ tconInfoFree(struct cifs_tcon *buf_to_free)
atomic_dec(&tconInfoAllocCount);
kfree(buf_to_free->nativeFileSystem);
kzfree(buf_to_free->password);
+ kfree(buf_to_free->prfid);
kfree(buf_to_free);
}
@@ -145,7 +148,7 @@ cifs_buf_get(void)
* SMB2 header is bigger than CIFS one - no problems to clean some
* more bytes for CIFS.
*/
- size_t buf_size = sizeof(struct smb2_hdr);
+ size_t buf_size = sizeof(struct smb2_sync_hdr);
/*
* We could use negotiated size instead of max_msgsize -
@@ -339,7 +342,7 @@ checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server)
/* otherwise, there is enough to get to the BCC */
if (check_smb_hdr(smb))
return -EIO;
- clc_len = smbCalcSize(smb);
+ clc_len = smbCalcSize(smb, server);
if (4 + rfclen != total_read) {
cifs_dbg(VFS, "Length read does not match RFC1001 length %d\n",
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index cc88f4f0325e..d7ad0dfe4e68 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -903,7 +903,7 @@ map_smb_to_linux_error(char *buf, bool logErr)
* portion, the number of word parameters and the data portion of the message
*/
unsigned int
-smbCalcSize(void *buf)
+smbCalcSize(void *buf, struct TCP_Server_Info *server)
{
struct smb_hdr *ptr = (struct smb_hdr *)buf;
return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) +
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index a27fc8791551..eeab81c9452f 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -650,7 +650,8 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
char *cur_ent;
char *end_of_smb = cfile->srch_inf.ntwrk_buf_start +
server->ops->calc_smb_size(
- cfile->srch_inf.ntwrk_buf_start);
+ cfile->srch_inf.ntwrk_buf_start,
+ server);
cur_ent = cfile->srch_inf.srch_entries_start;
first_entry_in_buffer = cfile->srch_inf.index_of_last_entry
@@ -831,7 +832,8 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
cifs_dbg(FYI, "loop through %d times filling dir for net buf %p\n",
num_to_fill, cifsFile->srch_inf.ntwrk_buf_start);
max_len = tcon->ses->server->ops->calc_smb_size(
- cifsFile->srch_inf.ntwrk_buf_start);
+ cifsFile->srch_inf.ntwrk_buf_start,
+ tcon->ses->server);
end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len;
tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL);
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
index 401a5d856636..0ffa18094335 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -61,9 +61,4 @@
/* Maximum buffer size value we can send with 1 credit */
#define SMB2_MAX_BUFFER_SIZE 65536
-static inline struct smb2_sync_hdr *get_sync_hdr(void *buf)
-{
- return &(((struct smb2_hdr *)buf)->sync_hdr);
-}
-
#endif /* _SMB2_GLOB_H */
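
Removing get_sync_hdr() follows from dropping the 4-byte RFC1001 preamble from the receive buffer: the response now begins directly at the 64-byte sync header, so every former get_sync_hdr(buf) call becomes a plain cast. Illustrative before/after, with an abbreviated header layout:

#include <stdint.h>
#include <stdio.h>

struct sync_hdr_model {
	uint32_t protocol_id; /* 0xFE 'S' 'M' 'B' on the wire */
	/* ... remainder of the 64-byte header ... */
};

static struct sync_hdr_model *sync_hdr(void *buf)
{
	/* previously: &((struct smb2_hdr *)buf)->sync_hdr, skipping a
	 * 4-byte smb2_buf_length field; now buf starts at the header */
	return (struct sync_hdr_model *)buf;
}

int main(void)
{
	unsigned char frame[64] = { 0xFE, 'S', 'M', 'B' };

	printf("0x%x\n", sync_hdr(frame)->protocol_id);
	return 0;
}
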
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 1238cd3552f9..a6e786e39248 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -44,26 +44,38 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
__u32 create_options, void *data, int command)
{
int rc, tmprc = 0;
- __le16 *utf16_path;
+ __le16 *utf16_path = NULL;
__u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
struct cifs_open_parms oparms;
struct cifs_fid fid;
+ bool use_cached_root_handle = false;
- utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
- if (!utf16_path)
- return -ENOMEM;
+ if ((strcmp(full_path, "") == 0) && (create_options == 0) &&
+ (desired_access == FILE_READ_ATTRIBUTES) &&
+ (create_disposition == FILE_OPEN) &&
+ (tcon->nohandlecache == false)) {
+ rc = open_shroot(xid, tcon, &fid);
+ if (rc == 0)
+ use_cached_root_handle = true;
+ }
+
+ if (use_cached_root_handle == false) {
+ utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
+ if (!utf16_path)
+ return -ENOMEM;
- oparms.tcon = tcon;
- oparms.desired_access = desired_access;
- oparms.disposition = create_disposition;
- oparms.create_options = create_options;
- oparms.fid = &fid;
- oparms.reconnect = false;
+ oparms.tcon = tcon;
+ oparms.desired_access = desired_access;
+ oparms.disposition = create_disposition;
+ oparms.create_options = create_options;
+ oparms.fid = &fid;
+ oparms.reconnect = false;
- rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
- if (rc) {
- kfree(utf16_path);
- return rc;
+ rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
+ if (rc) {
+ kfree(utf16_path);
+ return rc;
+ }
}
switch (command) {
@@ -107,7 +119,8 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
break;
}
- rc = SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+ if (use_cached_root_handle == false)
+ rc = SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
if (tmprc)
rc = tmprc;
kfree(utf16_path);
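
smb2_open_op_close() now recognizes the common query-root pattern (empty path, FILE_READ_ATTRIBUTES, FILE_OPEN, no special create options) and serves it from the tcon's cached root fid, skipping both the open and the close on the wire unless the share was mounted with nohandlecache. A toy model of the caching rule (locking and error codes elided):

#include <stdbool.h>
#include <stdio.h>

struct fid_model { unsigned long long persistent, volatile_id; };

struct tcon_model {
	bool valid_root_fid;
	bool nohandlecache;       /* the "nohandlecache" mount option */
	struct fid_model prfid;
};

/* returns 0 and fills @fid when the root handle can be (re)used */
static int open_shroot_model(struct tcon_model *tcon, struct fid_model *fid)
{
	if (tcon->nohandlecache)
		return -1;                /* caller opens by path instead */
	if (!tcon->valid_root_fid) {
		/* the real code sends SMB2_open("") exactly once here */
		tcon->prfid = (struct fid_model){ 1, 1 };
		tcon->valid_root_fid = true;
	}
	*fid = tcon->prfid;
	return 0;
}

int main(void)
{
	struct tcon_model tcon = { 0 };
	struct fid_model fid;

	open_shroot_model(&tcon, &fid);  /* first call opens and caches */
	open_shroot_model(&tcon, &fid);  /* second call is free */
	printf("cached: %d\n", tcon.valid_root_fid);
	return 0;
}
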
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 3bfc9c990724..20a2d304c603 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -27,6 +27,7 @@
#include "smb2proto.h"
#include "smb2status.h"
#include "smb2glob.h"
+#include "trace.h"
struct status_to_posix_error {
__le32 smb2_status;
@@ -2450,13 +2451,16 @@ smb2_print_status(__le32 status)
int
map_smb2_to_linux_error(char *buf, bool log_err)
{
- struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
unsigned int i;
int rc = -EIO;
__le32 smb2err = shdr->Status;
- if (smb2err == 0)
+ if (smb2err == 0) {
+ trace_smb3_cmd_done(shdr->TreeId, shdr->SessionId,
+ le16_to_cpu(shdr->Command), le64_to_cpu(shdr->MessageId));
return 0;
+ }
/* mask facility */
if (log_err && (smb2err != STATUS_MORE_PROCESSING_REQUIRED) &&
@@ -2478,5 +2482,8 @@ map_smb2_to_linux_error(char *buf, bool log_err)
cifs_dbg(FYI, "Mapping SMB2 status code 0x%08x to POSIX err %d\n",
__le32_to_cpu(smb2err), rc);
+ trace_smb3_cmd_err(shdr->TreeId, shdr->SessionId,
+ le16_to_cpu(shdr->Command),
+ le64_to_cpu(shdr->MessageId), le32_to_cpu(smb2err), rc);
return rc;
}
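
map_smb2_to_linux_error() now fires exactly one tracepoint per parsed response: smb3_cmd_done on NT_STATUS success, smb3_cmd_err otherwise (both should appear under the cifs trace system in tracefs). A compilable model of that split, with printf standing in for the tracepoints:

#include <stdint.h>
#include <stdio.h>

static int map_status_model(uint32_t tid, uint16_t cmd, uint64_t mid,
			    uint32_t status, int rc)
{
	if (status == 0) {
		printf("smb3_cmd_done: tid=%u cmd=%u mid=%llu\n",
		       tid, cmd, (unsigned long long)mid);
		return 0;
	}
	printf("smb3_cmd_err: tid=%u cmd=%u mid=%llu status=0x%x rc=%d\n",
	       tid, cmd, (unsigned long long)mid, status, rc);
	return rc;
}

int main(void)
{
	map_status_model(1, 16, 7, 0, 0);            /* done path */
	map_status_model(1, 16, 8, 0xc000000d, -22); /* err path */
	return 0;
}
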
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 68ea8491c160..cb5728e3d87d 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -94,8 +94,8 @@ static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
};
#ifdef CONFIG_CIFS_SMB311
-static __u32 get_neg_ctxt_len(struct smb2_hdr *hdr, __u32 len, __u32 non_ctxlen,
- size_t hdr_preamble_size)
+static __u32 get_neg_ctxt_len(struct smb2_sync_hdr *hdr, __u32 len,
+ __u32 non_ctxlen)
{
__u16 neg_count;
__u32 nc_offset, size_of_pad_before_neg_ctxts;
@@ -109,12 +109,11 @@ static __u32 get_neg_ctxt_len(struct smb2_hdr *hdr, __u32 len, __u32 non_ctxlen,
/* Make sure that negotiate contexts start after gss security blob */
nc_offset = le32_to_cpu(pneg_rsp->NegotiateContextOffset);
- if (nc_offset < non_ctxlen - hdr_preamble_size /* RFC1001 len */) {
+ if (nc_offset < non_ctxlen) {
printk_once(KERN_WARNING "invalid negotiate context offset\n");
return 0;
}
- size_of_pad_before_neg_ctxts = nc_offset -
- (non_ctxlen - hdr_preamble_size);
+ size_of_pad_before_neg_ctxts = nc_offset - non_ctxlen;
/* Verify that at least minimal negotiate contexts fit within frame */
if (len < nc_offset + (neg_count * sizeof(struct smb2_neg_context))) {
@@ -131,25 +130,20 @@ static __u32 get_neg_ctxt_len(struct smb2_hdr *hdr, __u32 len, __u32 non_ctxlen,
#endif /* CIFS_SMB311 */
int
-smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
+smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *srvr)
{
- struct smb2_pdu *pdu = (struct smb2_pdu *)buf;
- struct smb2_hdr *hdr = &pdu->hdr;
- struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
+ struct smb2_sync_pdu *pdu = (struct smb2_sync_pdu *)shdr;
__u64 mid;
- __u32 len = get_rfc1002_length(buf);
__u32 clc_len; /* calculated length */
int command;
-
- /* BB disable following printk later */
- cifs_dbg(FYI, "%s length: 0x%x, smb_buf_length: 0x%x\n",
- __func__, length, len);
+ int pdu_size = sizeof(struct smb2_sync_pdu);
+ int hdr_size = sizeof(struct smb2_sync_hdr);
/*
* Add function to do table lookup of StructureSize by command
* ie Validate the wct via smb2_struct_sizes table above
*/
-
if (shdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) {
struct smb2_transform_hdr *thdr =
(struct smb2_transform_hdr *)buf;
@@ -173,8 +167,8 @@ smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
}
mid = le64_to_cpu(shdr->MessageId);
- if (length < sizeof(struct smb2_pdu)) {
- if ((length >= sizeof(struct smb2_hdr))
+ if (len < pdu_size) {
+ if ((len >= hdr_size)
&& (shdr->Status != 0)) {
pdu->StructureSize2 = 0;
/*
@@ -187,8 +181,7 @@ smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
}
return 1;
}
- if (len > CIFSMaxBufSize + MAX_SMB2_HDR_SIZE -
- srvr->vals->header_preamble_size) {
+ if (len > CIFSMaxBufSize + MAX_SMB2_HDR_SIZE) {
cifs_dbg(VFS, "SMB length greater than maximum, mid=%llu\n",
mid);
return 1;
@@ -227,44 +220,38 @@ smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
}
}
- if (srvr->vals->header_preamble_size + len != length) {
- cifs_dbg(VFS, "Total length %u RFC1002 length %zu mismatch mid %llu\n",
- length, srvr->vals->header_preamble_size + len, mid);
- return 1;
- }
-
- clc_len = smb2_calc_size(hdr);
+ clc_len = smb2_calc_size(buf, srvr);
#ifdef CONFIG_CIFS_SMB311
if (shdr->Command == SMB2_NEGOTIATE)
- clc_len += get_neg_ctxt_len(hdr, len, clc_len,
- srvr->vals->header_preamble_size);
+ clc_len += get_neg_ctxt_len(shdr, len, clc_len);
#endif /* SMB311 */
- if (srvr->vals->header_preamble_size + len != clc_len) {
- cifs_dbg(FYI, "Calculated size %u length %zu mismatch mid %llu\n",
- clc_len, srvr->vals->header_preamble_size + len, mid);
+ if (len != clc_len) {
+ cifs_dbg(FYI, "Calculated size %u length %u mismatch mid %llu\n",
+ clc_len, len, mid);
/* create failed on symlink */
if (command == SMB2_CREATE_HE &&
shdr->Status == STATUS_STOPPED_ON_SYMLINK)
return 0;
/* Windows 7 server returns 24 bytes more */
- if (clc_len + 24 - srvr->vals->header_preamble_size == len && command == SMB2_OPLOCK_BREAK_HE)
+ if (clc_len + 24 == len && command == SMB2_OPLOCK_BREAK_HE)
return 0;
/* server can return one byte more due to implied bcc[0] */
- if (clc_len == srvr->vals->header_preamble_size + len + 1)
+ if (clc_len == len + 1)
return 0;
/*
* MacOS server pads after SMB2.1 write response with 3 bytes
* of junk. Other servers match RFC1001 len to actual
* SMB2/SMB3 frame length (header + smb2 response specific data)
+	 * Some Windows servers do too when compounding is used.
* Log the server error (once), but allow it and continue
* since the frame is parseable.
*/
- if (clc_len < srvr->vals->header_preamble_size /* RFC1001 header size */ + len) {
+ if (clc_len < len) {
printk_once(KERN_WARNING
- "SMB2 server sent bad RFC1001 len %d not %zu\n",
- len, clc_len - srvr->vals->header_preamble_size);
+ "SMB2 server sent bad RFC1001 len %d not %d\n",
+ len, clc_len);
return 0;
}
@@ -305,15 +292,14 @@ static const bool has_smb2_data_area[NUMBER_OF_SMB2_COMMANDS] = {
 * area and the offset to it (from the beginning of the SMB) are also returned.
*/
char *
-smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
+smb2_get_data_area_len(int *off, int *len, struct smb2_sync_hdr *shdr)
{
- struct smb2_sync_hdr *shdr = get_sync_hdr(hdr);
*off = 0;
*len = 0;
/* error responses do not have data area */
if (shdr->Status && shdr->Status != STATUS_MORE_PROCESSING_REQUIRED &&
- (((struct smb2_err_rsp *)hdr)->StructureSize) ==
+ (((struct smb2_err_rsp *)shdr)->StructureSize) ==
SMB2_ERROR_STRUCTURE_SIZE2)
return NULL;
@@ -325,42 +311,44 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
switch (shdr->Command) {
case SMB2_NEGOTIATE:
*off = le16_to_cpu(
- ((struct smb2_negotiate_rsp *)hdr)->SecurityBufferOffset);
+ ((struct smb2_negotiate_rsp *)shdr)->SecurityBufferOffset);
*len = le16_to_cpu(
- ((struct smb2_negotiate_rsp *)hdr)->SecurityBufferLength);
+ ((struct smb2_negotiate_rsp *)shdr)->SecurityBufferLength);
break;
case SMB2_SESSION_SETUP:
*off = le16_to_cpu(
- ((struct smb2_sess_setup_rsp *)hdr)->SecurityBufferOffset);
+ ((struct smb2_sess_setup_rsp *)shdr)->SecurityBufferOffset);
*len = le16_to_cpu(
- ((struct smb2_sess_setup_rsp *)hdr)->SecurityBufferLength);
+ ((struct smb2_sess_setup_rsp *)shdr)->SecurityBufferLength);
break;
case SMB2_CREATE:
*off = le32_to_cpu(
- ((struct smb2_create_rsp *)hdr)->CreateContextsOffset);
+ ((struct smb2_create_rsp *)shdr)->CreateContextsOffset);
*len = le32_to_cpu(
- ((struct smb2_create_rsp *)hdr)->CreateContextsLength);
+ ((struct smb2_create_rsp *)shdr)->CreateContextsLength);
break;
case SMB2_QUERY_INFO:
*off = le16_to_cpu(
- ((struct smb2_query_info_rsp *)hdr)->OutputBufferOffset);
+ ((struct smb2_query_info_rsp *)shdr)->OutputBufferOffset);
*len = le32_to_cpu(
- ((struct smb2_query_info_rsp *)hdr)->OutputBufferLength);
+ ((struct smb2_query_info_rsp *)shdr)->OutputBufferLength);
break;
case SMB2_READ:
- *off = ((struct smb2_read_rsp *)hdr)->DataOffset;
- *len = le32_to_cpu(((struct smb2_read_rsp *)hdr)->DataLength);
+ /* TODO: is this a bug ? */
+ *off = ((struct smb2_read_rsp *)shdr)->DataOffset;
+ *len = le32_to_cpu(((struct smb2_read_rsp *)shdr)->DataLength);
break;
case SMB2_QUERY_DIRECTORY:
*off = le16_to_cpu(
- ((struct smb2_query_directory_rsp *)hdr)->OutputBufferOffset);
+ ((struct smb2_query_directory_rsp *)shdr)->OutputBufferOffset);
*len = le32_to_cpu(
- ((struct smb2_query_directory_rsp *)hdr)->OutputBufferLength);
+ ((struct smb2_query_directory_rsp *)shdr)->OutputBufferLength);
break;
case SMB2_IOCTL:
*off = le32_to_cpu(
- ((struct smb2_ioctl_rsp *)hdr)->OutputOffset);
- *len = le32_to_cpu(((struct smb2_ioctl_rsp *)hdr)->OutputCount);
+ ((struct smb2_ioctl_rsp *)shdr)->OutputOffset);
+ *len = le32_to_cpu(
+ ((struct smb2_ioctl_rsp *)shdr)->OutputCount);
break;
case SMB2_CHANGE_NOTIFY:
default:
@@ -403,15 +391,14 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
* portion, the number of word parameters and the data portion of the message.
*/
unsigned int
-smb2_calc_size(void *buf)
+smb2_calc_size(void *buf, struct TCP_Server_Info *srvr)
{
- struct smb2_pdu *pdu = (struct smb2_pdu *)buf;
- struct smb2_hdr *hdr = &pdu->hdr;
- struct smb2_sync_hdr *shdr = get_sync_hdr(hdr);
+ struct smb2_sync_pdu *pdu = (struct smb2_sync_pdu *)buf;
+ struct smb2_sync_hdr *shdr = &pdu->sync_hdr;
int offset; /* the offset from the beginning of SMB to data area */
int data_length; /* the length of the variable length data area */
/* Structure Size has already been checked to make sure it is 64 */
- int len = 4 + le16_to_cpu(shdr->StructureSize);
+ int len = le16_to_cpu(shdr->StructureSize);
/*
* StructureSize2, ie length of fixed parameter area has already
@@ -422,7 +409,7 @@ smb2_calc_size(void *buf)
if (has_smb2_data_area[le16_to_cpu(shdr->Command)] == false)
goto calc_size_exit;
- smb2_get_data_area_len(&offset, &data_length, hdr);
+ smb2_get_data_area_len(&offset, &data_length, shdr);
cifs_dbg(FYI, "SMB2 data length %d offset %d\n", data_length, offset);
if (data_length > 0) {
@@ -430,15 +417,14 @@ smb2_calc_size(void *buf)
* Check to make sure that data area begins after fixed area,
* Note that last byte of the fixed area is part of data area
* for some commands, typically those with odd StructureSize,
- * so we must add one to the calculation (and 4 to account for
- * the size of the RFC1001 hdr.
+ * so we must add one to the calculation.
*/
- if (offset + 4 + 1 < len) {
+ if (offset + 1 < len) {
cifs_dbg(VFS, "data area offset %d overlaps SMB2 header %d\n",
- offset + 4 + 1, len);
+ offset + 1, len);
data_length = 0;
} else {
- len = 4 + offset + data_length;
+ len = offset + data_length;
}
}
calc_size_exit:
@@ -465,8 +451,14 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb)
/* Windows doesn't allow paths beginning with \ */
if (from[0] == '\\')
start_of_path = from + 1;
+#ifdef CONFIG_CIFS_SMB311
+ /* SMB311 POSIX extensions paths do not include leading slash */
+ else if (cifs_sb_master_tcon(cifs_sb)->posix_extensions)
+ start_of_path = from + 1;
+#endif /* 311 */
else
start_of_path = from;
+
to = cifs_strndup_to_utf16(start_of_path, PATH_MAX, &len,
cifs_sb->local_nls, map_type);
return to;
@@ -621,7 +613,7 @@ smb2_is_valid_lease_break(char *buffer)
bool
smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
{
- struct smb2_oplock_break_rsp *rsp = (struct smb2_oplock_break_rsp *)buffer;
+ struct smb2_oplock_break *rsp = (struct smb2_oplock_break *)buffer;
struct list_head *tmp, *tmp1, *tmp2;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
@@ -630,7 +622,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
cifs_dbg(FYI, "Checking for oplock break\n");
- if (rsp->hdr.sync_hdr.Command != SMB2_OPLOCK_BREAK)
+ if (rsp->sync_hdr.Command != SMB2_OPLOCK_BREAK)
return false;
if (rsp->StructureSize !=
@@ -721,7 +713,7 @@ smb2_cancelled_close_fid(struct work_struct *work)
int
smb2_handle_cancelled_mid(char *buffer, struct TCP_Server_Info *server)
{
- struct smb2_sync_hdr *sync_hdr = get_sync_hdr(buffer);
+ struct smb2_sync_hdr *sync_hdr = (struct smb2_sync_hdr *)buffer;
struct smb2_create_rsp *rsp = (struct smb2_create_rsp *)buffer;
struct cifs_tcon *tcon;
struct close_cancelled_open *cancelled;
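
With the preamble gone, smb2_calc_size() no longer adds 4 anywhere: the frame length is StructureSize (64) + the fixed parameter area + the variable data, with the familiar +1 overlap allowance for odd StructureSize2 values whose last fixed byte doubles as the first data byte. A worked example for a READ response, assuming the usual DataOffset of 80:

#include <stdio.h>

/* READ response: StructureSize 64 (header) + StructureSize2 17 (fixed
 * area, odd, so its last byte is the first data byte), DataOffset 80 */
static unsigned int calc_read_rsp_size(unsigned int data_length)
{
	unsigned int len = 64 + 17;
	unsigned int offset = 80; /* DataOffset field of the response */

	if (data_length > 0) {
		if (offset + 1 < len)
			data_length = 0; /* data overlaps header: reject */
		else
			len = offset + data_length;
	}
	return len;
}

int main(void)
{
	printf("%u\n", calc_read_rsp_size(4096)); /* 4176, no +4 anymore */
	return 0;
}
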
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index b76b85881dcc..950d0ab2cc61 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -123,7 +123,7 @@ smb2_get_credits_field(struct TCP_Server_Info *server, const int optype)
static unsigned int
smb2_get_credits(struct mid_q_entry *mid)
{
- struct smb2_sync_hdr *shdr = get_sync_hdr(mid->resp_buf);
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)mid->resp_buf;
return le16_to_cpu(shdr->CreditRequest);
}
@@ -190,7 +190,7 @@ static struct mid_q_entry *
smb2_find_mid(struct TCP_Server_Info *server, char *buf)
{
struct mid_q_entry *mid;
- struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
__u64 wire_mid = le64_to_cpu(shdr->MessageId);
if (shdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) {
@@ -212,15 +212,16 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf)
}
static void
-smb2_dump_detail(void *buf)
+smb2_dump_detail(void *buf, struct TCP_Server_Info *server)
{
#ifdef CONFIG_CIFS_DEBUG2
- struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
cifs_dbg(VFS, "Cmd: %d Err: 0x%x Flags: 0x%x Mid: %llu Pid: %d\n",
shdr->Command, shdr->Status, shdr->Flags, shdr->MessageId,
shdr->ProcessId);
- cifs_dbg(VFS, "smb buf %p len %u\n", buf, smb2_calc_size(buf));
+ cifs_dbg(VFS, "smb buf %p len %u\n", buf,
+ server->ops->calc_smb_size(buf, server));
#endif
}
@@ -322,6 +323,40 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
}
#endif /* STATS2 */
+/*
+ * Open the directory at the root of a share
+ */
+int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
+{
+ struct cifs_open_parms oparams;
+ int rc;
+ __le16 srch_path = 0; /* Null - since an open of top of share */
+ u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
+
+ mutex_lock(&tcon->prfid_mutex);
+ if (tcon->valid_root_fid) {
+ cifs_dbg(FYI, "found a cached root file handle\n");
+ memcpy(pfid, tcon->prfid, sizeof(struct cifs_fid));
+ mutex_unlock(&tcon->prfid_mutex);
+ return 0;
+ }
+
+ oparams.tcon = tcon;
+ oparams.create_options = 0;
+ oparams.desired_access = FILE_READ_ATTRIBUTES;
+ oparams.disposition = FILE_OPEN;
+ oparams.fid = pfid;
+ oparams.reconnect = false;
+
+ rc = SMB2_open(xid, &oparams, &srch_path, &oplock, NULL, NULL);
+ if (rc == 0) {
+ memcpy(tcon->prfid, pfid, sizeof(struct cifs_fid));
+ tcon->valid_root_fid = true;
+ }
+ mutex_unlock(&tcon->prfid_mutex);
+ return rc;
+}
+
static void
smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
{
@@ -330,6 +365,7 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
struct cifs_open_parms oparms;
struct cifs_fid fid;
+ bool no_cached_open = tcon->nohandlecache;
oparms.tcon = tcon;
oparms.desired_access = FILE_READ_ATTRIBUTES;
@@ -338,7 +374,11 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
oparms.fid = &fid;
oparms.reconnect = false;
- rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL);
+ if (no_cached_open)
+ rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL);
+ else
+ rc = open_shroot(xid, tcon, &fid);
+
if (rc)
return;
@@ -352,7 +392,8 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
FS_DEVICE_INFORMATION);
SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
FS_SECTOR_SIZE_INFORMATION); /* SMB3 specific */
- SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+ if (no_cached_open)
+ SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
return;
}
@@ -394,6 +435,9 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_open_parms oparms;
struct cifs_fid fid;
+ if ((*full_path == 0) && tcon->valid_root_fid)
+ return 0;
+
utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
if (!utf16_path)
return -ENOMEM;
@@ -589,9 +633,15 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+ /*
+ * If ea_name is NULL (listxattr) and there are no EAs, return 0 as it's
+ * not an error. Otherwise, the specified ea_name was not found.
+ */
if (!rc)
rc = move_smb2_ea_to_cifs(ea_data, buf_size, smb2_data,
SMB2_MAX_EA_BUF, ea_name);
+ else if (!ea_name && rc == -ENODATA)
+ rc = 0;
kfree(smb2_data);
return rc;
@@ -698,9 +748,11 @@ smb2_dump_share_caps(struct seq_file *m, struct cifs_tcon *tcon)
seq_puts(m, " TRIM-support,");
seq_printf(m, "\tShare Flags: 0x%x", tcon->share_flags);
+ seq_printf(m, "\n\ttid: 0x%x", tcon->tid);
if (tcon->perf_sector_size)
seq_printf(m, "\tOptimal sector size: 0x%x",
tcon->perf_sector_size);
+ seq_printf(m, "\tMaximal Access: 0x%x", tcon->maximal_access);
}
static void
@@ -1251,7 +1303,7 @@ smb2_close_dir(const unsigned int xid, struct cifs_tcon *tcon,
static bool
smb2_is_status_pending(char *buf, struct TCP_Server_Info *server, int length)
{
- struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
if (shdr->Status != STATUS_PENDING)
return false;
@@ -1269,12 +1321,13 @@ smb2_is_status_pending(char *buf, struct TCP_Server_Info *server, int length)
static bool
smb2_is_session_expired(char *buf)
{
- struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
- if (shdr->Status != STATUS_NETWORK_SESSION_EXPIRED)
+ if (shdr->Status != STATUS_NETWORK_SESSION_EXPIRED &&
+ shdr->Status != STATUS_USER_SESSION_DELETED)
return false;
- cifs_dbg(FYI, "Session expired\n");
+ cifs_dbg(FYI, "Session expired or deleted\n");
return true;
}
@@ -1468,8 +1521,6 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
unsigned int sub_offset;
unsigned int print_len;
unsigned int print_offset;
- struct cifs_ses *ses = tcon->ses;
- struct TCP_Server_Info *server = ses->server;
cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
@@ -1493,7 +1544,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
err_buf = err_iov.iov_base;
if (le32_to_cpu(err_buf->ByteCount) < sizeof(struct smb2_symlink_err_rsp) ||
- err_iov.iov_len + server->vals->header_preamble_size < SMB2_SYMLINK_STRUCT_SIZE) {
+ err_iov.iov_len < SMB2_SYMLINK_STRUCT_SIZE) {
kfree(utf16_path);
return -ENOENT;
}
@@ -1506,14 +1557,13 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
print_len = le16_to_cpu(symlink->PrintNameLength);
print_offset = le16_to_cpu(symlink->PrintNameOffset);
- if (err_iov.iov_len + server->vals->header_preamble_size <
- SMB2_SYMLINK_STRUCT_SIZE + sub_offset + sub_len) {
+ if (err_iov.iov_len < SMB2_SYMLINK_STRUCT_SIZE + sub_offset + sub_len) {
kfree(utf16_path);
return -ENOENT;
}
- if (err_iov.iov_len + server->vals->header_preamble_size <
- SMB2_SYMLINK_STRUCT_SIZE + print_offset + print_len) {
+ if (err_iov.iov_len <
+ SMB2_SYMLINK_STRUCT_SIZE + print_offset + print_len) {
kfree(utf16_path);
return -ENOENT;
}
@@ -1587,8 +1637,11 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb,
oparms.create_options = 0;
utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
- if (!utf16_path)
- return ERR_PTR(-ENOMEM);
+ if (!utf16_path) {
+ rc = -ENOMEM;
+ free_xid(xid);
+ return ERR_PTR(rc);
+ }
oparms.tcon = tcon;
oparms.desired_access = READ_CONTROL;
@@ -1646,8 +1699,11 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
access_flags = WRITE_DAC;
utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
- if (!utf16_path)
- return -ENOMEM;
+ if (!utf16_path) {
+ rc = -ENOMEM;
+ free_xid(xid);
+ return rc;
+ }
oparms.tcon = tcon;
oparms.desired_access = access_flags;
@@ -1707,15 +1763,21 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
/* if file not oplocked can't be sure whether asking to extend size */
if (!CIFS_CACHE_READ(cifsi))
- if (keep_size == false)
- return -EOPNOTSUPP;
+ if (keep_size == false) {
+ rc = -EOPNOTSUPP;
+ free_xid(xid);
+ return rc;
+ }
/*
* Must check if file sparse since fallocate -z (zero range) assumes
* non-sparse allocation
*/
- if (!(cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE))
- return -EOPNOTSUPP;
+ if (!(cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE)) {
+ rc = -EOPNOTSUPP;
+ free_xid(xid);
+ return rc;
+ }
/*
* need to make sure we are not asked to extend the file since the SMB3
@@ -1724,8 +1786,11 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
* which for a non sparse file would zero the newly extended range
*/
if (keep_size == false)
- if (i_size_read(inode) < offset + len)
- return -EOPNOTSUPP;
+ if (i_size_read(inode) < offset + len) {
+ rc = -EOPNOTSUPP;
+ free_xid(xid);
+ return rc;
+ }
cifs_dbg(FYI, "offset %lld len %lld", offset, len);
@@ -1758,8 +1823,11 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
/* Need to make file sparse, if not already, before freeing range. */
/* Consider adding equivalent for compressed since it could also work */
- if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse))
- return -EOPNOTSUPP;
+ if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) {
+ rc = -EOPNOTSUPP;
+ free_xid(xid);
+ return rc;
+ }
cifs_dbg(FYI, "offset %lld len %lld", offset, len);
@@ -1790,8 +1858,10 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
/* if file not oplocked can't be sure whether asking to extend size */
if (!CIFS_CACHE_READ(cifsi))
- if (keep_size == false)
- return -EOPNOTSUPP;
+ if (keep_size == false) {
+ free_xid(xid);
+ return rc;
+ }
/*
* Files are non-sparse by default so falloc may be a no-op
@@ -1800,14 +1870,16 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
*/
if ((cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) == 0) {
if (keep_size == true)
- return 0;
+ rc = 0;
/* check if extending file */
else if (i_size_read(inode) >= off + len)
/* not extending file and already not sparse */
- return 0;
+ rc = 0;
/* BB: in future add else clause to extend file */
else
- return -EOPNOTSUPP;
+ rc = -EOPNOTSUPP;
+ free_xid(xid);
+ return rc;
}
if ((keep_size == true) || (i_size_read(inode) >= off + len)) {
@@ -1819,8 +1891,11 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
* ie potentially making a few extra pages at the beginning
* or end of the file non-sparse via set_sparse is harmless.
*/
- if ((off > 8192) || (off + len + 8192 < i_size_read(inode)))
- return -EOPNOTSUPP;
+ if ((off > 8192) || (off + len + 8192 < i_size_read(inode))) {
+ rc = -EOPNOTSUPP;
+ free_xid(xid);
+ return rc;
+ }
rc = smb2_set_sparse(xid, tcon, cfile, inode, false);
}
@@ -2029,7 +2104,7 @@ smb3_create_lease_buf(u8 *lease_key, u8 oplock)
}
static __u8
-smb2_parse_lease_buf(void *buf, unsigned int *epoch)
+smb2_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key)
{
struct create_lease *lc = (struct create_lease *)buf;
@@ -2040,13 +2115,16 @@ smb2_parse_lease_buf(void *buf, unsigned int *epoch)
}
static __u8
-smb3_parse_lease_buf(void *buf, unsigned int *epoch)
+smb3_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key)
{
struct create_lease_v2 *lc = (struct create_lease_v2 *)buf;
*epoch = le16_to_cpu(lc->lcontext.Epoch);
if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS)
return SMB2_OPLOCK_LEVEL_NOCHANGE;
+ if (lease_key)
+ memcpy(lease_key, &lc->lcontext.LeaseKeyLow,
+ SMB2_LEASE_KEY_SIZE);
return le32_to_cpu(lc->lcontext.LeaseState);
}
@@ -2064,12 +2142,11 @@ smb2_dir_needs_close(struct cifsFileInfo *cfile)
}
static void
-fill_transform_hdr(struct TCP_Server_Info *server,
- struct smb2_transform_hdr *tr_hdr, struct smb_rqst *old_rq)
+fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, unsigned int orig_len,
+ struct smb_rqst *old_rq)
{
struct smb2_sync_hdr *shdr =
(struct smb2_sync_hdr *)old_rq->rq_iov[1].iov_base;
- unsigned int orig_len = get_rfc1002_length(old_rq->rq_iov[0].iov_base);
memset(tr_hdr, 0, sizeof(struct smb2_transform_hdr));
tr_hdr->ProtocolId = SMB2_TRANSFORM_PROTO_NUM;
@@ -2077,8 +2154,6 @@ fill_transform_hdr(struct TCP_Server_Info *server,
tr_hdr->Flags = cpu_to_le16(0x01);
get_random_bytes(&tr_hdr->Nonce, SMB3_AES128CMM_NONCE);
memcpy(&tr_hdr->SessionId, &shdr->SessionId, 8);
- inc_rfc1001_len(tr_hdr, sizeof(struct smb2_transform_hdr) - server->vals->header_preamble_size);
- inc_rfc1001_len(tr_hdr, orig_len);
}
/* We can not use the normal sg_set_buf() as we will sometimes pass a
@@ -2090,11 +2165,16 @@ static inline void smb2_sg_set_buf(struct scatterlist *sg, const void *buf,
sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
}
+/* Assumes:
+ * rqst->rq_iov[0] is rfc1002 length
+ * rqst->rq_iov[1] is transform header
+ * rqst->rq_iov[2+] data to be encrypted/decrypted
+ */
static struct scatterlist *
init_sg(struct smb_rqst *rqst, u8 *sign)
{
- unsigned int sg_len = rqst->rq_nvec + rqst->rq_npages + 1;
- unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 24;
+ unsigned int sg_len = rqst->rq_nvec + rqst->rq_npages;
+ unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20;
struct scatterlist *sg;
unsigned int i;
unsigned int j;
@@ -2104,10 +2184,10 @@ init_sg(struct smb_rqst *rqst, u8 *sign)
return NULL;
sg_init_table(sg, sg_len);
- smb2_sg_set_buf(&sg[0], rqst->rq_iov[0].iov_base + 24, assoc_data_len);
- for (i = 1; i < rqst->rq_nvec; i++)
- smb2_sg_set_buf(&sg[i], rqst->rq_iov[i].iov_base,
- rqst->rq_iov[i].iov_len);
+ smb2_sg_set_buf(&sg[0], rqst->rq_iov[1].iov_base + 20, assoc_data_len);
+ for (i = 1; i < rqst->rq_nvec - 1; i++)
+ smb2_sg_set_buf(&sg[i], rqst->rq_iov[i+1].iov_base,
+ rqst->rq_iov[i+1].iov_len);
for (j = 0; i < sg_len - 1; i++, j++) {
unsigned int len = (j < rqst->rq_npages - 1) ? rqst->rq_pagesz
: rqst->rq_tailsz;
@@ -2139,9 +2219,10 @@ smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key)
}
/*
* Encrypt or decrypt @rqst message. @rqst has the following format:
- * iov[0] - transform header (associate data),
- * iov[1-N] and pages - data to encrypt.
- * On success return encrypted data in iov[1-N] and pages, leave iov[0]
+ * iov[0] - rfc1002 length
+ * iov[1] - transform header (associated data),
+ * iov[2-N] and pages - data to encrypt.
+ * On success return encrypted data in iov[2-N] and pages, leave iov[0-1]
* untouched.
*/
static int
@@ -2149,7 +2230,7 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
{
struct smb2_transform_hdr *tr_hdr =
(struct smb2_transform_hdr *)rqst->rq_iov[0].iov_base;
- unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20 - server->vals->header_preamble_size;
+ unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20;
int rc = 0;
struct scatterlist *sg;
u8 sign[SMB2_SIGNATURE_SIZE] = {};
@@ -2236,6 +2317,10 @@ free_req:
return rc;
}
+/*
+ * This is called from smb_send_rqst. At this point we have the rfc1002
+ * header as the first element in the vector.
+ */
static int
smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq,
struct smb_rqst *old_rq)
@@ -2244,6 +2329,7 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq,
struct page **pages;
struct smb2_transform_hdr *tr_hdr;
unsigned int npages = old_rq->rq_npages;
+ unsigned int orig_len = get_rfc1002_length(old_rq->rq_iov[0].iov_base);
int i;
int rc = -ENOMEM;
@@ -2262,24 +2348,34 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq,
goto err_free_pages;
}
- iov = kmalloc_array(old_rq->rq_nvec, sizeof(struct kvec), GFP_KERNEL);
+ /* Make space for one extra iov to hold the transform header */
+ iov = kmalloc_array(old_rq->rq_nvec + 1, sizeof(struct kvec),
+ GFP_KERNEL);
if (!iov)
goto err_free_pages;
/* copy all iovs from the old except the 1st one (rfc1002 length) */
- memcpy(&iov[1], &old_rq->rq_iov[1],
+ memcpy(&iov[2], &old_rq->rq_iov[1],
sizeof(struct kvec) * (old_rq->rq_nvec - 1));
+ /* copy the rfc1002 iov */
+ iov[0].iov_base = old_rq->rq_iov[0].iov_base;
+ iov[0].iov_len = old_rq->rq_iov[0].iov_len;
+
new_rq->rq_iov = iov;
- new_rq->rq_nvec = old_rq->rq_nvec;
+ new_rq->rq_nvec = old_rq->rq_nvec + 1;
tr_hdr = kmalloc(sizeof(struct smb2_transform_hdr), GFP_KERNEL);
if (!tr_hdr)
goto err_free_iov;
- /* fill the 1st iov with a transform header */
- fill_transform_hdr(server, tr_hdr, old_rq);
- new_rq->rq_iov[0].iov_base = tr_hdr;
- new_rq->rq_iov[0].iov_len = sizeof(struct smb2_transform_hdr);
+ /* fill the 2nd iov with a transform header */
+ fill_transform_hdr(tr_hdr, orig_len, old_rq);
+ new_rq->rq_iov[1].iov_base = tr_hdr;
+ new_rq->rq_iov[1].iov_len = sizeof(struct smb2_transform_hdr);
+
+ /* Update rfc1002 header */
+ inc_rfc1001_len(new_rq->rq_iov[0].iov_base,
+ sizeof(struct smb2_transform_hdr));
/* copy pages from the old */
for (i = 0; i < npages; i++) {
@@ -2319,7 +2415,7 @@ smb3_free_transform_rq(struct smb_rqst *rqst)
put_page(rqst->rq_pages[i]);
kfree(rqst->rq_pages);
/* free transform header */
- kfree(rqst->rq_iov[0].iov_base);
+ kfree(rqst->rq_iov[1].iov_base);
kfree(rqst->rq_iov);
}
@@ -2336,18 +2432,19 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf,
unsigned int buf_data_size, struct page **pages,
unsigned int npages, unsigned int page_data_size)
{
- struct kvec iov[2];
+ struct kvec iov[3];
struct smb_rqst rqst = {NULL};
- struct smb2_hdr *hdr;
int rc;
- iov[0].iov_base = buf;
- iov[0].iov_len = sizeof(struct smb2_transform_hdr);
- iov[1].iov_base = buf + sizeof(struct smb2_transform_hdr);
- iov[1].iov_len = buf_data_size;
+ iov[0].iov_base = NULL;
+ iov[0].iov_len = 0;
+ iov[1].iov_base = buf;
+ iov[1].iov_len = sizeof(struct smb2_transform_hdr);
+ iov[2].iov_base = buf + sizeof(struct smb2_transform_hdr);
+ iov[2].iov_len = buf_data_size;
rqst.rq_iov = iov;
- rqst.rq_nvec = 2;
+ rqst.rq_nvec = 3;
rqst.rq_pages = pages;
rqst.rq_npages = npages;
rqst.rq_pagesz = PAGE_SIZE;
@@ -2359,10 +2456,9 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf,
if (rc)
return rc;
- memmove(buf + server->vals->header_preamble_size, iov[1].iov_base, buf_data_size);
- hdr = (struct smb2_hdr *)buf;
- hdr->smb2_buf_length = cpu_to_be32(buf_data_size + page_data_size);
- server->total_read = buf_data_size + page_data_size + server->vals->header_preamble_size;
+ memmove(buf, iov[2].iov_base, buf_data_size);
+
+ server->total_read = buf_data_size + page_data_size;
return rc;
}
@@ -2387,7 +2483,7 @@ read_data_into_pages(struct TCP_Server_Info *server, struct page **pages,
zero_user(page, len, PAGE_SIZE - len);
len = 0;
}
- length = cifs_read_page_from_socket(server, page, n);
+ length = cifs_read_page_from_socket(server, page, 0, n);
if (length < 0)
return length;
server->total_read += length;
@@ -2435,7 +2531,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
unsigned int cur_page_idx;
unsigned int pad_len;
struct cifs_readdata *rdata = mid->callback_data;
- struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
struct bio_vec *bvec = NULL;
struct iov_iter iter;
struct kvec iov;
@@ -2466,7 +2562,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
return 0;
}
- data_offset = server->ops->read_data_offset(buf) + server->vals->header_preamble_size;
+ data_offset = server->ops->read_data_offset(buf);
#ifdef CONFIG_CIFS_SMB_DIRECT
use_rdma_mr = rdata->mr;
#endif
@@ -2562,12 +2658,11 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid)
unsigned int npages;
struct page **pages;
unsigned int len;
- unsigned int buflen = server->pdu_size + server->vals->header_preamble_size;
+ unsigned int buflen = server->pdu_size;
int rc;
int i = 0;
- len = min_t(unsigned int, buflen, server->vals->read_rsp_size -
- server->vals->header_preamble_size +
+ len = min_t(unsigned int, buflen, server->vals->read_rsp_size +
sizeof(struct smb2_transform_hdr)) - HEADER_SIZE(server) + 1;
rc = cifs_read_from_socket(server, buf + HEADER_SIZE(server) - 1, len);
@@ -2575,8 +2670,7 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid)
return rc;
server->total_read += rc;
- len = le32_to_cpu(tr_hdr->OriginalMessageSize) +
- server->vals->header_preamble_size -
+ len = le32_to_cpu(tr_hdr->OriginalMessageSize) -
server->vals->read_rsp_size;
npages = DIV_ROUND_UP(len, PAGE_SIZE);
@@ -2603,8 +2697,7 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid)
if (rc)
goto free_pages;
- rc = decrypt_raw_data(server, buf, server->vals->read_rsp_size -
- server->vals->header_preamble_size,
+ rc = decrypt_raw_data(server, buf, server->vals->read_rsp_size,
pages, npages, len);
if (rc)
goto free_pages;
@@ -2641,7 +2734,7 @@ receive_encrypted_standard(struct TCP_Server_Info *server,
struct mid_q_entry *mid_entry;
/* switch to large buffer if too big for a small one */
- if (pdu_length + server->vals->header_preamble_size > MAX_CIFS_SMALL_BUFFER_SIZE) {
+ if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE) {
server->large_buf = true;
memcpy(server->bigbuf, buf, server->total_read);
buf = server->bigbuf;
@@ -2649,13 +2742,12 @@ receive_encrypted_standard(struct TCP_Server_Info *server,
/* now read the rest */
length = cifs_read_from_socket(server, buf + HEADER_SIZE(server) - 1,
- pdu_length - HEADER_SIZE(server) + 1 +
- server->vals->header_preamble_size);
+ pdu_length - HEADER_SIZE(server) + 1);
if (length < 0)
return length;
server->total_read += length;
- buf_size = pdu_length + server->vals->header_preamble_size - sizeof(struct smb2_transform_hdr);
+ buf_size = pdu_length - sizeof(struct smb2_transform_hdr);
length = decrypt_raw_data(server, buf, buf_size, NULL, 0, 0);
if (length)
return length;
@@ -2684,7 +2776,7 @@ smb3_receive_transform(struct TCP_Server_Info *server, struct mid_q_entry **mid)
struct smb2_transform_hdr *tr_hdr = (struct smb2_transform_hdr *)buf;
unsigned int orig_len = le32_to_cpu(tr_hdr->OriginalMessageSize);
- if (pdu_length + server->vals->header_preamble_size < sizeof(struct smb2_transform_hdr) +
+ if (pdu_length < sizeof(struct smb2_transform_hdr) +
sizeof(struct smb2_sync_hdr)) {
cifs_dbg(VFS, "Transform message is too small (%u)\n",
pdu_length);
@@ -2693,14 +2785,14 @@ smb3_receive_transform(struct TCP_Server_Info *server, struct mid_q_entry **mid)
return -ECONNABORTED;
}
- if (pdu_length + server->vals->header_preamble_size < orig_len + sizeof(struct smb2_transform_hdr)) {
+ if (pdu_length < orig_len + sizeof(struct smb2_transform_hdr)) {
cifs_dbg(VFS, "Transform message is broken\n");
cifs_reconnect(server);
wake_up(&server->response_q);
return -ECONNABORTED;
}
- if (pdu_length + server->vals->header_preamble_size > CIFSMaxBufSize + MAX_HEADER_SIZE(server))
+ if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server))
return receive_encrypted_read(server, mid);
return receive_encrypted_standard(server, mid);
@@ -2711,11 +2803,23 @@ smb3_handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
char *buf = server->large_buf ? server->bigbuf : server->smallbuf;
- return handle_read_data(server, mid, buf, server->pdu_size +
- server->vals->header_preamble_size,
+ return handle_read_data(server, mid, buf, server->pdu_size,
NULL, 0, 0);
}
+static int
+smb2_next_header(char *buf)
+{
+ struct smb2_sync_hdr *hdr = (struct smb2_sync_hdr *)buf;
+ struct smb2_transform_hdr *t_hdr = (struct smb2_transform_hdr *)buf;
+
+ if (hdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM)
+ return sizeof(struct smb2_transform_hdr) +
+ le32_to_cpu(t_hdr->OriginalMessageSize);
+
+ return le32_to_cpu(hdr->NextCommand);
+}
+
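The new smb2_next_header() operation gives the receive path a single way to find where the next PDU begins: for an encrypted payload the next message starts after the transform header plus OriginalMessageSize, otherwise the sync header's NextCommand holds the offset of the next compounded PDU (zero in the last one). A minimal sketch, assuming a buffer already known to hold complete PDUs, of how such a callback could walk a compound response:

/* Illustrative only: walk compounded SMB2 PDUs via a next_header-style
 * callback; NextCommand is zero in the final message of a compound. */
static void walk_compound(char *buf, unsigned int total_len,
			  int (*next_header)(char *))
{
	unsigned int off = 0;

	while (off < total_len) {
		int next = next_header(buf + off);

		if (next <= 0)
			break;	/* last (or only) PDU */
		off += next;
	}
}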
struct smb_version_operations smb20_operations = {
.compare_fids = smb2_compare_fids,
.setup_request = smb2_setup_request,
@@ -2807,6 +2911,7 @@ struct smb_version_operations smb20_operations = {
.get_acl_by_fid = get_smb2_acl_by_fid,
.set_acl = set_smb2_acl,
#endif /* CIFS_ACL */
+ .next_header = smb2_next_header,
};
struct smb_version_operations smb21_operations = {
@@ -2901,6 +3006,7 @@ struct smb_version_operations smb21_operations = {
.get_acl_by_fid = get_smb2_acl_by_fid,
.set_acl = set_smb2_acl,
#endif /* CIFS_ACL */
+ .next_header = smb2_next_header,
};
struct smb_version_operations smb30_operations = {
@@ -3005,6 +3111,7 @@ struct smb_version_operations smb30_operations = {
.get_acl_by_fid = get_smb2_acl_by_fid,
.set_acl = set_smb2_acl,
#endif /* CIFS_ACL */
+ .next_header = smb2_next_header,
};
#ifdef CONFIG_CIFS_SMB311
@@ -3105,6 +3212,7 @@ struct smb_version_operations smb311_operations = {
.query_all_EAs = smb2_query_eas,
.set_EA = smb2_set_ea,
#endif /* CIFS_XATTR */
+ .next_header = smb2_next_header,
};
#endif /* CIFS_SMB311 */
@@ -3116,8 +3224,8 @@ struct smb_version_values smb20_values = {
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
- .header_size = sizeof(struct smb2_hdr),
- .header_preamble_size = 4,
+ .header_size = sizeof(struct smb2_sync_hdr),
+ .header_preamble_size = 0,
.max_header_size = MAX_SMB2_HDR_SIZE,
.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
.lock_cmd = SMB2_LOCK,
@@ -3137,8 +3245,8 @@ struct smb_version_values smb21_values = {
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
- .header_size = sizeof(struct smb2_hdr),
- .header_preamble_size = 4,
+ .header_size = sizeof(struct smb2_sync_hdr),
+ .header_preamble_size = 0,
.max_header_size = MAX_SMB2_HDR_SIZE,
.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
.lock_cmd = SMB2_LOCK,
@@ -3158,8 +3266,8 @@ struct smb_version_values smb3any_values = {
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
- .header_size = sizeof(struct smb2_hdr),
- .header_preamble_size = 4,
+ .header_size = sizeof(struct smb2_sync_hdr),
+ .header_preamble_size = 0,
.max_header_size = MAX_SMB2_HDR_SIZE,
.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
.lock_cmd = SMB2_LOCK,
@@ -3179,8 +3287,8 @@ struct smb_version_values smbdefault_values = {
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
- .header_size = sizeof(struct smb2_hdr),
- .header_preamble_size = 4,
+ .header_size = sizeof(struct smb2_sync_hdr),
+ .header_preamble_size = 0,
.max_header_size = MAX_SMB2_HDR_SIZE,
.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
.lock_cmd = SMB2_LOCK,
@@ -3200,8 +3308,8 @@ struct smb_version_values smb30_values = {
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
- .header_size = sizeof(struct smb2_hdr),
- .header_preamble_size = 4,
+ .header_size = sizeof(struct smb2_sync_hdr),
+ .header_preamble_size = 0,
.max_header_size = MAX_SMB2_HDR_SIZE,
.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
.lock_cmd = SMB2_LOCK,
@@ -3221,8 +3329,8 @@ struct smb_version_values smb302_values = {
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
- .header_size = sizeof(struct smb2_hdr),
- .header_preamble_size = 4,
+ .header_size = sizeof(struct smb2_sync_hdr),
+ .header_preamble_size = 0,
.max_header_size = MAX_SMB2_HDR_SIZE,
.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
.lock_cmd = SMB2_LOCK,
@@ -3243,8 +3351,8 @@ struct smb_version_values smb311_values = {
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
- .header_size = sizeof(struct smb2_hdr),
- .header_preamble_size = 4,
+ .header_size = sizeof(struct smb2_sync_hdr),
+ .header_preamble_size = 0,
.max_header_size = MAX_SMB2_HDR_SIZE,
.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
.lock_cmd = SMB2_LOCK,
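Every smb_version_values table above drops header_preamble_size from 4 to 0 and sizes the header as the bare smb2_sync_hdr: with the RFC1002 length field no longer treated as part of the PDU, all response offsets are computed from the start of the SMB2 header itself. A minimal sketch of the before/after offset arithmetic, using the query-info response as an example:

/* Illustrative only: locating a response data area with and without the
 * 4-byte RFC1002 length preamble counted as part of the PDU. */
static char *data_area(struct smb2_query_info_rsp *rsp, bool has_preamble)
{
	unsigned int preamble = has_preamble ? 4 : 0;	/* old value was 4 */

	return (char *)rsp + preamble + le16_to_cpu(rsp->OutputBufferOffset);
}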
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 60db51bae0e3..281fbc1dc720 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -49,6 +49,7 @@
#include "cifspdu.h"
#include "cifs_spnego.h"
#include "smbdirect.h"
+#include "trace.h"
/*
* The following table defines the expected "StructureSize" of SMB2 requests
@@ -79,7 +80,7 @@ static const int smb2_req_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
/* SMB2_OPLOCK_BREAK */ 24 /* BB this is 36 for LEASE_BREAK variant */
};
-static int encryption_required(const struct cifs_tcon *tcon)
+static int smb3_encryption_required(const struct cifs_tcon *tcon)
{
if (!tcon)
return 0;
@@ -145,7 +146,7 @@ smb2_hdr_assemble(struct smb2_sync_hdr *shdr, __le16 smb2_cmd,
shdr->Flags |= SMB2_FLAGS_DFS_OPERATIONS; */
if (tcon->ses && tcon->ses->server && tcon->ses->server->sign &&
- !encryption_required(tcon))
+ !smb3_encryption_required(tcon))
shdr->Flags |= SMB2_FLAGS_SIGNED;
out:
return;
@@ -367,6 +368,7 @@ smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon,
#define SMB2_PREAUTH_INTEGRITY_CAPABILITIES cpu_to_le16(1)
#define SMB2_ENCRYPTION_CAPABILITIES cpu_to_le16(2)
+#define SMB2_POSIX_EXTENSIONS_AVAILABLE cpu_to_le16(0x100)
static void
build_preauth_ctxt(struct smb2_preauth_neg_context *pneg_ctxt)
@@ -390,21 +392,35 @@ build_encrypt_ctxt(struct smb2_encryption_neg_context *pneg_ctxt)
}
static void
+build_posix_ctxt(struct smb2_posix_neg_context *pneg_ctxt)
+{
+ pneg_ctxt->ContextType = SMB2_POSIX_EXTENSIONS_AVAILABLE;
+ pneg_ctxt->DataLength = cpu_to_le16(POSIX_CTXT_DATA_LEN);
+}
+
+static void
assemble_neg_contexts(struct smb2_negotiate_req *req,
unsigned int *total_len)
{
char *pneg_ctxt = (char *)req + OFFSET_OF_NEG_CONTEXT;
+ unsigned int ctxt_len;
+ *total_len += 2; /* pad by 2 so the first context starts on an 8 byte boundary */
build_preauth_ctxt((struct smb2_preauth_neg_context *)pneg_ctxt);
- /* Add 2 to size to round to 8 byte boundary */
+ ctxt_len = DIV_ROUND_UP(sizeof(struct smb2_preauth_neg_context), 8) * 8;
+ *total_len += ctxt_len;
+ pneg_ctxt += ctxt_len;
- pneg_ctxt += 2 + sizeof(struct smb2_preauth_neg_context);
build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt);
- req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
- req->NegotiateContextCount = cpu_to_le16(2);
+ ctxt_len = DIV_ROUND_UP(sizeof(struct smb2_encryption_neg_context), 8) * 8;
+ *total_len += ctxt_len;
+ pneg_ctxt += ctxt_len;
+
+ build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt);
+ *total_len += sizeof(struct smb2_posix_neg_context);
- *total_len += 4 + sizeof(struct smb2_preauth_neg_context)
- + sizeof(struct smb2_encryption_neg_context);
+ req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
+ req->NegotiateContextCount = cpu_to_le16(3);
}
static void decode_preauth_context(struct smb2_preauth_neg_context *ctxt)
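assemble_neg_contexts() now derives each context's on-wire footprint with DIV_ROUND_UP(..., 8) * 8 rather than hand-coded "+ 2" arithmetic, since negotiate contexts must begin on 8-byte boundaries. A minimal sketch of the idiom, which is the open-coded form of the kernel's round_up() helper for power-of-two alignment:

/* Illustrative only: DIV_ROUND_UP(len, 8) * 8 == round_up(len, 8)
 * for the 8-byte negotiate-context alignment rule. */
static unsigned int neg_ctxt_padded_len(unsigned int raw_len)
{
	return round_up(raw_len, 8);	/* e.g. 38 pads to 40 */
}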
@@ -449,12 +465,12 @@ static int decode_encrypt_ctx(struct TCP_Server_Info *server,
}
static int smb311_decode_neg_context(struct smb2_negotiate_rsp *rsp,
- struct TCP_Server_Info *server)
+ struct TCP_Server_Info *server,
+ unsigned int len_of_smb)
{
struct smb2_neg_context *pctx;
unsigned int offset = le32_to_cpu(rsp->NegotiateContextOffset);
unsigned int ctxt_cnt = le16_to_cpu(rsp->NegotiateContextCount);
- unsigned int len_of_smb = be32_to_cpu(rsp->hdr.smb2_buf_length);
unsigned int len_of_ctxts, i;
int rc = 0;
@@ -475,8 +491,7 @@ static int smb311_decode_neg_context(struct smb2_negotiate_rsp *rsp,
if (len_of_ctxts < sizeof(struct smb2_neg_context))
break;
- pctx = (struct smb2_neg_context *)(offset +
- server->vals->header_preamble_size + (char *)rsp);
+ pctx = (struct smb2_neg_context *)(offset + (char *)rsp);
clen = le16_to_cpu(pctx->DataLength);
if (clen > len_of_ctxts)
break;
@@ -487,6 +502,8 @@ static int smb311_decode_neg_context(struct smb2_negotiate_rsp *rsp,
else if (pctx->ContextType == SMB2_ENCRYPTION_CAPABILITIES)
rc = decode_encrypt_ctx(server,
(struct smb2_encryption_neg_context *)pctx);
+ else if (pctx->ContextType == SMB2_POSIX_EXTENSIONS_AVAILABLE)
+ server->posix_ext_supported = true;
else
cifs_dbg(VFS, "unknown negcontext of type %d ignored\n",
le16_to_cpu(pctx->ContextType));
@@ -501,6 +518,64 @@ static int smb311_decode_neg_context(struct smb2_negotiate_rsp *rsp,
return rc;
}
+static struct create_posix *
+create_posix_buf(umode_t mode)
+{
+ struct create_posix *buf;
+
+ buf = kzalloc(sizeof(struct create_posix),
+ GFP_KERNEL);
+ if (!buf)
+ return NULL;
+
+ buf->ccontext.DataOffset =
+ cpu_to_le16(offsetof(struct create_posix, Mode));
+ buf->ccontext.DataLength = cpu_to_le32(4);
+ buf->ccontext.NameOffset =
+ cpu_to_le16(offsetof(struct create_posix, Name));
+ buf->ccontext.NameLength = cpu_to_le16(16);
+
+ /* SMB2_CREATE_TAG_POSIX is "0x93AD25509CB411E7B42383DE968BCD7C" */
+ buf->Name[0] = 0x93;
+ buf->Name[1] = 0xAD;
+ buf->Name[2] = 0x25;
+ buf->Name[3] = 0x50;
+ buf->Name[4] = 0x9C;
+ buf->Name[5] = 0xB4;
+ buf->Name[6] = 0x11;
+ buf->Name[7] = 0xE7;
+ buf->Name[8] = 0xB4;
+ buf->Name[9] = 0x23;
+ buf->Name[10] = 0x83;
+ buf->Name[11] = 0xDE;
+ buf->Name[12] = 0x96;
+ buf->Name[13] = 0x8B;
+ buf->Name[14] = 0xCD;
+ buf->Name[15] = 0x7C;
+ buf->Mode = cpu_to_le32(mode);
+ cifs_dbg(FYI, "mode on posix create 0%o\n", mode);
+ return buf;
+}
+
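create_posix_buf() above spells out the sixteen tag bytes one at a time; they are exactly the SMB2_CREATE_TAG_POSIX value quoted in the comment. A hypothetical, more compact equivalent (not part of the patch):

/* Hypothetical alternative to the byte-by-byte Name[] assignments in
 * create_posix_buf(); the bytes are SMB2_CREATE_TAG_POSIX. */
static void fill_posix_tag(struct create_posix *buf)
{
	static const __u8 tag[16] = {
		0x93, 0xAD, 0x25, 0x50, 0x9C, 0xB4, 0x11, 0xE7,
		0xB4, 0x23, 0x83, 0xDE, 0x96, 0x8B, 0xCD, 0x7C
	};

	memcpy(buf->Name, tag, sizeof(tag));
}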
+static int
+add_posix_context(struct kvec *iov, unsigned int *num_iovec, umode_t mode)
+{
+ struct smb2_create_req *req = iov[0].iov_base;
+ unsigned int num = *num_iovec;
+
+ iov[num].iov_base = create_posix_buf(mode);
+ if (iov[num].iov_base == NULL)
+ return -ENOMEM;
+ iov[num].iov_len = sizeof(struct create_posix);
+ if (!req->CreateContextsOffset)
+ req->CreateContextsOffset = cpu_to_le32(
+ sizeof(struct smb2_create_req) +
+ iov[num - 1].iov_len);
+ le32_add_cpu(&req->CreateContextsLength, sizeof(struct create_posix));
+ *num_iovec = num + 1;
+ return 0;
+}
+
#else
static void assemble_neg_contexts(struct smb2_negotiate_req *req,
unsigned int *total_len)
@@ -691,7 +766,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
server->capabilities |= SMB2_NT_FIND | SMB2_LARGE_FILES;
security_blob = smb2_get_data_area_len(&blob_offset, &blob_length,
- &rsp->hdr);
+ (struct smb2_sync_hdr *)rsp);
/*
* See MS-SMB2 section 2.2.4: if no blob, client picks default which
* for us will be
@@ -718,7 +793,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
#ifdef CONFIG_CIFS_SMB311
if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) {
if (rsp->NegotiateContextCount)
- rc = smb311_decode_neg_context(rsp, server);
+ rc = smb311_decode_neg_context(rsp, server,
+ rsp_iov.iov_len);
else
cifs_dbg(VFS, "Missing expected negotiate contexts\n");
}
@@ -730,19 +806,14 @@ neg_exit:
int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
{
- int rc = 0;
- struct validate_negotiate_info_req vneg_inbuf;
+ int rc;
+ struct validate_negotiate_info_req *pneg_inbuf;
struct validate_negotiate_info_rsp *pneg_rsp = NULL;
u32 rsplen;
u32 inbuflen; /* max of 4 dialects */
cifs_dbg(FYI, "validate negotiate\n");
-#ifdef CONFIG_CIFS_SMB_DIRECT
- if (tcon->ses->server->rdma)
- return 0;
-#endif
-
/* In SMB3.11 preauth integrity supersedes validate negotiate */
if (tcon->ses->server->dialect == SMB311_PROT_ID)
return 0;
@@ -765,63 +836,69 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_NULL)
cifs_dbg(VFS, "Unexpected null user (anonymous) auth flag sent by server\n");
- vneg_inbuf.Capabilities =
+ pneg_inbuf = kmalloc(sizeof(*pneg_inbuf), GFP_NOFS);
+ if (!pneg_inbuf)
+ return -ENOMEM;
+
+ pneg_inbuf->Capabilities =
cpu_to_le32(tcon->ses->server->vals->req_capabilities);
- memcpy(vneg_inbuf.Guid, tcon->ses->server->client_guid,
+ memcpy(pneg_inbuf->Guid, tcon->ses->server->client_guid,
SMB2_CLIENT_GUID_SIZE);
if (tcon->ses->sign)
- vneg_inbuf.SecurityMode =
+ pneg_inbuf->SecurityMode =
cpu_to_le16(SMB2_NEGOTIATE_SIGNING_REQUIRED);
else if (global_secflags & CIFSSEC_MAY_SIGN)
- vneg_inbuf.SecurityMode =
+ pneg_inbuf->SecurityMode =
cpu_to_le16(SMB2_NEGOTIATE_SIGNING_ENABLED);
else
- vneg_inbuf.SecurityMode = 0;
+ pneg_inbuf->SecurityMode = 0;
if (strcmp(tcon->ses->server->vals->version_string,
SMB3ANY_VERSION_STRING) == 0) {
- vneg_inbuf.Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
- vneg_inbuf.Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
- vneg_inbuf.DialectCount = cpu_to_le16(2);
+ pneg_inbuf->Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
+ pneg_inbuf->Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
+ pneg_inbuf->DialectCount = cpu_to_le16(2);
/* structure is big enough for 3 dialects, sending only 2 */
- inbuflen = sizeof(struct validate_negotiate_info_req) - 2;
+ inbuflen = sizeof(*pneg_inbuf) -
+ sizeof(pneg_inbuf->Dialects[0]);
} else if (strcmp(tcon->ses->server->vals->version_string,
SMBDEFAULT_VERSION_STRING) == 0) {
- vneg_inbuf.Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
- vneg_inbuf.Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
- vneg_inbuf.Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
- vneg_inbuf.DialectCount = cpu_to_le16(3);
+ pneg_inbuf->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
+ pneg_inbuf->Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
+ pneg_inbuf->Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
+ pneg_inbuf->DialectCount = cpu_to_le16(3);
/* structure is big enough for 3 dialects */
- inbuflen = sizeof(struct validate_negotiate_info_req);
+ inbuflen = sizeof(*pneg_inbuf);
} else {
/* otherwise specific dialect was requested */
- vneg_inbuf.Dialects[0] =
+ pneg_inbuf->Dialects[0] =
cpu_to_le16(tcon->ses->server->vals->protocol_id);
- vneg_inbuf.DialectCount = cpu_to_le16(1);
+ pneg_inbuf->DialectCount = cpu_to_le16(1);
/* structure is big enough for 3 dialects, sending only 1 */
- inbuflen = sizeof(struct validate_negotiate_info_req) - 4;
+ inbuflen = sizeof(*pneg_inbuf) -
+ sizeof(pneg_inbuf->Dialects[0]) * 2;
}
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */,
- (char *)&vneg_inbuf, sizeof(struct validate_negotiate_info_req),
- (char **)&pneg_rsp, &rsplen);
+ (char *)pneg_inbuf, inbuflen, (char **)&pneg_rsp, &rsplen);
if (rc != 0) {
cifs_dbg(VFS, "validate protocol negotiate failed: %d\n", rc);
- return -EIO;
+ rc = -EIO;
+ goto out_free_inbuf;
}
- if (rsplen != sizeof(struct validate_negotiate_info_rsp)) {
+ rc = -EIO;
+ if (rsplen != sizeof(*pneg_rsp)) {
cifs_dbg(VFS, "invalid protocol negotiate response size: %d\n",
rsplen);
/* relax check since Mac returns max bufsize allowed on ioctl */
- if ((rsplen > CIFSMaxBufSize)
- || (rsplen < sizeof(struct validate_negotiate_info_rsp)))
- goto err_rsp_free;
+ if (rsplen > CIFSMaxBufSize || rsplen < sizeof(*pneg_rsp))
+ goto out_free_rsp;
}
/* check validate negotiate info response matches what we got earlier */
@@ -838,15 +915,17 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
goto vneg_out;
/* validate negotiate successful */
+ rc = 0;
cifs_dbg(FYI, "validate negotiate info successful\n");
- kfree(pneg_rsp);
- return 0;
+ goto out_free_rsp;
vneg_out:
cifs_dbg(VFS, "protocol revalidation - security settings mismatch\n");
-err_rsp_free:
+out_free_rsp:
kfree(pneg_rsp);
- return -EIO;
+out_free_inbuf:
+ kfree(pneg_inbuf);
+ return rc;
}
enum securityEnum
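smb3_validate_negotiate() now heap-allocates the request instead of building it on the stack, and unwinds through the out_free_rsp/out_free_inbuf labels so both buffers are freed exactly once on every path; one plausible motivation (the commit message is not shown here) is that a buffer handed to the transport, notably over RDMA, cannot safely live on the stack. A minimal sketch of the same allocate/unwind shape, with issue_request() as a hypothetical stand-in for SMB2_ioctl():

/* Sketch of the kmalloc-then-unwind pattern; issue_request() is a
 * hypothetical helper standing in for SMB2_ioctl(). */
static int example_validate(void)
{
	struct validate_negotiate_info_req *in;
	char *out = NULL;
	int rc;

	in = kmalloc(sizeof(*in), GFP_NOFS);
	if (!in)
		return -ENOMEM;

	rc = issue_request(in, &out);	/* hypothetical */
	if (rc)
		rc = -EIO;

	kfree(out);	/* kfree(NULL) is a safe no-op */
	kfree(in);
	return rc;
}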
@@ -1051,7 +1130,7 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
goto out_put_spnego_key;
rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base;
- ses->Suid = rsp->hdr.sync_hdr.SessionId;
+ ses->Suid = rsp->sync_hdr.SessionId;
ses->session_flags = le16_to_cpu(rsp->SessionFlags);
@@ -1127,13 +1206,13 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
/* If true, rc here is expected and not an error */
if (sess_data->buf0_type != CIFS_NO_BUFFER &&
- rsp->hdr.sync_hdr.Status == STATUS_MORE_PROCESSING_REQUIRED)
+ rsp->sync_hdr.Status == STATUS_MORE_PROCESSING_REQUIRED)
rc = 0;
if (rc)
goto out;
- if (offsetof(struct smb2_sess_setup_rsp, Buffer) - ses->server->vals->header_preamble_size !=
+ if (offsetof(struct smb2_sess_setup_rsp, Buffer) !=
le16_to_cpu(rsp->SecurityBufferOffset)) {
cifs_dbg(VFS, "Invalid security buffer offset %d\n",
le16_to_cpu(rsp->SecurityBufferOffset));
@@ -1148,7 +1227,7 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n");
- ses->Suid = rsp->hdr.sync_hdr.SessionId;
+ ses->Suid = rsp->sync_hdr.SessionId;
ses->session_flags = le16_to_cpu(rsp->SessionFlags);
out:
@@ -1206,7 +1285,7 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data)
rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base;
- ses->Suid = rsp->hdr.sync_hdr.SessionId;
+ ses->Suid = rsp->sync_hdr.SessionId;
ses->session_flags = le16_to_cpu(rsp->SessionFlags);
rc = SMB2_sess_establish_session(sess_data);
@@ -1273,6 +1352,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
sess_data->ses = ses;
sess_data->buf0_type = CIFS_NO_BUFFER;
sess_data->nls_cp = (struct nls_table *) nls_cp;
+ sess_data->previous_session = ses->Suid;
#ifdef CONFIG_CIFS_SMB311
/*
@@ -1400,7 +1480,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
return rc;
}
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
iov[0].iov_base = (char *)req;
@@ -1416,7 +1496,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
/* 3.11 tcon req must be signed if not encrypted. See MS-SMB2 3.2.4.1.1 */
if ((ses->server->dialect == SMB311_PROT_ID) &&
- !encryption_required(tcon))
+ !smb3_encryption_required(tcon))
req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED;
rc = smb2_send_recv(xid, ses, iov, 2, &resp_buftype, flags, &rsp_iov);
@@ -1454,7 +1534,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
tcon->maximal_access = le32_to_cpu(rsp->MaximalAccess);
tcon->tidStatus = CifsGood;
tcon->need_reconnect = false;
- tcon->tid = rsp->hdr.sync_hdr.TreeId;
+ tcon->tid = rsp->sync_hdr.TreeId;
strlcpy(tcon->treeName, tree, sizeof(tcon->treeName));
if ((rsp->Capabilities & SMB2_SHARE_CAP_DFS) &&
@@ -1474,7 +1554,7 @@ tcon_exit:
return rc;
tcon_error_exit:
- if (rsp && rsp->hdr.sync_hdr.Status == STATUS_BAD_NETWORK_NAME) {
+ if (rsp && rsp->sync_hdr.Status == STATUS_BAD_NETWORK_NAME) {
cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree);
}
goto tcon_exit;
@@ -1505,7 +1585,7 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
flags |= CIFS_NO_RESP;
@@ -1572,7 +1652,7 @@ create_reconnect_durable_buf(struct cifs_fid *fid)
static __u8
parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp,
- unsigned int *epoch)
+ unsigned int *epoch, char *lease_key)
{
char *data_offset;
struct create_context *cc;
@@ -1580,14 +1660,15 @@ parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp,
unsigned int remaining;
char *name;
- data_offset = (char *)rsp + server->vals->header_preamble_size + le32_to_cpu(rsp->CreateContextsOffset);
+ data_offset = (char *)rsp + le32_to_cpu(rsp->CreateContextsOffset);
remaining = le32_to_cpu(rsp->CreateContextsLength);
cc = (struct create_context *)data_offset;
while (remaining >= sizeof(struct create_context)) {
name = le16_to_cpu(cc->NameOffset) + (char *)cc;
if (le16_to_cpu(cc->NameLength) == 4 &&
strncmp(name, "RqLs", 4) == 0)
- return server->ops->parse_lease_buf(cc, epoch);
+ return server->ops->parse_lease_buf(cc, epoch,
+ lease_key);
next = le32_to_cpu(cc->Next);
if (!next)
@@ -1815,7 +1896,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
struct TCP_Server_Info *server;
struct cifs_tcon *tcon = oparms->tcon;
struct cifs_ses *ses = tcon->ses;
- struct kvec iov[4];
+ struct kvec iov[5]; /* enough for at least one iovec per create context */
struct kvec rsp_iov = {NULL, 0};
int resp_buftype;
int uni_path_len;
@@ -1824,7 +1905,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
int rc = 0;
unsigned int n_iov = 2;
__u32 file_attributes = 0;
- char *dhc_buf = NULL, *lc_buf = NULL;
+ char *dhc_buf = NULL, *lc_buf = NULL, *pc_buf = NULL;
int flags = 0;
unsigned int total_len;
@@ -1840,7 +1921,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
if (oparms->create_options & CREATE_OPTION_READONLY)
@@ -1941,6 +2022,27 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
dhc_buf = iov[n_iov-1].iov_base;
}
+#ifdef CONFIG_CIFS_SMB311
+ if (tcon->posix_extensions) {
+ if (n_iov > 2) {
+ struct create_context *ccontext =
+ (struct create_context *)iov[n_iov-1].iov_base;
+ ccontext->Next =
+ cpu_to_le32(iov[n_iov-1].iov_len);
+ }
+
+ rc = add_posix_context(iov, &n_iov, oparms->mode);
+ if (rc) {
+ cifs_small_buf_release(req);
+ kfree(copy_path);
+ kfree(lc_buf);
+ kfree(dhc_buf);
+ return rc;
+ }
+ pc_buf = iov[n_iov-1].iov_base;
+ }
+#endif /* SMB311 */
+
rc = smb2_send_recv(xid, ses, iov, n_iov, &resp_buftype, flags,
&rsp_iov);
cifs_small_buf_release(req);
@@ -1953,8 +2055,13 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
resp_buftype = CIFS_NO_BUFFER;
rsp = NULL;
}
+ trace_smb3_open_err(xid, tcon->tid, ses->Suid,
+ oparms->create_options, oparms->desired_access, rc);
goto creat_exit;
- }
+ } else
+ trace_smb3_open_done(xid, rsp->PersistentFileId, tcon->tid,
+ ses->Suid, oparms->create_options,
+ oparms->desired_access);
oparms->fid->persistent_fid = rsp->PersistentFileId;
oparms->fid->volatile_fid = rsp->VolatileFileId;
@@ -1969,13 +2076,15 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
}
if (rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE)
- *oplock = parse_lease_state(server, rsp, &oparms->fid->epoch);
+ *oplock = parse_lease_state(server, rsp, &oparms->fid->epoch,
+ oparms->fid->lease_key);
else
*oplock = rsp->OplockLevel;
creat_exit:
kfree(copy_path);
kfree(lc_buf);
kfree(dhc_buf);
+ kfree(pc_buf);
free_rsp_buf(resp_buftype, rsp);
return rc;
}
@@ -1991,7 +2100,6 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
{
struct smb2_ioctl_req *req;
struct smb2_ioctl_rsp *rsp;
- struct smb2_sync_hdr *shdr;
struct cifs_ses *ses;
struct kvec iov[2];
struct kvec rsp_iov;
@@ -2022,7 +2130,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
req->CtlCode = cpu_to_le32(opcode);
@@ -2085,6 +2193,10 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
cifs_small_buf_release(req);
rsp = (struct smb2_ioctl_rsp *)rsp_iov.iov_base;
+ if (rc != 0)
+ trace_smb3_fsctl_err(xid, persistent_fid, tcon->tid,
+ ses->Suid, 0, opcode, rc);
+
if ((rc != 0) && (rc != -EINVAL)) {
cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
goto ioctl_exit;
@@ -2112,7 +2224,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
goto ioctl_exit;
}
- if (get_rfc1002_length(rsp) < le32_to_cpu(rsp->OutputOffset) + *plen) {
+ if (rsp_iov.iov_len < le32_to_cpu(rsp->OutputOffset) + *plen) {
cifs_dbg(VFS, "Malformed ioctl resp: len %d offset %d\n", *plen,
le32_to_cpu(rsp->OutputOffset));
*plen = 0;
@@ -2126,8 +2238,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
goto ioctl_exit;
}
- shdr = get_sync_hdr(rsp);
- memcpy(*out_data, (char *)shdr + le32_to_cpu(rsp->OutputOffset), *plen);
+ memcpy(*out_data, (char *)rsp + le32_to_cpu(rsp->OutputOffset), *plen);
ioctl_exit:
free_rsp_buf(resp_buftype, rsp);
return rc;
@@ -2159,8 +2270,8 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
}
int
-SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid)
+SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid, int flags)
{
struct smb2_close_req *req;
struct smb2_close_rsp *rsp;
@@ -2169,7 +2280,6 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
struct kvec rsp_iov;
int resp_buftype;
int rc = 0;
- int flags = 0;
unsigned int total_len;
cifs_dbg(FYI, "Close\n");
@@ -2181,7 +2291,7 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
req->PersistentFileId = persistent_fid;
@@ -2196,6 +2306,8 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
if (rc != 0) {
cifs_stats_fail_inc(tcon, SMB2_CLOSE_HE);
+ trace_smb3_close_err(xid, persistent_fid, tcon->tid, ses->Suid,
+ rc);
goto close_exit;
}
@@ -2206,14 +2318,20 @@ close_exit:
return rc;
}
+int
+SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid)
+{
+ return SMB2_close_flags(xid, tcon, persistent_fid, volatile_fid, 0);
+}
+
static int
-validate_iov(struct TCP_Server_Info *server,
- unsigned int offset, unsigned int buffer_length,
+validate_iov(unsigned int offset, unsigned int buffer_length,
struct kvec *iov, unsigned int min_buf_size)
{
unsigned int smb_len = iov->iov_len;
- char *end_of_smb = smb_len + server->vals->header_preamble_size + (char *)iov->iov_base;
- char *begin_of_buf = server->vals->header_preamble_size + offset + (char *)iov->iov_base;
+ char *end_of_smb = smb_len + (char *)iov->iov_base;
+ char *begin_of_buf = offset + (char *)iov->iov_base;
char *end_of_buf = begin_of_buf + buffer_length;
@@ -2243,18 +2361,17 @@ validate_iov(struct TCP_Server_Info *server,
* Caller must free buffer.
*/
static int
-validate_and_copy_iov(struct TCP_Server_Info *server,
- unsigned int offset, unsigned int buffer_length,
+validate_and_copy_iov(unsigned int offset, unsigned int buffer_length,
struct kvec *iov, unsigned int minbufsize,
char *data)
{
- char *begin_of_buf = server->vals->header_preamble_size + offset + (char *)(iov->iov_base);
+ char *begin_of_buf = offset + (char *)iov->iov_base;
int rc;
if (!data)
return -EINVAL;
- rc = validate_iov(server, offset, buffer_length, iov, minbufsize);
+ rc = validate_iov(offset, buffer_length, iov, minbufsize);
if (rc)
return rc;
@@ -2289,7 +2406,7 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
req->InfoType = info_type;
@@ -2315,6 +2432,8 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
if (rc) {
cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
+ trace_smb3_query_info_err(xid, persistent_fid, tcon->tid,
+ ses->Suid, info_class, (__u32)info_type, rc);
goto qinf_exit;
}
@@ -2332,8 +2451,7 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
}
}
- rc = validate_and_copy_iov(ses->server,
- le16_to_cpu(rsp->OutputBufferOffset),
+ rc = validate_and_copy_iov(le16_to_cpu(rsp->OutputBufferOffset),
le32_to_cpu(rsp->OutputBufferLength),
&rsp_iov, min_len, *data);
@@ -2404,7 +2522,7 @@ smb2_echo_callback(struct mid_q_entry *mid)
unsigned int credits_received = 1;
if (mid->mid_state == MID_RESPONSE_RECEIVED)
- credits_received = le16_to_cpu(rsp->hdr.sync_hdr.CreditRequest);
+ credits_received = le16_to_cpu(rsp->sync_hdr.CreditRequest);
DeleteMidQEntry(mid);
add_credits(server, credits_received, CIFS_ECHO_OP);
@@ -2533,7 +2651,7 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
req->PersistentFileId = persistent_fid;
@@ -2545,8 +2663,11 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
- if (rc != 0)
+ if (rc != 0) {
cifs_stats_fail_inc(tcon, SMB2_FLUSH_HE);
+ trace_smb3_flush_err(xid, persistent_fid, tcon->tid, ses->Suid,
+ rc);
+ }
free_rsp_buf(resp_buftype, rsp_iov.iov_base);
return rc;
@@ -2655,11 +2776,12 @@ smb2_readv_callback(struct mid_q_entry *mid)
struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
struct smb2_sync_hdr *shdr =
- (struct smb2_sync_hdr *)rdata->iov[1].iov_base;
+ (struct smb2_sync_hdr *)rdata->iov[0].iov_base;
unsigned int credits_received = 1;
struct smb_rqst rqst = { .rq_iov = rdata->iov,
.rq_nvec = 2,
.rq_pages = rdata->pages,
+ .rq_offset = rdata->page_offset,
.rq_npages = rdata->nr_pages,
.rq_pagesz = rdata->pagesz,
.rq_tailsz = rdata->tailsz };
@@ -2757,7 +2879,7 @@ smb2_async_readv(struct cifs_readdata *rdata)
return rc;
}
- if (encryption_required(io_parms.tcon))
+ if (smb3_encryption_required(io_parms.tcon))
flags |= CIFS_TRANSFORM_REQ;
req_len = cpu_to_be32(total_len);
@@ -2788,7 +2910,13 @@ smb2_async_readv(struct cifs_readdata *rdata)
if (rc) {
kref_put(&rdata->refcount, cifs_readdata_release);
cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE);
- }
+ trace_smb3_read_err(rc, 0 /* xid */, io_parms.persistent_fid,
+ io_parms.tcon->tid, io_parms.tcon->ses->Suid,
+ io_parms.offset, io_parms.length);
+ } else
+ trace_smb3_read_done(0 /* xid */, io_parms.persistent_fid,
+ io_parms.tcon->tid, io_parms.tcon->ses->Suid,
+ io_parms.offset, io_parms.length);
cifs_small_buf_release(buf);
return rc;
@@ -2801,7 +2929,6 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
int resp_buftype, rc = -EACCES;
struct smb2_read_plain_req *req = NULL;
struct smb2_read_rsp *rsp = NULL;
- struct smb2_sync_hdr *shdr;
struct kvec iov[1];
struct kvec rsp_iov;
unsigned int total_len;
@@ -2813,7 +2940,7 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
if (rc)
return rc;
- if (encryption_required(io_parms->tcon))
+ if (smb3_encryption_required(io_parms->tcon))
flags |= CIFS_TRANSFORM_REQ;
iov[0].iov_base = (char *)req;
@@ -2829,9 +2956,15 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE);
cifs_dbg(VFS, "Send error in read = %d\n", rc);
}
+ trace_smb3_read_err(rc, xid, req->PersistentFileId,
+ io_parms->tcon->tid, ses->Suid,
+ io_parms->offset, io_parms->length);
free_rsp_buf(resp_buftype, rsp_iov.iov_base);
return rc == -ENODATA ? 0 : rc;
- }
+ } else
+ trace_smb3_read_done(xid, req->PersistentFileId,
+ io_parms->tcon->tid, ses->Suid,
+ io_parms->offset, io_parms->length);
*nbytes = le32_to_cpu(rsp->DataLength);
if ((*nbytes > CIFS_MAX_MSGSIZE) ||
@@ -2842,10 +2975,8 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
*nbytes = 0;
}
- shdr = get_sync_hdr(rsp);
-
if (*buf) {
- memcpy(*buf, (char *)shdr + rsp->DataOffset, *nbytes);
+ memcpy(*buf, (char *)rsp + rsp->DataOffset, *nbytes);
free_rsp_buf(resp_buftype, rsp_iov.iov_base);
} else if (resp_buftype != CIFS_NO_BUFFER) {
*buf = rsp_iov.iov_base;
@@ -2872,7 +3003,7 @@ smb2_writev_callback(struct mid_q_entry *mid)
switch (mid->mid_state) {
case MID_RESPONSE_RECEIVED:
- credits_received = le16_to_cpu(rsp->hdr.sync_hdr.CreditRequest);
+ credits_received = le16_to_cpu(rsp->sync_hdr.CreditRequest);
wdata->result = smb2_check_receive(mid, tcon->ses->server, 0);
if (wdata->result != 0)
break;
@@ -2949,7 +3080,7 @@ smb2_async_writev(struct cifs_writedata *wdata,
goto async_writev_out;
}
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
shdr = (struct smb2_sync_hdr *)req;
@@ -3010,6 +3141,7 @@ smb2_async_writev(struct cifs_writedata *wdata,
rqst.rq_iov = iov;
rqst.rq_nvec = 2;
rqst.rq_pages = wdata->pages;
+ rqst.rq_offset = wdata->page_offset;
rqst.rq_npages = wdata->nr_pages;
rqst.rq_pagesz = wdata->pagesz;
rqst.rq_tailsz = wdata->tailsz;
@@ -3047,9 +3179,15 @@ smb2_async_writev(struct cifs_writedata *wdata,
wdata, flags);
if (rc) {
+ trace_smb3_write_err(0 /* no xid */, req->PersistentFileId,
+ tcon->tid, tcon->ses->Suid, wdata->offset,
+ wdata->bytes, rc);
kref_put(&wdata->refcount, release);
cifs_stats_fail_inc(tcon, SMB2_WRITE_HE);
- }
+ } else
+ trace_smb3_write_done(0 /* no xid */, req->PersistentFileId,
+ tcon->tid, tcon->ses->Suid, wdata->offset,
+ wdata->bytes);
async_writev_out:
cifs_small_buf_release(req);
@@ -3087,7 +3225,7 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
if (io_parms->tcon->ses->server == NULL)
return -ECONNABORTED;
- if (encryption_required(io_parms->tcon))
+ if (smb3_encryption_required(io_parms->tcon))
flags |= CIFS_TRANSFORM_REQ;
req->sync_hdr.ProcessId = cpu_to_le32(io_parms->pid);
@@ -3113,10 +3251,19 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
rsp = (struct smb2_write_rsp *)rsp_iov.iov_base;
if (rc) {
+ trace_smb3_write_err(xid, req->PersistentFileId,
+ io_parms->tcon->tid,
+ io_parms->tcon->ses->Suid,
+ io_parms->offset, io_parms->length, rc);
cifs_stats_fail_inc(io_parms->tcon, SMB2_WRITE_HE);
cifs_dbg(VFS, "Send error in write = %d\n", rc);
- } else
+ } else {
*nbytes = le32_to_cpu(rsp->DataLength);
+ trace_smb3_write_done(xid, req->PersistentFileId,
+ io_parms->tcon->tid,
+ io_parms->tcon->ses->Suid,
+ io_parms->offset, *nbytes);
+ }
free_rsp_buf(resp_buftype, rsp);
return rc;
@@ -3197,7 +3344,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
switch (srch_inf->info_level) {
@@ -3248,7 +3395,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
if (rc) {
if (rc == -ENODATA &&
- rsp->hdr.sync_hdr.Status == STATUS_NO_MORE_FILES) {
+ rsp->sync_hdr.Status == STATUS_NO_MORE_FILES) {
srch_inf->endOfSearch = true;
rc = 0;
}
@@ -3256,8 +3403,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
goto qdir_exit;
}
- rc = validate_iov(server,
- le16_to_cpu(rsp->OutputBufferOffset),
+ rc = validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
le32_to_cpu(rsp->OutputBufferLength), &rsp_iov,
info_buf_size);
if (rc)
@@ -3272,10 +3418,9 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
cifs_buf_release(srch_inf->ntwrk_buf_start);
}
srch_inf->ntwrk_buf_start = (char *)rsp;
- srch_inf->srch_entries_start = srch_inf->last_entry = 4 /* rfclen */ +
- (char *)&rsp->hdr + le16_to_cpu(rsp->OutputBufferOffset);
- /* 4 for rfc1002 length field */
- end_of_smb = get_rfc1002_length(rsp) + 4 + (char *)&rsp->hdr;
+ srch_inf->srch_entries_start = srch_inf->last_entry =
+ (char *)rsp + le16_to_cpu(rsp->OutputBufferOffset);
+ end_of_smb = rsp_iov.iov_len + (char *)rsp;
srch_inf->entries_in_buffer =
num_entries(srch_inf->srch_entries_start, end_of_smb,
&srch_inf->last_entry, info_buf_size);
@@ -3330,7 +3475,7 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
return rc;
}
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
req->sync_hdr.ProcessId = cpu_to_le32(pid);
@@ -3363,8 +3508,11 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
cifs_small_buf_release(req);
rsp = (struct smb2_set_info_rsp *)rsp_iov.iov_base;
- if (rc != 0)
+ if (rc != 0) {
cifs_stats_fail_inc(tcon, SMB2_SET_INFO_HE);
+ trace_smb3_set_info_err(xid, persistent_fid, tcon->tid,
+ ses->Suid, info_class, (__u32)info_type, rc);
+ }
free_rsp_buf(resp_buftype, rsp);
kfree(iov);
@@ -3511,7 +3659,7 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
__u8 oplock_level)
{
int rc;
- struct smb2_oplock_break_req *req = NULL;
+ struct smb2_oplock_break *req = NULL;
struct cifs_ses *ses = tcon->ses;
int flags = CIFS_OBREAK_OP;
unsigned int total_len;
@@ -3525,7 +3673,7 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
req->VolatileFid = volatile_fid;
@@ -3590,7 +3738,7 @@ build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level,
req->InputBufferOffset =
cpu_to_le16(sizeof(struct smb2_query_info_req) - 1);
req->OutputBufferLength = cpu_to_le32(
- outbuf_len + sizeof(struct smb2_query_info_rsp) - 1 - server->vals->header_preamble_size);
+ outbuf_len + sizeof(struct smb2_query_info_rsp) - 1);
iov->iov_base = (char *)req;
iov->iov_len = total_len;
@@ -3607,7 +3755,6 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
int rc = 0;
int resp_buftype;
struct cifs_ses *ses = tcon->ses;
- struct TCP_Server_Info *server = ses->server;
struct smb2_fs_full_size_info *info = NULL;
int flags = 0;
@@ -3617,7 +3764,7 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
rc = smb2_send_recv(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov);
@@ -3628,10 +3775,9 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
}
rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base;
- info = (struct smb2_fs_full_size_info *)(server->vals->header_preamble_size +
- le16_to_cpu(rsp->OutputBufferOffset) + (char *)&rsp->hdr);
- rc = validate_iov(server,
- le16_to_cpu(rsp->OutputBufferOffset),
+ info = (struct smb2_fs_full_size_info *)(
+ le16_to_cpu(rsp->OutputBufferOffset) + (char *)rsp);
+ rc = validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
le32_to_cpu(rsp->OutputBufferLength), &rsp_iov,
sizeof(struct smb2_fs_full_size_info));
if (!rc)
@@ -3652,7 +3798,6 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
int rc = 0;
int resp_buftype, max_len, min_len;
struct cifs_ses *ses = tcon->ses;
- struct TCP_Server_Info *server = ses->server;
unsigned int rsp_len, offset;
int flags = 0;
@@ -3675,7 +3820,7 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
rc = smb2_send_recv(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov);
@@ -3688,20 +3833,20 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
rsp_len = le32_to_cpu(rsp->OutputBufferLength);
offset = le16_to_cpu(rsp->OutputBufferOffset);
- rc = validate_iov(server, offset, rsp_len, &rsp_iov, min_len);
+ rc = validate_iov(offset, rsp_len, &rsp_iov, min_len);
if (rc)
goto qfsattr_exit;
if (level == FS_ATTRIBUTE_INFORMATION)
- memcpy(&tcon->fsAttrInfo, server->vals->header_preamble_size + offset
- + (char *)&rsp->hdr, min_t(unsigned int,
+ memcpy(&tcon->fsAttrInfo, offset
+ + (char *)rsp, min_t(unsigned int,
rsp_len, max_len));
else if (level == FS_DEVICE_INFORMATION)
- memcpy(&tcon->fsDevInfo, server->vals->header_preamble_size + offset
- + (char *)&rsp->hdr, sizeof(FILE_SYSTEM_DEVICE_INFO));
+ memcpy(&tcon->fsDevInfo, offset
+ + (char *)rsp, sizeof(FILE_SYSTEM_DEVICE_INFO));
else if (level == FS_SECTOR_SIZE_INFORMATION) {
struct smb3_fs_ss_info *ss_info = (struct smb3_fs_ss_info *)
- (server->vals->header_preamble_size + offset + (char *)&rsp->hdr);
+ (offset + (char *)rsp);
tcon->ss_flags = le32_to_cpu(ss_info->Flags);
tcon->perf_sector_size =
le32_to_cpu(ss_info->PhysicalBytesPerSectorForPerf);
@@ -3732,7 +3877,7 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
req->sync_hdr.ProcessId = cpu_to_le32(pid);
@@ -3755,6 +3900,8 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
if (rc) {
cifs_dbg(FYI, "Send error in smb2_lockv = %d\n", rc);
cifs_stats_fail_inc(tcon, SMB2_LOCK_HE);
+ trace_smb3_lock_err(xid, persist_fid, tcon->tid,
+ tcon->ses->Suid, rc);
}
return rc;
@@ -3796,7 +3943,7 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
req->sync_hdr.CreditRequest = cpu_to_le16(1);
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index d28f358022c5..a345560001ce 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -122,25 +122,10 @@ struct smb2_sync_pdu {
__le16 StructureSize2; /* size of wct area (varies, request specific) */
} __packed;
-struct smb2_hdr {
- __be32 smb2_buf_length; /* big endian on wire */
- /* length is only two or three bytes - with */
- /* one or two byte type preceding it that MBZ */
- struct smb2_sync_hdr sync_hdr;
-} __packed;
-
-struct smb2_pdu {
- struct smb2_hdr hdr;
- __le16 StructureSize2; /* size of wct area (varies, request specific) */
-} __packed;
-
#define SMB3_AES128CMM_NONCE 11
#define SMB3_AES128GCM_NONCE 12
struct smb2_transform_hdr {
- __be32 smb2_buf_length; /* big endian on wire */
- /* length is only two or three bytes - with
- one or two byte type preceding it that MBZ */
__le32 ProtocolId; /* 0xFD 'S' 'M' 'B' */
__u8 Signature[16];
__u8 Nonce[16];
@@ -171,7 +156,7 @@ struct smb2_transform_hdr {
#define SMB2_ERROR_STRUCTURE_SIZE2 cpu_to_le16(9)
struct smb2_err_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize;
__le16 Reserved; /* MBZ */
__le32 ByteCount; /* even if zero, at least one byte follows */
@@ -300,8 +285,16 @@ struct smb2_encryption_neg_context {
__le16 Ciphers[1]; /* Ciphers[0] since only one used now */
} __packed;
+#define POSIX_CTXT_DATA_LEN 8
+struct smb2_posix_neg_context {
+ __le16 ContextType; /* 0x100 */
+ __le16 DataLength;
+ __le32 Reserved;
+ __le64 Reserved1; /* in case needed in future (e.g. version or caps) */
+} __packed;
+
struct smb2_negotiate_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 65 */
__le16 SecurityMode;
__le16 DialectRevision;
@@ -341,7 +334,7 @@ struct smb2_sess_setup_req {
#define SMB2_SESSION_FLAG_IS_NULL 0x0002
#define SMB2_SESSION_FLAG_ENCRYPT_DATA 0x0004
struct smb2_sess_setup_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 9 */
__le16 SessionFlags;
__le16 SecurityBufferOffset;
@@ -356,7 +349,7 @@ struct smb2_logoff_req {
} __packed;
struct smb2_logoff_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 4 */
__le16 Reserved;
} __packed;
@@ -452,7 +445,7 @@ struct smb2_tree_connect_req_extension {
} __packed;
struct smb2_tree_connect_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 16 */
__u8 ShareType; /* see below */
__u8 Reserved;
@@ -503,7 +496,7 @@ struct smb2_tree_disconnect_req {
} __packed;
struct smb2_tree_disconnect_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 4 */
__le16 Reserved;
} __packed;
@@ -615,7 +608,9 @@ struct smb2_tree_disconnect_rsp {
#define SMB2_CREATE_DURABLE_HANDLE_REQUEST_V2 "DH2Q"
#define SMB2_CREATE_DURABLE_HANDLE_RECONNECT_V2 "DH2C"
#define SMB2_CREATE_APP_INSTANCE_ID 0x45BCA66AEFA7F74A9008FA462E144D74
-#define SVHDX_OPEN_DEVICE_CONTEXT 0x83CE6F1AD851E0986E34401CC9BCFCE9
+#define SVHDX_OPEN_DEVICE_CONTEXT 0x9CCBCF9E04C1E643980E158DA1F6EC83
+#define SMB2_CREATE_TAG_POSIX 0x93AD25509CB411E7B42383DE968BCD7C
+
struct smb2_create_req {
struct smb2_sync_hdr sync_hdr;
@@ -638,7 +633,7 @@ struct smb2_create_req {
} __packed;
struct smb2_create_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 89 */
__u8 OplockLevel;
__u8 Reserved;
@@ -727,6 +722,13 @@ struct create_durable {
} Data;
} __packed;
+struct create_posix {
+ struct create_context ccontext;
+ __u8 Name[16];
+ __le32 Mode;
+ __u32 Reserved;
+} __packed;
+
/* See MS-SMB2 2.2.13.2.11 */
/* Flags */
#define SMB2_DHANDLE_FLAG_PERSISTENT 0x00000002
@@ -894,7 +896,7 @@ struct smb2_ioctl_req {
} __packed;
struct smb2_ioctl_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 57 */
__u16 Reserved;
__le32 CtlCode;
@@ -921,7 +923,7 @@ struct smb2_close_req {
} __packed;
struct smb2_close_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* 60 */
__le16 Flags;
__le32 Reserved;
@@ -944,7 +946,7 @@ struct smb2_flush_req {
} __packed;
struct smb2_flush_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize;
__le16 Reserved;
} __packed;
@@ -976,7 +978,7 @@ struct smb2_read_plain_req {
} __packed;
struct smb2_read_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 17 */
__u8 DataOffset;
__u8 Reserved;
@@ -1007,7 +1009,7 @@ struct smb2_write_req {
} __packed;
struct smb2_write_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 17 */
__u8 DataOffset;
__u8 Reserved;
@@ -1041,7 +1043,7 @@ struct smb2_lock_req {
} __packed;
struct smb2_lock_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 4 */
__le16 Reserved;
} __packed;
@@ -1053,7 +1055,7 @@ struct smb2_echo_req {
} __packed;
struct smb2_echo_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 4 */
__u16 Reserved;
} __packed;
@@ -1079,7 +1081,7 @@ struct smb2_query_directory_req {
} __packed;
struct smb2_query_directory_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 9 */
__le16 OutputBufferOffset;
__le32 OutputBufferLength;
@@ -1128,7 +1130,7 @@ struct smb2_query_info_req {
} __packed;
struct smb2_query_info_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 9 */
__le16 OutputBufferOffset;
__le32 OutputBufferLength;
@@ -1150,12 +1152,11 @@ struct smb2_set_info_req {
} __packed;
struct smb2_set_info_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 2 */
} __packed;
-/* oplock break without an rfc1002 header */
-struct smb2_oplock_break_req {
+struct smb2_oplock_break {
struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 24 */
__u8 OplockLevel;
@@ -1165,21 +1166,10 @@ struct smb2_oplock_break_req {
__u64 VolatileFid;
} __packed;
-/* oplock break with an rfc1002 header */
-struct smb2_oplock_break_rsp {
- struct smb2_hdr hdr;
- __le16 StructureSize; /* Must be 24 */
- __u8 OplockLevel;
- __u8 Reserved;
- __le32 Reserved2;
- __u64 PersistentFid;
- __u64 VolatileFid;
-} __packed;
-
#define SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED cpu_to_le32(0x01)
struct smb2_lease_break {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 44 */
__le16 Reserved;
__le32 Flags;
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 8ba24a95db71..908555b1c6b5 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -36,8 +36,9 @@ struct smb_rqst;
extern int map_smb2_to_linux_error(char *buf, bool log_err);
extern int smb2_check_message(char *buf, unsigned int length,
struct TCP_Server_Info *server);
-extern unsigned int smb2_calc_size(void *buf);
-extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr);
+extern unsigned int smb2_calc_size(void *buf, struct TCP_Server_Info *server);
+extern char *smb2_get_data_area_len(int *off, int *len,
+ struct smb2_sync_hdr *shdr);
extern __le16 *cifs_convert_path_to_utf16(const char *from,
struct cifs_sb_info *cifs_sb);
@@ -65,6 +66,8 @@ extern struct cifs_ses *smb2_find_smb_ses(struct TCP_Server_Info *server,
extern int smb3_handle_read_data(struct TCP_Server_Info *server,
struct mid_q_entry *mid);
+extern int open_shroot(unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_fid *pfid);
extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst,
struct smb2_file_all_info *src);
extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
@@ -129,6 +132,8 @@ extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon,
char **out_data, u32 *plen /* returned data len */);
extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id);
+extern int SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid, int flags);
extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id);
extern int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 8806f3f76c1d..2c671123a6bf 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -480,7 +480,7 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
unsigned int rc;
char server_response_sig[16];
struct smb2_sync_hdr *shdr =
- (struct smb2_sync_hdr *)rqst->rq_iov[1].iov_base;
+ (struct smb2_sync_hdr *)rqst->rq_iov[0].iov_base;
if ((shdr->Command == SMB2_NEGOTIATE) ||
(shdr->Command == SMB2_SESSION_SETUP) ||
@@ -605,14 +605,12 @@ smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
bool log_error)
{
unsigned int len = mid->resp_buf_size;
- struct kvec iov[2];
+ struct kvec iov[1];
struct smb_rqst rqst = { .rq_iov = iov,
- .rq_nvec = 2 };
+ .rq_nvec = 1 };
iov[0].iov_base = (char *)mid->resp_buf;
- iov[0].iov_len = 4;
- iov[1].iov_base = (char *)mid->resp_buf + 4;
- iov[1].iov_len = len;
+ iov[0].iov_len = len;
dump_smb(mid->resp_buf, min_t(u32, 80, len));
/* convert the length into a more usable form */
diff --git a/fs/cifs/trace.c b/fs/cifs/trace.c
new file mode 100644
index 000000000000..bd4a546feec1
--- /dev/null
+++ b/fs/cifs/trace.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018, Microsoft Corporation.
+ *
+ * Author(s): Steve French <stfrench@microsoft.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ */
+#define CREATE_TRACE_POINTS
+#include "trace.h"
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
new file mode 100644
index 000000000000..61e74d455d90
--- /dev/null
+++ b/fs/cifs/trace.h
@@ -0,0 +1,429 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018, Microsoft Corporation.
+ *
+ * Author(s): Steve French <stfrench@microsoft.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cifs
+
+#if !defined(_CIFS_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _CIFS_TRACE_H
+
+#include <linux/tracepoint.h>
+
+/* For logging errors in read or write */
+DECLARE_EVENT_CLASS(smb3_rw_err_class,
+ TP_PROTO(unsigned int xid,
+ __u64 fid,
+ __u32 tid,
+ __u64 sesid,
+ __u64 offset,
+ __u32 len,
+ int rc),
+ TP_ARGS(xid, fid, tid, sesid, offset, len, rc),
+ TP_STRUCT__entry(
+ __field(unsigned int, xid)
+ __field(__u64, fid)
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(__u64, offset)
+ __field(__u32, len)
+ __field(int, rc)
+ ),
+ TP_fast_assign(
+ __entry->xid = xid;
+ __entry->fid = fid;
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->offset = offset;
+ __entry->len = len;
+ __entry->rc = rc;
+ ),
+ TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d",
+ __entry->xid, __entry->sesid, __entry->tid, __entry->fid,
+ __entry->offset, __entry->len, __entry->rc)
+)
+
+#define DEFINE_SMB3_RW_ERR_EVENT(name) \
+DEFINE_EVENT(smb3_rw_err_class, smb3_##name, \
+ TP_PROTO(unsigned int xid, \
+ __u64 fid, \
+ __u32 tid, \
+ __u64 sesid, \
+ __u64 offset, \
+ __u32 len, \
+ int rc), \
+ TP_ARGS(xid, fid, tid, sesid, offset, len, rc))
+
+DEFINE_SMB3_RW_ERR_EVENT(write_err);
+DEFINE_SMB3_RW_ERR_EVENT(read_err);
+
+
+/* For logging successful read or write */
+DECLARE_EVENT_CLASS(smb3_rw_done_class,
+ TP_PROTO(unsigned int xid,
+ __u64 fid,
+ __u32 tid,
+ __u64 sesid,
+ __u64 offset,
+ __u32 len),
+ TP_ARGS(xid, fid, tid, sesid, offset, len),
+ TP_STRUCT__entry(
+ __field(unsigned int, xid)
+ __field(__u64, fid)
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(__u64, offset)
+ __field(__u32, len)
+ ),
+ TP_fast_assign(
+ __entry->xid = xid;
+ __entry->fid = fid;
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->offset = offset;
+ __entry->len = len;
+ ),
+ TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x",
+ __entry->xid, __entry->sesid, __entry->tid, __entry->fid,
+ __entry->offset, __entry->len)
+)
+
+#define DEFINE_SMB3_RW_DONE_EVENT(name) \
+DEFINE_EVENT(smb3_rw_done_class, smb3_##name, \
+ TP_PROTO(unsigned int xid, \
+ __u64 fid, \
+ __u32 tid, \
+ __u64 sesid, \
+ __u64 offset, \
+ __u32 len), \
+ TP_ARGS(xid, fid, tid, sesid, offset, len))
+
+DEFINE_SMB3_RW_DONE_EVENT(write_done);
+DEFINE_SMB3_RW_DONE_EVENT(read_done);
+
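Each DEFINE_EVENT above generates a trace_smb3_<name>() call site that costs essentially nothing unless the event is enabled at runtime; the read path earlier in this patch invokes the generated function like so:

/* Generated call site, as used in SMB2_read() earlier in the patch: */
trace_smb3_read_done(xid, req->PersistentFileId, io_parms->tcon->tid,
		     ses->Suid, io_parms->offset, io_parms->length);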
+/*
+ * For handle based calls other than read and write, and get/set info
+ */
+DECLARE_EVENT_CLASS(smb3_fd_err_class,
+ TP_PROTO(unsigned int xid,
+ __u64 fid,
+ __u32 tid,
+ __u64 sesid,
+ int rc),
+ TP_ARGS(xid, fid, tid, sesid, rc),
+ TP_STRUCT__entry(
+ __field(unsigned int, xid)
+ __field(__u64, fid)
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(int, rc)
+ ),
+ TP_fast_assign(
+ __entry->xid = xid;
+ __entry->fid = fid;
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->rc = rc;
+ ),
+ TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx rc=%d",
+ __entry->xid, __entry->sesid, __entry->tid, __entry->fid,
+ __entry->rc)
+)
+
+#define DEFINE_SMB3_FD_ERR_EVENT(name) \
+DEFINE_EVENT(smb3_fd_err_class, smb3_##name, \
+ TP_PROTO(unsigned int xid, \
+ __u64 fid, \
+ __u32 tid, \
+ __u64 sesid, \
+ int rc), \
+ TP_ARGS(xid, fid, tid, sesid, rc))
+
+DEFINE_SMB3_FD_ERR_EVENT(flush_err);
+DEFINE_SMB3_FD_ERR_EVENT(lock_err);
+DEFINE_SMB3_FD_ERR_EVENT(close_err);
+
+/*
+ * For handle based query/set info calls
+ */
+DECLARE_EVENT_CLASS(smb3_inf_err_class,
+ TP_PROTO(unsigned int xid,
+ __u64 fid,
+ __u32 tid,
+ __u64 sesid,
+ __u8 infclass,
+ __u32 type,
+ int rc),
+ TP_ARGS(xid, fid, tid, sesid, infclass, type, rc),
+ TP_STRUCT__entry(
+ __field(unsigned int, xid)
+ __field(__u64, fid)
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(__u8, infclass)
+ __field(__u32, type)
+ __field(int, rc)
+ ),
+ TP_fast_assign(
+ __entry->xid = xid;
+ __entry->fid = fid;
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->infclass = infclass;
+ __entry->type = type;
+ __entry->rc = rc;
+ ),
+ TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx class=%u type=0x%x rc=%d",
+ __entry->xid, __entry->sesid, __entry->tid, __entry->fid,
+ __entry->infclass, __entry->type, __entry->rc)
+)
+
+#define DEFINE_SMB3_INF_ERR_EVENT(name) \
+DEFINE_EVENT(smb3_inf_err_class, smb3_##name, \
+ TP_PROTO(unsigned int xid, \
+ __u64 fid, \
+ __u32 tid, \
+ __u64 sesid, \
+ __u8 infclass, \
+ __u32 type, \
+ int rc), \
+ TP_ARGS(xid, fid, tid, sesid, infclass, type, rc))
+
+DEFINE_SMB3_INF_ERR_EVENT(query_info_err);
+DEFINE_SMB3_INF_ERR_EVENT(set_info_err);
+DEFINE_SMB3_INF_ERR_EVENT(fsctl_err);
+
+/*
+ * For logging SMB3 Status code and Command for responses which return errors
+ */
+DECLARE_EVENT_CLASS(smb3_cmd_err_class,
+ TP_PROTO(__u32 tid,
+ __u64 sesid,
+ __u16 cmd,
+ __u64 mid,
+ __u32 status,
+ int rc),
+ TP_ARGS(tid, sesid, cmd, mid, status, rc),
+ TP_STRUCT__entry(
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(__u16, cmd)
+ __field(__u64, mid)
+ __field(__u32, status)
+ __field(int, rc)
+ ),
+ TP_fast_assign(
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->cmd = cmd;
+ __entry->mid = mid;
+ __entry->status = status;
+ __entry->rc = rc;
+ ),
+ TP_printk("\tsid=0x%llx tid=0x%x cmd=%u mid=%llu status=0x%x rc=%d",
+ __entry->sesid, __entry->tid, __entry->cmd, __entry->mid,
+ __entry->status, __entry->rc)
+)
+
+#define DEFINE_SMB3_CMD_ERR_EVENT(name) \
+DEFINE_EVENT(smb3_cmd_err_class, smb3_##name, \
+ TP_PROTO(__u32 tid, \
+ __u64 sesid, \
+ __u16 cmd, \
+ __u64 mid, \
+ __u32 status, \
+ int rc), \
+ TP_ARGS(tid, sesid, cmd, mid, status, rc))
+
+DEFINE_SMB3_CMD_ERR_EVENT(cmd_err);
+
+DECLARE_EVENT_CLASS(smb3_cmd_done_class,
+ TP_PROTO(__u32 tid,
+ __u64 sesid,
+ __u16 cmd,
+ __u64 mid),
+ TP_ARGS(tid, sesid, cmd, mid),
+ TP_STRUCT__entry(
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(__u16, cmd)
+ __field(__u64, mid)
+ ),
+ TP_fast_assign(
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->cmd = cmd;
+ __entry->mid = mid;
+ ),
+ TP_printk("\tsid=0x%llx tid=0x%x cmd=%u mid=%llu",
+ __entry->sesid, __entry->tid,
+ __entry->cmd, __entry->mid)
+)
+
+#define DEFINE_SMB3_CMD_DONE_EVENT(name) \
+DEFINE_EVENT(smb3_cmd_done_class, smb3_##name, \
+ TP_PROTO(__u32 tid, \
+ __u64 sesid, \
+ __u16 cmd, \
+ __u64 mid), \
+ TP_ARGS(tid, sesid, cmd, mid))
+
+DEFINE_SMB3_CMD_DONE_EVENT(cmd_done);
+
+DECLARE_EVENT_CLASS(smb3_exit_err_class,
+ TP_PROTO(unsigned int xid,
+ const char *func_name,
+ int rc),
+ TP_ARGS(xid, func_name, rc),
+ TP_STRUCT__entry(
+ __field(unsigned int, xid)
+ __field(const char *, func_name)
+ __field(int, rc)
+ ),
+ TP_fast_assign(
+ __entry->xid = xid;
+ __entry->func_name = func_name;
+ __entry->rc = rc;
+ ),
+ TP_printk("\t%s: xid=%u rc=%d",
+ __entry->func_name, __entry->xid, __entry->rc)
+)
+
+#define DEFINE_SMB3_EXIT_ERR_EVENT(name) \
+DEFINE_EVENT(smb3_exit_err_class, smb3_##name, \
+ TP_PROTO(unsigned int xid, \
+ const char *func_name, \
+ int rc), \
+ TP_ARGS(xid, func_name, rc))
+
+DEFINE_SMB3_EXIT_ERR_EVENT(exit_err);
+
+DECLARE_EVENT_CLASS(smb3_enter_exit_class,
+ TP_PROTO(unsigned int xid,
+ const char *func_name),
+ TP_ARGS(xid, func_name),
+ TP_STRUCT__entry(
+ __field(unsigned int, xid)
+ __field(const char *, func_name)
+ ),
+ TP_fast_assign(
+ __entry->xid = xid;
+ __entry->func_name = func_name;
+ ),
+ TP_printk("\t%s: xid=%u",
+ __entry->func_name, __entry->xid)
+)
+
+#define DEFINE_SMB3_ENTER_EXIT_EVENT(name) \
+DEFINE_EVENT(smb3_enter_exit_class, smb3_##name, \
+ TP_PROTO(unsigned int xid, \
+ const char *func_name), \
+ TP_ARGS(xid, func_name))
+
+DEFINE_SMB3_ENTER_EXIT_EVENT(enter);
+DEFINE_SMB3_ENTER_EXIT_EVENT(exit_done);
+
+/*
+ * For smb2/smb3 open call
+ */
+DECLARE_EVENT_CLASS(smb3_open_err_class,
+ TP_PROTO(unsigned int xid,
+ __u32 tid,
+ __u64 sesid,
+ int create_options,
+ int desired_access,
+ int rc),
+ TP_ARGS(xid, tid, sesid, create_options, desired_access, rc),
+ TP_STRUCT__entry(
+ __field(unsigned int, xid)
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(int, create_options)
+ __field(int, desired_access)
+ __field(int, rc)
+ ),
+ TP_fast_assign(
+ __entry->xid = xid;
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->create_options = create_options;
+ __entry->desired_access = desired_access;
+ __entry->rc = rc;
+ ),
+ TP_printk("xid=%u sid=0x%llx tid=0x%x cr_opts=0x%x des_access=0x%x rc=%d",
+ __entry->xid, __entry->sesid, __entry->tid,
+ __entry->create_options, __entry->desired_access, __entry->rc)
+)
+
+#define DEFINE_SMB3_OPEN_ERR_EVENT(name) \
+DEFINE_EVENT(smb3_open_err_class, smb3_##name, \
+ TP_PROTO(unsigned int xid, \
+ __u32 tid, \
+ __u64 sesid, \
+ int create_options, \
+ int desired_access, \
+ int rc), \
+ TP_ARGS(xid, tid, sesid, create_options, desired_access, rc))
+
+DEFINE_SMB3_OPEN_ERR_EVENT(open_err);
+
+
+DECLARE_EVENT_CLASS(smb3_open_done_class,
+ TP_PROTO(unsigned int xid,
+ __u64 fid,
+ __u32 tid,
+ __u64 sesid,
+ int create_options,
+ int desired_access),
+ TP_ARGS(xid, fid, tid, sesid, create_options, desired_access),
+ TP_STRUCT__entry(
+ __field(unsigned int, xid)
+ __field(__u64, fid)
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(int, create_options)
+ __field(int, desired_access)
+ ),
+ TP_fast_assign(
+ __entry->xid = xid;
+ __entry->fid = fid;
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->create_options = create_options;
+ __entry->desired_access = desired_access;
+ ),
+ TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx cr_opts=0x%x des_access=0x%x",
+ __entry->xid, __entry->sesid, __entry->tid, __entry->fid,
+ __entry->create_options, __entry->desired_access)
+)
+
+#define DEFINE_SMB3_OPEN_DONE_EVENT(name) \
+DEFINE_EVENT(smb3_open_done_class, smb3_##name, \
+ TP_PROTO(unsigned int xid, \
+ __u64 fid, \
+ __u32 tid, \
+ __u64 sesid, \
+ int create_options, \
+ int desired_access), \
+ TP_ARGS(xid, fid, tid, sesid, create_options, desired_access))
+
+DEFINE_SMB3_OPEN_DONE_EVENT(open_done);
+
+#endif /* _CIFS_TRACE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
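A note on the event-class pattern above: DECLARE_EVENT_CLASS() defines the record layout and format string once, and each DEFINE_SMB3_*_EVENT(name) line stamps out a tracepoint that shares it. As a sketch (hypothetical, not part of this patch), one more read/write-style event plus its call site would only take:

	/* hypothetical extra event reusing smb3_rw_done_class */
	DEFINE_SMB3_RW_DONE_EVENT(read_enqueued);

	/* hypothetical call site; the six arguments match TP_PROTO above */
	trace_smb3_read_enqueued(xid, fid, tid, sesid, offset, len);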
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 927226a2122f..e7254e386b79 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -800,8 +800,8 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
#ifdef CONFIG_CIFS_SMB311
if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) {
struct kvec iov = {
- .iov_base = buf + 4,
- .iov_len = get_rfc1002_length(buf)
+ .iov_base = buf,
+ .iov_len = midQ->resp_buf_size
};
smb311_update_preauth_hash(ses, &iov, 1);
}
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 017b0ab19bc4..c4fb9ad7c808 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -492,7 +492,7 @@ static void cramfs_kill_sb(struct super_block *sb)
{
struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
- if (IS_ENABLED(CCONFIG_CRAMFS_MTD) && sb->s_mtd) {
+ if (IS_ENABLED(CONFIG_CRAMFS_MTD) && sb->s_mtd) {
if (sbi && sbi->mtd_point_size)
mtd_unpoint(sb->s_mtd, 0, sbi->mtd_point_size);
kill_mtd_super(sb);
@@ -808,10 +808,7 @@ static struct dentry *cramfs_lookup(struct inode *dir, struct dentry *dentry, un
}
out:
mutex_unlock(&read_mutex);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
- d_add(dentry, inode);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
static int cramfs_readpage(struct file *file, struct page *page)
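The cramfs_lookup() change above is one instance of a tree-wide idiom (the freevxfs hunk below is another): d_splice_alias() accepts NULL and ERR_PTR() inodes directly, so the open-coded IS_ERR()/d_add() sequence collapses into a single tail call. A minimal sketch for a hypothetical filesystem, with foofs_* names invented for illustration:

	static struct dentry *foofs_lookup(struct inode *dir, struct dentry *dentry,
					   unsigned int flags)
	{
		struct inode *inode = NULL;
		ino_t ino = foofs_inode_by_name(dir, &dentry->d_name);

		if (ino)
			inode = foofs_iget(dir->i_sb, ino); /* may be ERR_PTR */
		return d_splice_alias(inode, dentry);	/* handles NULL/ERR_PTR */
	}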
diff --git a/fs/dax.c b/fs/dax.c
index aaec72ded1b6..aa86d9f971a4 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -677,7 +677,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
* downgrading page table protection not changing it to point
* to a new page.
*
- * See Documentation/vm/mmu_notifier.txt
+ * See Documentation/vm/mmu_notifier.rst
*/
if (pmdp) {
#ifdef CONFIG_FS_DAX_PMD
diff --git a/fs/dcache.c b/fs/dcache.c
index 86d2de63461e..0e8e5de3c48a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -580,6 +580,7 @@ static void __dentry_kill(struct dentry *dentry)
spin_unlock(&dentry->d_lock);
if (likely(can_free))
dentry_free(dentry);
+ cond_resched();
}
static struct dentry *__lock_parent(struct dentry *dentry)
@@ -827,30 +828,24 @@ static inline bool fast_dput(struct dentry *dentry)
*/
void dput(struct dentry *dentry)
{
- if (unlikely(!dentry))
- return;
+ while (dentry) {
+ might_sleep();
-repeat:
- might_sleep();
+ rcu_read_lock();
+ if (likely(fast_dput(dentry))) {
+ rcu_read_unlock();
+ return;
+ }
- rcu_read_lock();
- if (likely(fast_dput(dentry))) {
+ /* Slow case: now with the dentry lock held */
rcu_read_unlock();
- return;
- }
-
- /* Slow case: now with the dentry lock held */
- rcu_read_unlock();
- if (likely(retain_dentry(dentry))) {
- spin_unlock(&dentry->d_lock);
- return;
- }
+ if (likely(retain_dentry(dentry))) {
+ spin_unlock(&dentry->d_lock);
+ return;
+ }
- dentry = dentry_kill(dentry);
- if (dentry) {
- cond_resched();
- goto repeat;
+ dentry = dentry_kill(dentry);
}
}
EXPORT_SYMBOL(dput);
@@ -907,6 +902,35 @@ repeat:
}
EXPORT_SYMBOL(dget_parent);
+static struct dentry * __d_find_any_alias(struct inode *inode)
+{
+ struct dentry *alias;
+
+ if (hlist_empty(&inode->i_dentry))
+ return NULL;
+ alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
+ __dget(alias);
+ return alias;
+}
+
+/**
+ * d_find_any_alias - find any alias for a given inode
+ * @inode: inode to find an alias for
+ *
+ * If any aliases exist for the given inode, take and return a
+ * reference for one of them. If no aliases exist, return %NULL.
+ */
+struct dentry *d_find_any_alias(struct inode *inode)
+{
+ struct dentry *de;
+
+ spin_lock(&inode->i_lock);
+ de = __d_find_any_alias(inode);
+ spin_unlock(&inode->i_lock);
+ return de;
+}
+EXPORT_SYMBOL(d_find_any_alias);
+
/**
* d_find_alias - grab a hashed alias of inode
* @inode: inode in question
@@ -923,34 +947,19 @@ EXPORT_SYMBOL(dget_parent);
*/
static struct dentry *__d_find_alias(struct inode *inode)
{
- struct dentry *alias, *discon_alias;
+ struct dentry *alias;
+
+ if (S_ISDIR(inode->i_mode))
+ return __d_find_any_alias(inode);
-again:
- discon_alias = NULL;
hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
spin_lock(&alias->d_lock);
- if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
- if (IS_ROOT(alias) &&
- (alias->d_flags & DCACHE_DISCONNECTED)) {
- discon_alias = alias;
- } else {
- __dget_dlock(alias);
- spin_unlock(&alias->d_lock);
- return alias;
- }
- }
- spin_unlock(&alias->d_lock);
- }
- if (discon_alias) {
- alias = discon_alias;
- spin_lock(&alias->d_lock);
- if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
+ if (!d_unhashed(alias)) {
__dget_dlock(alias);
spin_unlock(&alias->d_lock);
return alias;
}
spin_unlock(&alias->d_lock);
- goto again;
}
return NULL;
}
@@ -1052,8 +1061,6 @@ static void shrink_dentry_list(struct list_head *list)
while (!list_empty(list)) {
struct dentry *dentry, *parent;
- cond_resched();
-
dentry = list_entry(list->prev, struct dentry, d_lru);
spin_lock(&dentry->d_lock);
rcu_read_lock();
@@ -1230,13 +1237,11 @@ enum d_walk_ret {
* @parent: start of walk
* @data: data passed to @enter() and @finish()
* @enter: callback when first entering the dentry
- * @finish: callback when successfully finished the walk
*
- * The @enter() and @finish() callbacks are called with d_lock held.
+ * The @enter() callback is called with d_lock held.
*/
static void d_walk(struct dentry *parent, void *data,
- enum d_walk_ret (*enter)(void *, struct dentry *),
- void (*finish)(void *))
+ enum d_walk_ret (*enter)(void *, struct dentry *))
{
struct dentry *this_parent;
struct list_head *next;
@@ -1325,8 +1330,6 @@ ascend:
if (need_seqretry(&rename_lock, seq))
goto rename_retry;
rcu_read_unlock();
- if (finish)
- finish(data);
out_unlock:
spin_unlock(&this_parent->d_lock);
@@ -1375,7 +1378,7 @@ int path_has_submounts(const struct path *parent)
struct check_mount data = { .mnt = parent->mnt, .mounted = 0 };
read_seqlock_excl(&mount_lock);
- d_walk(parent->dentry, &data, path_check_mount, NULL);
+ d_walk(parent->dentry, &data, path_check_mount);
read_sequnlock_excl(&mount_lock);
return data.mounted;
@@ -1483,11 +1486,16 @@ void shrink_dcache_parent(struct dentry *parent)
data.start = parent;
data.found = 0;
- d_walk(parent, &data, select_collect, NULL);
+ d_walk(parent, &data, select_collect);
+
+ if (!list_empty(&data.dispose)) {
+ shrink_dentry_list(&data.dispose);
+ continue;
+ }
+
+ cond_resched();
if (!data.found)
break;
-
- shrink_dentry_list(&data.dispose);
}
}
EXPORT_SYMBOL(shrink_dcache_parent);
@@ -1518,7 +1526,7 @@ static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
static void do_one_tree(struct dentry *dentry)
{
shrink_dcache_parent(dentry);
- d_walk(dentry, dentry, umount_check, NULL);
+ d_walk(dentry, dentry, umount_check);
d_drop(dentry);
dput(dentry);
}
@@ -1542,78 +1550,48 @@ void shrink_dcache_for_umount(struct super_block *sb)
}
}
-struct detach_data {
- struct select_data select;
- struct dentry *mountpoint;
-};
-static enum d_walk_ret detach_and_collect(void *_data, struct dentry *dentry)
+static enum d_walk_ret find_submount(void *_data, struct dentry *dentry)
{
- struct detach_data *data = _data;
-
+ struct dentry **victim = _data;
if (d_mountpoint(dentry)) {
__dget_dlock(dentry);
- data->mountpoint = dentry;
+ *victim = dentry;
return D_WALK_QUIT;
}
-
- return select_collect(&data->select, dentry);
-}
-
-static void check_and_drop(void *_data)
-{
- struct detach_data *data = _data;
-
- if (!data->mountpoint && list_empty(&data->select.dispose))
- __d_drop(data->select.start);
+ return D_WALK_CONTINUE;
}
/**
* d_invalidate - detach submounts, prune dcache, and drop
* @dentry: dentry to invalidate (aka detach, prune and drop)
- *
- * no dcache lock.
- *
- * The final d_drop is done as an atomic operation relative to
- * rename_lock ensuring there are no races with d_set_mounted. This
- * ensures there are no unhashed dentries on the path to a mountpoint.
*/
void d_invalidate(struct dentry *dentry)
{
- /*
- * If it's already been dropped, return OK.
- */
+ bool had_submounts = false;
spin_lock(&dentry->d_lock);
if (d_unhashed(dentry)) {
spin_unlock(&dentry->d_lock);
return;
}
+ __d_drop(dentry);
spin_unlock(&dentry->d_lock);
/* Negative dentries can be dropped without further checks */
- if (!dentry->d_inode) {
- d_drop(dentry);
+ if (!dentry->d_inode)
return;
- }
+ shrink_dcache_parent(dentry);
for (;;) {
- struct detach_data data;
-
- data.mountpoint = NULL;
- INIT_LIST_HEAD(&data.select.dispose);
- data.select.start = dentry;
- data.select.found = 0;
-
- d_walk(dentry, &data, detach_and_collect, check_and_drop);
-
- if (!list_empty(&data.select.dispose))
- shrink_dentry_list(&data.select.dispose);
- else if (!data.mountpoint)
+ struct dentry *victim = NULL;
+ d_walk(dentry, &victim, find_submount);
+ if (!victim) {
+ if (had_submounts)
+ shrink_dcache_parent(dentry);
return;
-
- if (data.mountpoint) {
- detach_mounts(data.mountpoint);
- dput(data.mountpoint);
}
+ had_submounts = true;
+ detach_mounts(victim);
+ dput(victim);
}
}
EXPORT_SYMBOL(d_invalidate);
@@ -1899,6 +1877,28 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
}
EXPORT_SYMBOL(d_instantiate);
+/*
+ * This should be equivalent to d_instantiate() + unlock_new_inode(),
+ * with the lockdep-related part of unlock_new_inode() done before
+ * anything else. Use this helper instead of open-coding
+ * d_instantiate()/unlock_new_inode() combinations.
+ */
+void d_instantiate_new(struct dentry *entry, struct inode *inode)
+{
+ BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
+ BUG_ON(!inode);
+ lockdep_annotate_inode_mutex_key(inode);
+ security_d_instantiate(entry, inode);
+ spin_lock(&inode->i_lock);
+ __d_instantiate(entry, inode);
+ WARN_ON(!(inode->i_state & I_NEW));
+ inode->i_state &= ~I_NEW;
+ smp_mb();
+ wake_up_bit(&inode->i_state, __I_NEW);
+ spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL(d_instantiate_new);
+
/**
* d_instantiate_no_diralias - instantiate a non-aliased dentry
* @entry: dentry to complete
@@ -1941,35 +1941,6 @@ struct dentry *d_make_root(struct inode *root_inode)
}
EXPORT_SYMBOL(d_make_root);
-static struct dentry * __d_find_any_alias(struct inode *inode)
-{
- struct dentry *alias;
-
- if (hlist_empty(&inode->i_dentry))
- return NULL;
- alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
- __dget(alias);
- return alias;
-}
-
-/**
- * d_find_any_alias - find any alias for a given inode
- * @inode: inode to find an alias for
- *
- * If any aliases exist for the given inode, take and return a
- * reference for one of them. If no aliases exist, return %NULL.
- */
-struct dentry *d_find_any_alias(struct inode *inode)
-{
- struct dentry *de;
-
- spin_lock(&inode->i_lock);
- de = __d_find_any_alias(inode);
- spin_unlock(&inode->i_lock);
- return de;
-}
-EXPORT_SYMBOL(d_find_any_alias);
-
static struct dentry *__d_instantiate_anon(struct dentry *dentry,
struct inode *inode,
bool disconnected)
@@ -3112,7 +3083,7 @@ static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry)
void d_genocide(struct dentry *parent)
{
- d_walk(parent, parent, d_genocide_kill, NULL);
+ d_walk(parent, parent, d_genocide_kill);
}
EXPORT_SYMBOL(d_genocide);
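d_instantiate_new(), added above, is what the ecryptfs, ext2, ext4 and f2fs hunks further down convert to; the conversion is mechanical. A sketch of the before/after shape inside a typical ->create() or ->mkdir():

	/* before: two calls, with the ordering easy to get wrong */
	unlock_new_inode(inode);
	d_instantiate(dentry, inode);

	/* after: one helper. The lockdep annotation happens first, then
	 * the dentry is attached before I_NEW is cleared and waiters
	 * are woken. */
	d_instantiate_new(dentry, inode);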
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 874607bb6e02..093fb54cd316 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -432,8 +432,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
struct bio *bio;
/*
- * bio_alloc() is guaranteed to return a bio when called with
- * __GFP_RECLAIM and we request a valid number of vectors.
+ * bio_alloc() is guaranteed to return a bio when allowed to sleep and
+ * we request a valid number of vectors.
*/
bio = bio_alloc(GFP_KERNEL, nr_vecs);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 5243989a60cc..a5e4a221435c 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1037,6 +1037,7 @@ static void sctp_connect_to_sock(struct connection *con)
int result;
int addr_len;
struct socket *sock;
+ struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };
if (con->nodeid == 0) {
log_print("attempt to connect sock 0 foiled");
@@ -1080,11 +1081,22 @@ static void sctp_connect_to_sock(struct connection *con)
log_print("connecting to %d", con->nodeid);
/* Turn off Nagle's algorithm */
- kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
+ kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one,
sizeof(one));
+ /*
+ * Make sock->ops->connect() return within the specified time,
+ * since the O_NONBLOCK argument to connect() does not work here.
+ * Afterwards, restore this attribute to its default value.
+ */
+ kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&tv,
+ sizeof(tv));
result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len,
- O_NONBLOCK);
+ 0);
+ memset(&tv, 0, sizeof(tv));
+ kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&tv,
+ sizeof(tv));
+
if (result == -EINPROGRESS)
result = 0;
if (result == 0)
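The SCTP connect change above bounds a blocking in-kernel connect with SO_SNDTIMEO and then restores the default. The same trick works from userspace; a sketch (not from the patch; on timeout, connect() is documented to fail with EINPROGRESS):

	#include <string.h>
	#include <sys/socket.h>
	#include <sys/time.h>

	static int connect_bounded(int fd, const struct sockaddr *sa,
				   socklen_t len, time_t seconds)
	{
		struct timeval tv = { .tv_sec = seconds, .tv_usec = 0 };
		int ret;

		setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv));
		ret = connect(fd, sa, len);
		memset(&tv, 0, sizeof(tv));	/* 0 == wait indefinitely */
		setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv));
		return ret;
	}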
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 97d17eaeba07..49121e5a8de2 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -283,8 +283,7 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
iget_failed(ecryptfs_inode);
goto out;
}
- unlock_new_inode(ecryptfs_inode);
- d_instantiate(ecryptfs_dentry, ecryptfs_inode);
+ d_instantiate_new(ecryptfs_dentry, ecryptfs_inode);
out:
return rc;
}
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 08d3bd602f73..61c9514da5e9 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -101,14 +101,20 @@ static int eventfd_release(struct inode *inode, struct file *file)
return 0;
}
-static __poll_t eventfd_poll(struct file *file, poll_table *wait)
+static struct wait_queue_head *
+eventfd_get_poll_head(struct file *file, __poll_t events)
+{
+ struct eventfd_ctx *ctx = file->private_data;
+
+ return &ctx->wqh;
+}
+
+static __poll_t eventfd_poll_mask(struct file *file, __poll_t eventmask)
{
struct eventfd_ctx *ctx = file->private_data;
__poll_t events = 0;
u64 count;
- poll_wait(file, &ctx->wqh, wait);
-
/*
* All writes to ctx->count occur within ctx->wqh.lock. This read
* can be done outside ctx->wqh.lock because we know that poll_wait
@@ -305,7 +311,8 @@ static const struct file_operations eventfd_fops = {
.show_fdinfo = eventfd_show_fdinfo,
#endif
.release = eventfd_release,
- .poll = eventfd_poll,
+ .get_poll_head = eventfd_get_poll_head,
+ .poll_mask = eventfd_poll_mask,
.read = eventfd_read,
.write = eventfd_write,
.llseek = noop_llseek,
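The eventfd conversion above splits the old ->poll into two smaller callbacks: ->get_poll_head only names the wait queue, and ->poll_mask computes readiness without registering on it. A hedged sketch of the same pair for a hypothetical single-queue driver (struct foo_ctx and its fields are invented):

	struct foo_ctx {
		wait_queue_head_t wqh;
		bool ready;
	};

	static struct wait_queue_head *foo_get_poll_head(struct file *file,
							 __poll_t events)
	{
		struct foo_ctx *ctx = file->private_data;

		return &ctx->wqh;
	}

	static __poll_t foo_poll_mask(struct file *file, __poll_t events)
	{
		struct foo_ctx *ctx = file->private_data;

		return READ_ONCE(ctx->ready) ? EPOLLIN | EPOLLRDNORM : 0;
	}

	static const struct file_operations foo_fops = {
		.get_poll_head	= foo_get_poll_head,
		.poll_mask	= foo_poll_mask,
	};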
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 602ca4285b2e..67db22fe99c5 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -884,8 +884,7 @@ static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt,
pt->_key = epi->event.events;
if (!is_file_epoll(epi->ffd.file))
- return epi->ffd.file->f_op->poll(epi->ffd.file, pt) &
- epi->event.events;
+ return vfs_poll(epi->ffd.file, pt) & epi->event.events;
ep = epi->ffd.file->private_data;
poll_wait(epi->ffd.file, &ep->poll_wait, pt);
@@ -2025,7 +2024,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
/* The target file descriptor must support poll */
error = -EPERM;
- if (!tf.file->f_op->poll)
+ if (!file_can_poll(tf.file))
goto error_tgt_fput;
/* Check if EPOLLWAKEUP is allowed */
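file_can_poll() and vfs_poll(), used above, hide which of the two poll interfaces a file implements. A sketch of a caller (hypothetical helper; passing a NULL table performs a plain readiness query with no wait-queue registration):

	static __poll_t query_ready(struct file *file)
	{
		if (!file_can_poll(file))
			return DEFAULT_POLLMASK; /* treated as always ready */
		return vfs_poll(file, NULL);
	}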
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 3c6a9c156b7a..ddbf87246898 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -790,7 +790,7 @@ int ore_create(struct ore_io_state *ios)
for (i = 0; i < ios->oc->numdevs; i++) {
struct osd_request *or;
- or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
+ or = osd_start_request(_ios_od(ios, i));
if (unlikely(!or)) {
ORE_ERR("%s: osd_start_request failed\n", __func__);
ret = -ENOMEM;
@@ -815,7 +815,7 @@ int ore_remove(struct ore_io_state *ios)
for (i = 0; i < ios->oc->numdevs; i++) {
struct osd_request *or;
- or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
+ or = osd_start_request(_ios_od(ios, i));
if (unlikely(!or)) {
ORE_ERR("%s: osd_start_request failed\n", __func__);
ret = -ENOMEM;
@@ -847,7 +847,7 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp)
struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
struct osd_request *or;
- or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL);
+ or = osd_start_request(_ios_od(ios, dev));
if (unlikely(!or)) {
ORE_ERR("%s: osd_start_request failed\n", __func__);
ret = -ENOMEM;
@@ -966,7 +966,7 @@ int _ore_read_mirror(struct ore_io_state *ios, unsigned cur_comp)
return 0; /* Just an empty slot */
first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1;
- or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL);
+ or = osd_start_request(_ios_od(ios, first_dev));
if (unlikely(!or)) {
ORE_ERR("%s: osd_start_request failed\n", __func__);
return -ENOMEM;
@@ -1060,7 +1060,7 @@ static int _truncate_mirrors(struct ore_io_state *ios, unsigned cur_comp,
struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
struct osd_request *or;
- or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL);
+ or = osd_start_request(_ios_od(ios, cur_comp));
if (unlikely(!or)) {
ORE_ERR("%s: osd_start_request failed\n", __func__);
return -ENOMEM;
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 179cd5c2f52a..719a3152da80 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -229,7 +229,7 @@ void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
static int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
u64 offset, void *p, unsigned length)
{
- struct osd_request *or = osd_start_request(od, GFP_KERNEL);
+ struct osd_request *or = osd_start_request(od);
/* struct osd_sense_info osi = {.key = 0};*/
int ret;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 1e01fabef130..71635909df3b 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1264,21 +1264,11 @@ do_indirects:
static void ext2_truncate_blocks(struct inode *inode, loff_t offset)
{
- /*
- * XXX: it seems like a bug here that we don't allow
- * IS_APPEND inode to have blocks-past-i_size trimmed off.
- * review and fix this.
- *
- * Also would be nice to be able to handle IO errors and such,
- * but that's probably too much to ask.
- */
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)))
return;
if (ext2_inode_is_fast_symlink(inode))
return;
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
- return;
dax_sem_down_write(EXT2_I(inode));
__ext2_truncate_blocks(inode, offset);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 55f7caadb093..152453a91877 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -41,8 +41,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
{
int err = ext2_add_link(dentry, inode);
if (!err) {
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
}
inode_dec_link_count(inode);
@@ -255,8 +254,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
if (err)
goto out_fail;
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
out:
return err;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index a42e71203e53..229ea4da6785 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2390,7 +2390,7 @@ extern int ext4_init_inode_table(struct super_block *sb,
extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
/* mballoc.c */
-extern const struct file_operations ext4_seq_mb_groups_fops;
+extern const struct seq_operations ext4_mb_seq_groups_ops;
extern long ext4_mb_stats;
extern long ext4_mb_max_to_scan;
extern int ext4_mb_init(struct super_block *);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 769a62708b1c..6884e81c1465 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2254,7 +2254,7 @@ out:
static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
{
- struct super_block *sb = seq->private;
+ struct super_block *sb = PDE_DATA(file_inode(seq->file));
ext4_group_t group;
if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
@@ -2265,7 +2265,7 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct super_block *sb = seq->private;
+ struct super_block *sb = PDE_DATA(file_inode(seq->file));
ext4_group_t group;
++*pos;
@@ -2277,7 +2277,7 @@ static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
{
- struct super_block *sb = seq->private;
+ struct super_block *sb = PDE_DATA(file_inode(seq->file));
ext4_group_t group = (ext4_group_t) ((unsigned long) v);
int i;
int err, buddy_loaded = 0;
@@ -2330,34 +2330,13 @@ static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
{
}
-static const struct seq_operations ext4_mb_seq_groups_ops = {
+const struct seq_operations ext4_mb_seq_groups_ops = {
.start = ext4_mb_seq_groups_start,
.next = ext4_mb_seq_groups_next,
.stop = ext4_mb_seq_groups_stop,
.show = ext4_mb_seq_groups_show,
};
-static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
-{
- struct super_block *sb = PDE_DATA(inode);
- int rc;
-
- rc = seq_open(file, &ext4_mb_seq_groups_ops);
- if (rc == 0) {
- struct seq_file *m = file->private_data;
- m->private = sb;
- }
- return rc;
-
-}
-
-const struct file_operations ext4_seq_mb_groups_fops = {
- .open = ext4_mb_seq_groups_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
{
int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
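proc_create_seq_data() gives the iterators no hook to stash a payload in seq->private, which is why the mballoc hunks above switch to PDE_DATA(file_inode(seq->file)). A minimal sketch of the same pattern, with foo_* names invented and only ->start() shown:

	static void *foo_seq_start(struct seq_file *seq, loff_t *pos)
	{
		struct foo_table *t = PDE_DATA(file_inode(seq->file));

		return *pos < t->nr_entries ? &t->entries[*pos] : NULL;
	}

	/* registration: the last argument becomes the entry's PDE_DATA() */
	proc_create_seq_data("foo_table", 0444, parent, &foo_seq_ops, table);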
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index b1f21e3a0763..4a09063ce1d2 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2411,8 +2411,7 @@ static int ext4_add_nondir(handle_t *handle,
int err = ext4_add_entry(handle, dentry, inode);
if (!err) {
ext4_mark_inode_dirty(handle, inode);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
}
drop_nlink(inode);
@@ -2651,8 +2650,7 @@ out_clear_inode:
err = ext4_mark_inode_dirty(handle, dir);
if (err)
goto out_clear_inode;
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 9ebd26c957c2..f34da0bb8f17 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -346,39 +346,9 @@ static struct kobject *ext4_root;
static struct kobject *ext4_feat;
-#define PROC_FILE_SHOW_DEFN(name) \
-static int name##_open(struct inode *inode, struct file *file) \
-{ \
- return single_open(file, ext4_seq_##name##_show, PDE_DATA(inode)); \
-} \
-\
-static const struct file_operations ext4_seq_##name##_fops = { \
- .open = name##_open, \
- .read = seq_read, \
- .llseek = seq_lseek, \
- .release = single_release, \
-}
-
-#define PROC_FILE_LIST(name) \
- { __stringify(name), &ext4_seq_##name##_fops }
-
-PROC_FILE_SHOW_DEFN(es_shrinker_info);
-PROC_FILE_SHOW_DEFN(options);
-
-static const struct ext4_proc_files {
- const char *name;
- const struct file_operations *fops;
-} proc_files[] = {
- PROC_FILE_LIST(options),
- PROC_FILE_LIST(es_shrinker_info),
- PROC_FILE_LIST(mb_groups),
- { NULL, NULL },
-};
-
int ext4_register_sysfs(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- const struct ext4_proc_files *p;
int err;
init_completion(&sbi->s_kobj_unregister);
@@ -392,11 +362,14 @@ int ext4_register_sysfs(struct super_block *sb)
if (ext4_proc_root)
sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
-
if (sbi->s_proc) {
- for (p = proc_files; p->name; p++)
- proc_create_data(p->name, S_IRUGO, sbi->s_proc,
- p->fops, sb);
+ proc_create_single_data("options", S_IRUGO, sbi->s_proc,
+ ext4_seq_options_show, sb);
+ proc_create_single_data("es_shrinker_info", S_IRUGO,
+ sbi->s_proc, ext4_seq_es_shrinker_info_show,
+ sb);
+ proc_create_seq_data("mb_groups", S_IRUGO, sbi->s_proc,
+ &ext4_mb_seq_groups_ops, sb);
}
return 0;
}
@@ -404,13 +377,9 @@ int ext4_register_sysfs(struct super_block *sb)
void ext4_unregister_sysfs(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- const struct ext4_proc_files *p;
- if (sbi->s_proc) {
- for (p = proc_files; p->name; p++)
- remove_proc_entry(p->name, sbi->s_proc);
- remove_proc_entry(sb->s_id, ext4_proc_root);
- }
+ if (sbi->s_proc)
+ remove_proc_subtree(sb->s_id, ext4_proc_root);
kobject_del(&sbi->s_kobj);
}
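proc_create_single_data(), used above, folds the single_open() boilerplate into procfs; the data pointer passed at registration shows up as m->private in the show callback. A hedged sketch with hypothetical foo_* names:

	static int foo_stats_show(struct seq_file *m, void *v)
	{
		struct foo_sb_info *sbi = m->private; /* registration data */

		seq_printf(m, "frees=%lu\n", sbi->nr_frees);
		return 0;
	}

	/* at mount time, mirroring the ext4 registration above */
	proc_create_single_data("stats", S_IRUGO, sbi->s_proc,
				foo_stats_show, sbi);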
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index d5098efe577c..75e37fd720b2 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -294,8 +294,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
alloc_nid_done(sbi, ino);
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
@@ -597,8 +596,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
err = page_symlink(inode, disk_link.name, disk_link.len);
err_out:
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
/*
* Let's flush symlink data in order to avoid broken symlink as much as
@@ -661,8 +659,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
alloc_nid_done(sbi, inode->i_ino);
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
@@ -713,8 +710,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
alloc_nid_done(sbi, inode->i_ino);
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index f33a56d6e6dd..4b47ca6296a7 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -572,23 +572,6 @@ static int iostat_info_seq_show(struct seq_file *seq, void *offset)
return 0;
}
-#define F2FS_PROC_FILE_DEF(_name) \
-static int _name##_open_fs(struct inode *inode, struct file *file) \
-{ \
- return single_open(file, _name##_seq_show, PDE_DATA(inode)); \
-} \
- \
-static const struct file_operations f2fs_seq_##_name##_fops = { \
- .open = _name##_open_fs, \
- .read = seq_read, \
- .llseek = seq_lseek, \
- .release = single_release, \
-};
-
-F2FS_PROC_FILE_DEF(segment_info);
-F2FS_PROC_FILE_DEF(segment_bits);
-F2FS_PROC_FILE_DEF(iostat_info);
-
int __init f2fs_init_sysfs(void)
{
int ret;
@@ -632,12 +615,12 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
if (sbi->s_proc) {
- proc_create_data("segment_info", S_IRUGO, sbi->s_proc,
- &f2fs_seq_segment_info_fops, sb);
- proc_create_data("segment_bits", S_IRUGO, sbi->s_proc,
- &f2fs_seq_segment_bits_fops, sb);
- proc_create_data("iostat_info", S_IRUGO, sbi->s_proc,
- &f2fs_seq_iostat_info_fops, sb);
+ proc_create_single_data("segment_info", S_IRUGO, sbi->s_proc,
+ segment_info_seq_show, sb);
+ proc_create_single_data("segment_bits", S_IRUGO, sbi->s_proc,
+ segment_bits_seq_show, sb);
+ proc_create_single_data("iostat_info", S_IRUGO, sbi->s_proc,
+ iostat_info_seq_show, sb);
}
return 0;
}
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 582ca731a6c9..484ce674e0cd 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -314,10 +314,6 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry)
int err;
mutex_lock(&MSDOS_SB(sb)->s_lock);
- /*
- * Check whether the directory is not in use, then check
- * whether it is empty.
- */
err = fat_dir_empty(inode);
if (err)
goto out;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 2649759c478a..4f4362d5a04c 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -697,15 +697,6 @@ static int vfat_find(struct inode *dir, const struct qstr *qname,
return fat_search_long(dir, qname->name, len, sinfo);
}
-/*
- * (nfsd's) anonymous disconnected dentry?
- * NOTE: !IS_ROOT() is not anonymous (I.e. d_splice_alias() did the job).
- */
-static int vfat_d_anon_disconn(struct dentry *dentry)
-{
- return IS_ROOT(dentry) && (dentry->d_flags & DCACHE_DISCONNECTED);
-}
-
static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
@@ -738,8 +729,7 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
* Checking "alias->d_parent == dentry->d_parent" to make sure
* FS is not corrupted (especially double linked dir).
*/
- if (alias && alias->d_parent == dentry->d_parent &&
- !vfat_d_anon_disconn(alias)) {
+ if (alias && alias->d_parent == dentry->d_parent) {
/*
* This inode has a non-anonymous (not DCACHE_DISCONNECTED)
* dentry. This means the user did ->lookup() by another
* name (longname vs its 8.3 alias) in the past.
*
* Switch to the new one for locality if possible.
*/
- BUG_ON(d_unhashed(alias));
if (!S_ISDIR(inode->i_mode))
d_move(alias, dentry);
iput(inode);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index d737ff082472..c42169459298 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -871,9 +871,9 @@ int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
if (fa->fa_file != filp)
continue;
- spin_lock_irq(&fa->fa_lock);
+ write_lock_irq(&fa->fa_lock);
fa->fa_file = NULL;
- spin_unlock_irq(&fa->fa_lock);
+ write_unlock_irq(&fa->fa_lock);
*fp = fa->fa_next;
call_rcu(&fa->fa_rcu, fasync_free_rcu);
@@ -918,13 +918,13 @@ struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasy
if (fa->fa_file != filp)
continue;
- spin_lock_irq(&fa->fa_lock);
+ write_lock_irq(&fa->fa_lock);
fa->fa_fd = fd;
- spin_unlock_irq(&fa->fa_lock);
+ write_unlock_irq(&fa->fa_lock);
goto out;
}
- spin_lock_init(&new->fa_lock);
+ rwlock_init(&new->fa_lock);
new->magic = FASYNC_MAGIC;
new->fa_file = filp;
new->fa_fd = fd;
@@ -987,14 +987,13 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{
while (fa) {
struct fown_struct *fown;
- unsigned long flags;
if (fa->magic != FASYNC_MAGIC) {
printk(KERN_ERR "kill_fasync: bad magic number in "
"fasync_struct!\n");
return;
}
- spin_lock_irqsave(&fa->fa_lock, flags);
+ read_lock(&fa->fa_lock);
if (fa->fa_file) {
fown = &fa->fa_file->f_owner;
/* Don't send SIGURG to processes which have not set a
@@ -1003,7 +1002,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
if (!(sig == SIGURG && fown->signum == 0))
send_sigio(fown, fa->fa_fd, band);
}
- spin_unlock_irqrestore(&fa->fa_lock, flags);
+ read_unlock(&fa->fa_lock);
fa = rcu_dereference(fa->fa_next);
}
}
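The fcntl.c hunks turn fa_lock from a spinlock into an rwlock so the hot delivery path in kill_fasync_rcu() takes only the shared side, while insert and remove keep exclusive access. The idiom, as a sketch with invented names:

	static DEFINE_RWLOCK(foo_lock);

	/* delivery: many CPUs may hold the read side concurrently */
	read_lock(&foo_lock);
	deliver(entry);
	read_unlock(&foo_lock);

	/* update: exclusive, IRQ-safe as in fasync_remove_entry() */
	write_lock_irq(&foo_lock);
	entry->owner = NULL;
	write_unlock_irq(&foo_lock);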
diff --git a/fs/filesystems.c b/fs/filesystems.c
index f2728a4a03a1..b03f57b1105b 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -238,21 +238,9 @@ static int filesystems_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int filesystems_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, filesystems_proc_show, NULL);
-}
-
-static const struct file_operations filesystems_proc_fops = {
- .open = filesystems_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_filesystems_init(void)
{
- proc_create("filesystems", 0, NULL, &filesystems_proc_fops);
+ proc_create_single("filesystems", 0, NULL, filesystems_proc_show);
return 0;
}
module_init(proc_filesystems_init);
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index ce4785fd81c6..a51425634f65 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -193,13 +193,9 @@ vxfs_lookup(struct inode *dip, struct dentry *dp, unsigned int flags)
return ERR_PTR(-ENAMETOOLONG);
ino = vxfs_inode_by_name(dip, dp);
- if (ino) {
+ if (ino)
ip = vxfs_iget(dip->i_sb, ino);
- if (IS_ERR(ip))
- return ERR_CAST(ip);
- }
- d_add(dp, ip);
- return NULL;
+ return d_splice_alias(ip, dp);
}
/**
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 47d7c151fcba..471d863958bc 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1961,7 +1961,7 @@ void wb_workfn(struct work_struct *work)
}
if (!list_empty(&wb->work_list))
- mod_delayed_work(bdi_wq, &wb->dwork, 0);
+ wb_wakeup(wb);
else if (wb_has_dirty_io(wb) && dirty_writeback_interval)
wb_wakeup_delayed(wb);
diff --git a/fs/fscache/histogram.c b/fs/fscache/histogram.c
index 15a3d042247e..9a13e9e15b69 100644
--- a/fs/fscache/histogram.c
+++ b/fs/fscache/histogram.c
@@ -83,24 +83,9 @@ static void fscache_histogram_stop(struct seq_file *m, void *v)
{
}
-static const struct seq_operations fscache_histogram_ops = {
+const struct seq_operations fscache_histogram_ops = {
.start = fscache_histogram_start,
.stop = fscache_histogram_stop,
.next = fscache_histogram_next,
.show = fscache_histogram_show,
};
-
-/*
- * open "/proc/fs/fscache/histogram" to provide latency data
- */
-static int fscache_histogram_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &fscache_histogram_ops);
-}
-
-const struct file_operations fscache_histogram_fops = {
- .open = fscache_histogram_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index 500650f938fe..f83328a7f048 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -31,6 +31,7 @@
#include <linux/fscache-cache.h>
#include <trace/events/fscache.h>
#include <linux/sched.h>
+#include <linux/seq_file.h>
#define FSCACHE_MIN_THREADS 4
#define FSCACHE_MAX_THREADS 32
@@ -84,7 +85,7 @@ static inline void fscache_hist(atomic_t histogram[], unsigned long start_jif)
atomic_inc(&histogram[jif]);
}
-extern const struct file_operations fscache_histogram_fops;
+extern const struct seq_operations fscache_histogram_ops;
#else
#define fscache_hist(hist, start_jif) do {} while (0)
@@ -294,7 +295,7 @@ static inline void fscache_stat_d(atomic_t *stat)
#define __fscache_stat(stat) (stat)
-extern const struct file_operations fscache_stats_fops;
+int fscache_stats_show(struct seq_file *m, void *v);
#else
#define __fscache_stat(stat) (NULL)
diff --git a/fs/fscache/proc.c b/fs/fscache/proc.c
index 1d9e4951a597..49a8c90414bc 100644
--- a/fs/fscache/proc.c
+++ b/fs/fscache/proc.c
@@ -26,14 +26,14 @@ int __init fscache_proc_init(void)
goto error_dir;
#ifdef CONFIG_FSCACHE_STATS
- if (!proc_create("fs/fscache/stats", S_IFREG | 0444, NULL,
- &fscache_stats_fops))
+ if (!proc_create_single("fs/fscache/stats", S_IFREG | 0444, NULL,
+ fscache_stats_show))
goto error_stats;
#endif
#ifdef CONFIG_FSCACHE_HISTOGRAM
- if (!proc_create("fs/fscache/histogram", S_IFREG | 0444, NULL,
- &fscache_histogram_fops))
+ if (!proc_create_seq("fs/fscache/histogram", S_IFREG | 0444, NULL,
+ &fscache_histogram_ops))
goto error_histogram;
#endif
diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c
index fcc8c2f2690e..00564a1dfd76 100644
--- a/fs/fscache/stats.c
+++ b/fs/fscache/stats.c
@@ -138,7 +138,7 @@ atomic_t fscache_n_cache_culled_objects;
/*
* display the general statistics
*/
-static int fscache_stats_show(struct seq_file *m, void *v)
+int fscache_stats_show(struct seq_file *m, void *v)
{
seq_puts(m, "FS-Cache statistics\n");
@@ -284,18 +284,3 @@ static int fscache_stats_show(struct seq_file *m, void *v)
atomic_read(&fscache_n_cache_culled_objects));
return 0;
}
-
-/*
- * open "/proc/fs/fscache/stats" allowing provision of a statistical summary
- */
-static int fscache_stats_open(struct inode *inode, struct file *file)
-{
- return single_open(file, fscache_stats_show, NULL);
-}
-
-const struct file_operations fscache_stats_fops = {
- .open = fscache_stats_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index f58716567972..35f5ee23566d 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -54,8 +54,7 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
continue;
if (start >= to)
break;
- if (gfs2_is_jdata(ip))
- set_buffer_uptodate(bh);
+ set_buffer_uptodate(bh);
gfs2_trans_add_data(ip->i_gl, bh);
}
}
@@ -747,18 +746,21 @@ out:
put_page(page);
gfs2_trans_end(sdp);
- if (pos + len > ip->i_inode.i_size)
- gfs2_trim_blocks(&ip->i_inode);
- goto out_trans_fail;
+ if (alloc_required) {
+ gfs2_inplace_release(ip);
+ if (pos + len > ip->i_inode.i_size)
+ gfs2_trim_blocks(&ip->i_inode);
+ }
+ goto out_qunlock;
out_endtrans:
gfs2_trans_end(sdp);
out_trans_fail:
- if (alloc_required) {
+ if (alloc_required)
gfs2_inplace_release(ip);
out_qunlock:
+ if (alloc_required)
gfs2_quota_unlock(ip);
- }
out_unlock:
if (&ip->i_inode == sdp->sd_rindex) {
gfs2_glock_dq(&m_ip->i_gh);
@@ -814,7 +816,6 @@ out:
* @inode: The inode
* @dibh: The buffer_head containing the on-disk inode
* @pos: The file position
- * @len: The length of the write
* @copied: How much was actually copied by the VFS
* @page: The page
*
@@ -824,17 +825,15 @@ out:
* Returns: errno
*/
static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
- loff_t pos, unsigned len, unsigned copied,
+ loff_t pos, unsigned copied,
struct page *page)
{
struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_sbd *sdp = GFS2_SB(inode);
- struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
u64 to = pos + copied;
void *kaddr;
unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
- BUG_ON(pos + len > gfs2_max_stuffed_size(ip));
+ BUG_ON(pos + copied > gfs2_max_stuffed_size(ip));
kaddr = kmap_atomic(page);
memcpy(buf + pos, kaddr + pos, copied);
@@ -850,20 +849,6 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
i_size_write(inode, to);
mark_inode_dirty(inode);
}
-
- if (inode == sdp->sd_rindex) {
- adjust_fs_space(inode);
- sdp->sd_rindex_uptodate = 0;
- }
-
- brelse(dibh);
- gfs2_trans_end(sdp);
- if (inode == sdp->sd_rindex) {
- gfs2_glock_dq(&m_ip->i_gh);
- gfs2_holder_uninit(&m_ip->i_gh);
- }
- gfs2_glock_dq(&ip->i_gh);
- gfs2_holder_uninit(&ip->i_gh);
return copied;
}
@@ -877,9 +862,8 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
* @page: The page that has been written
* @fsdata: The fsdata (unused in GFS2)
*
- * The main write_end function for GFS2. We have a separate one for
- * stuffed files as they are slightly different, otherwise we just
- * put our locking around the VFS provided functions.
+ * The main write_end function for GFS2. We just put our locking around the VFS
+ * provided functions.
*
* Returns: errno
*/
@@ -900,32 +884,39 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL);
ret = gfs2_meta_inode_buffer(ip, &dibh);
- if (unlikely(ret)) {
- unlock_page(page);
- put_page(page);
- goto failed;
- }
+ if (unlikely(ret))
+ goto out;
- if (gfs2_is_stuffed(ip))
- return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page);
+ if (gfs2_is_stuffed(ip)) {
+ ret = gfs2_stuffed_write_end(inode, dibh, pos, copied, page);
+ page = NULL;
+ goto out2;
+ }
- if (!gfs2_is_writeback(ip))
+ if (gfs2_is_jdata(ip))
gfs2_page_add_databufs(ip, page, pos & ~PAGE_MASK, len);
+ else
+ gfs2_ordered_add_inode(ip);
ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+ page = NULL;
if (tr->tr_num_buf_new)
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
else
gfs2_trans_add_meta(ip->i_gl, dibh);
-
+out2:
if (inode == sdp->sd_rindex) {
adjust_fs_space(inode);
sdp->sd_rindex_uptodate = 0;
}
brelse(dibh);
-failed:
+out:
+ if (page) {
+ unlock_page(page);
+ put_page(page);
+ }
gfs2_trans_end(sdp);
gfs2_inplace_release(ip);
if (ip->i_qadata && ip->i_qadata->qa_qd_num)
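The reworked gfs2_write_end() above uses one cleanup label plus page = NULL to record that ownership of the page has moved on. A minimal sketch of that idiom (foo_* helpers are hypothetical):

	static int foo_write_end(struct page *page)
	{
		int ret;

		ret = foo_get_buffer();	/* early failure: page still ours */
		if (ret)
			goto out;

		ret = foo_commit(page);	/* consumes the page reference */
		page = NULL;		/* ownership handed off */
	out:
		if (page) {		/* only on the early-failure path */
			unlock_page(page);
			put_page(page);
		}
		return ret;
	}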
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 278ed0869c3c..a7b586e02693 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -89,10 +89,12 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
map_bh(bh, inode->i_sb, block);
set_buffer_uptodate(bh);
- if (!gfs2_is_jdata(ip))
- mark_buffer_dirty(bh);
- if (!gfs2_is_writeback(ip))
+ if (gfs2_is_jdata(ip))
gfs2_trans_add_data(ip->i_gl, bh);
+ else {
+ mark_buffer_dirty(bh);
+ gfs2_ordered_add_inode(ip);
+ }
if (release) {
unlock_page(page);
@@ -176,8 +178,8 @@ out:
/**
* find_metapath - Find path through the metadata tree
* @sdp: The superblock
- * @mp: The metapath to return the result in
* @block: The disk block to look up
+ * @mp: The metapath to return the result in
* @height: The pre-calculated height of the metadata tree
*
* This routine returns a struct metapath structure that defines a path
@@ -188,8 +190,7 @@ out:
* filesystem with a blocksize of 4096.
*
* find_metapath() would return a struct metapath structure set to:
- * mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48,
- * and mp_list[2] = 165.
+ * mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
*
* That means that in order to get to the block containing the byte at
* offset 101342453, we would load the indirect block pointed to by pointer
@@ -279,6 +280,21 @@ static inline __be64 *metapointer(unsigned int height, const struct metapath *mp
return p + mp->mp_list[height];
}
+static inline const __be64 *metaend(unsigned int height, const struct metapath *mp)
+{
+ const struct buffer_head *bh = mp->mp_bh[height];
+ return (const __be64 *)(bh->b_data + bh->b_size);
+}
+
+static void clone_metapath(struct metapath *clone, struct metapath *mp)
+{
+ unsigned int hgt;
+
+ *clone = *mp;
+ for (hgt = 0; hgt < mp->mp_aheight; hgt++)
+ get_bh(clone->mp_bh[hgt]);
+}
+
static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
{
const __be64 *t;
@@ -420,20 +436,140 @@ static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __b
return (ptr - first);
}
-static inline void bmap_lock(struct gfs2_inode *ip, int create)
+typedef const __be64 *(*gfs2_metadata_walker)(
+ struct metapath *mp,
+ const __be64 *start, const __be64 *end,
+ u64 factor, void *data);
+
+#define WALK_STOP ((__be64 *)0)
+#define WALK_NEXT ((__be64 *)1)
+
+static int gfs2_walk_metadata(struct inode *inode, sector_t lblock,
+ u64 len, struct metapath *mp, gfs2_metadata_walker walker,
+ void *data)
{
- if (create)
- down_write(&ip->i_rw_mutex);
- else
- down_read(&ip->i_rw_mutex);
+ struct metapath clone;
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
+ const __be64 *start, *end, *ptr;
+ u64 factor = 1;
+ unsigned int hgt;
+ int ret = 0;
+
+ for (hgt = ip->i_height - 1; hgt >= mp->mp_aheight; hgt--)
+ factor *= sdp->sd_inptrs;
+
+ for (;;) {
+ u64 step;
+
+ /* Walk indirect block. */
+ start = metapointer(hgt, mp);
+ end = metaend(hgt, mp);
+
+ step = (end - start) * factor;
+ if (step > len)
+ end = start + DIV_ROUND_UP_ULL(len, factor);
+
+ ptr = walker(mp, start, end, factor, data);
+ if (ptr == WALK_STOP)
+ break;
+ if (step >= len)
+ break;
+ len -= step;
+ if (ptr != WALK_NEXT) {
+ BUG_ON(!*ptr);
+ mp->mp_list[hgt] += ptr - start;
+ goto fill_up_metapath;
+ }
+
+lower_metapath:
+ /* Decrease height of metapath. */
+ if (mp != &clone) {
+ clone_metapath(&clone, mp);
+ mp = &clone;
+ }
+ brelse(mp->mp_bh[hgt]);
+ mp->mp_bh[hgt] = NULL;
+ if (!hgt)
+ break;
+ hgt--;
+ factor *= sdp->sd_inptrs;
+
+ /* Advance in metadata tree. */
+ (mp->mp_list[hgt])++;
+ start = metapointer(hgt, mp);
+ end = metaend(hgt, mp);
+ if (start >= end) {
+ mp->mp_list[hgt] = 0;
+ if (!hgt)
+ break;
+ goto lower_metapath;
+ }
+
+fill_up_metapath:
+ /* Increase height of metapath. */
+ if (mp != &clone) {
+ clone_metapath(&clone, mp);
+ mp = &clone;
+ }
+ ret = fillup_metapath(ip, mp, ip->i_height - 1);
+ if (ret < 0)
+ break;
+ hgt += ret;
+ for (; ret; ret--)
+ do_div(factor, sdp->sd_inptrs);
+ mp->mp_aheight = hgt + 1;
+ }
+ if (mp == &clone)
+ release_metapath(mp);
+ return ret;
}
-static inline void bmap_unlock(struct gfs2_inode *ip, int create)
+struct gfs2_hole_walker_args {
+ u64 blocks;
+};
+
+static const __be64 *gfs2_hole_walker(struct metapath *mp,
+ const __be64 *start, const __be64 *end,
+ u64 factor, void *data)
{
- if (create)
- up_write(&ip->i_rw_mutex);
- else
- up_read(&ip->i_rw_mutex);
+ struct gfs2_hole_walker_args *args = data;
+ const __be64 *ptr;
+
+ for (ptr = start; ptr < end; ptr++) {
+ if (*ptr) {
+ args->blocks += (ptr - start) * factor;
+ if (mp->mp_aheight == mp->mp_fheight)
+ return WALK_STOP;
+ return ptr; /* increase height */
+ }
+ }
+ args->blocks += (end - start) * factor;
+ return WALK_NEXT;
+}
+
+/**
+ * gfs2_hole_size - figure out the size of a hole
+ * @inode: The inode
+ * @lblock: The logical starting block number
+ * @len: How far to look (in blocks)
+ * @mp: The metapath at lblock
+ * @iomap: The iomap to store the hole size in
+ *
+ * This function modifies @mp.
+ *
+ * Returns: errno on error
+ */
+static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
+ struct metapath *mp, struct iomap *iomap)
+{
+ struct gfs2_hole_walker_args args = { };
+ int ret = 0;
+
+ ret = gfs2_walk_metadata(inode, lblock, len, mp, gfs2_hole_walker, &args);
+ if (!ret)
+ iomap->length = args.blocks << inode->i_blkbits;
+ return ret;
}
static inline __be64 *gfs2_indirect_init(struct metapath *mp,
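A note on the walker contract introduced above: gfs2_walk_metadata() invokes the callback once per indirect block, and the callback returns WALK_STOP to end the walk, WALK_NEXT to keep scanning at the current height, or a pointer into [start, end) to descend at that slot. A hedged sketch of another conforming callback (hypothetical; it counts allocated pointers across the range):

	static const __be64 *foo_count_walker(struct metapath *mp,
			const __be64 *start, const __be64 *end,
			u64 factor, void *data)
	{
		u64 *nallocated = data;
		const __be64 *ptr;

		for (ptr = start; ptr < end; ptr++)
			if (*ptr)	/* non-zero slot: block present */
				(*nallocated)++;
		return WALK_NEXT;	/* continue at this height */
	}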
@@ -462,15 +598,11 @@ enum alloc_state {
};
/**
- * gfs2_bmap_alloc - Build a metadata tree of the requested height
+ * gfs2_iomap_alloc - Build a metadata tree of the requested height
* @inode: The GFS2 inode
- * @lblock: The logical starting block of the extent
- * @bh_map: This is used to return the mapping details
- * @zero_new: True if newly allocated blocks should be zeroed
+ * @iomap: The iomap structure
+ * @flags: iomap flags
* @mp: The metapath, with proper height information calculated
- * @maxlen: The max number of data blocks to alloc
- * @dblock: Pointer to return the resulting new block
- * @dblks: Pointer to return the number of blocks allocated
*
* In this routine we may have to alloc:
* i) Indirect blocks to grow the metadata tree height
@@ -483,6 +615,13 @@ enum alloc_state {
* blocks are available, there will only be one request per bmap call)
* and uses the state machine to initialise the blocks in order.
*
+ * Right now, this function will allocate at most one indirect block
+ * worth of data -- with a default block size of 4K, that's slightly
+ * less than 2M. If this limitation is ever removed to allow huge
+ * allocations, we would probably still want to limit the iomap size we
+ * return to avoid stalling other tasks during huge writes; the next
+ * iomap iteration would then find the blocks already allocated.
+ *
* Returns: errno on error
*/
@@ -497,6 +636,7 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
unsigned dblks = 0;
unsigned ptrs_per_blk;
const unsigned end_of_metadata = mp->mp_fheight - 1;
+ int ret;
enum alloc_state state;
__be64 *ptr;
__be64 zero_bn = 0;
@@ -507,6 +647,8 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
gfs2_trans_add_meta(ip->i_gl, dibh);
+ down_write(&ip->i_rw_mutex);
+
if (mp->mp_fheight == mp->mp_aheight) {
struct buffer_head *bh;
int eob;
@@ -542,11 +684,10 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
blks = dblks + iblks;
i = mp->mp_aheight;
do {
- int error;
n = blks - alloced;
- error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
- if (error)
- return error;
+ ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
+ if (ret)
+ goto out;
alloced += n;
if (state != ALLOC_DATA || gfs2_is_jdata(ip))
gfs2_trans_add_unrevoke(sdp, bn, n);
@@ -602,7 +743,7 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
dblks = n;
ptr = metapointer(end_of_metadata, mp);
iomap->addr = bn << inode->i_blkbits;
- iomap->flags |= IOMAP_F_NEW;
+ iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW;
while (n-- > 0)
*ptr++ = cpu_to_be64(bn++);
break;
@@ -612,64 +753,10 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
iomap->length = (u64)dblks << inode->i_blkbits;
ip->i_height = mp->mp_fheight;
gfs2_add_inode_blocks(&ip->i_inode, alloced);
- gfs2_dinode_out(ip, mp->mp_bh[0]->b_data);
- return 0;
-}
-
-/**
- * hole_size - figure out the size of a hole
- * @inode: The inode
- * @lblock: The logical starting block number
- * @mp: The metapath
- *
- * Returns: The hole size in bytes
- *
- */
-static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp)
-{
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_sbd *sdp = GFS2_SB(inode);
- struct metapath mp_eof;
- u64 factor = 1;
- int hgt;
- u64 holesz = 0;
- const __be64 *first, *end, *ptr;
- const struct buffer_head *bh;
- u64 lblock_stop = (i_size_read(inode) - 1) >> inode->i_blkbits;
- int zeroptrs;
- bool done = false;
-
- /* Get another metapath, to the very last byte */
- find_metapath(sdp, lblock_stop, &mp_eof, ip->i_height);
- for (hgt = ip->i_height - 1; hgt >= 0 && !done; hgt--) {
- bh = mp->mp_bh[hgt];
- if (bh) {
- zeroptrs = 0;
- first = metapointer(hgt, mp);
- end = (const __be64 *)(bh->b_data + bh->b_size);
-
- for (ptr = first; ptr < end; ptr++) {
- if (*ptr) {
- done = true;
- break;
- } else {
- zeroptrs++;
- }
- }
- } else {
- zeroptrs = sdp->sd_inptrs;
- }
- if (factor * zeroptrs >= lblock_stop - lblock + 1) {
- holesz = lblock_stop - lblock + 1;
- break;
- }
- holesz += factor * zeroptrs;
-
- factor *= sdp->sd_inptrs;
- if (hgt && (mp->mp_list[hgt - 1] < mp_eof.mp_list[hgt - 1]))
- (mp->mp_list[hgt - 1])++;
- }
- return holesz << inode->i_blkbits;
+ gfs2_dinode_out(ip, dibh->b_data);
+out:
+ up_write(&ip->i_rw_mutex);
+ return ret;
}
static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
@@ -685,121 +772,130 @@ static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
}
/**
- * gfs2_iomap_begin - Map blocks from an inode to disk blocks
+ * gfs2_iomap_get - Map blocks from an inode to disk blocks
* @inode: The inode
* @pos: Starting position in bytes
* @length: Length to map, in bytes
* @flags: iomap flags
* @iomap: The iomap structure
+ * @mp: The metapath
*
* Returns: errno
*/
-int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
- unsigned flags, struct iomap *iomap)
+static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
+ unsigned flags, struct iomap *iomap,
+ struct metapath *mp)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
- struct metapath mp = { .mp_aheight = 1, };
- unsigned int factor = sdp->sd_sb.sb_bsize;
- const u64 *arr = sdp->sd_heightsize;
__be64 *ptr;
sector_t lblock;
- sector_t lend;
- int ret = 0;
+ sector_t lblock_stop;
+ int ret;
int eob;
- unsigned int len;
+ u64 len;
struct buffer_head *bh;
u8 height;
- trace_gfs2_iomap_start(ip, pos, length, flags);
- if (!length) {
- ret = -EINVAL;
- goto out;
- }
+ if (!length)
+ return -EINVAL;
if (gfs2_is_stuffed(ip)) {
if (flags & IOMAP_REPORT) {
+ if (pos >= i_size_read(inode))
+ return -ENOENT;
gfs2_stuffed_iomap(inode, iomap);
- if (pos >= iomap->length)
- ret = -ENOENT;
- goto out;
+ return 0;
}
BUG_ON(!(flags & IOMAP_WRITE));
}
-
lblock = pos >> inode->i_blkbits;
- lend = (pos + length + sdp->sd_sb.sb_bsize - 1) >> inode->i_blkbits;
-
iomap->offset = lblock << inode->i_blkbits;
- iomap->addr = IOMAP_NULL_ADDR;
- iomap->type = IOMAP_HOLE;
- iomap->length = (u64)(lend - lblock) << inode->i_blkbits;
- iomap->flags = IOMAP_F_MERGED;
- bmap_lock(ip, flags & IOMAP_WRITE);
+ lblock_stop = (pos + length - 1) >> inode->i_blkbits;
+ len = lblock_stop - lblock + 1;
- /*
- * Directory data blocks have a struct gfs2_meta_header header, so the
- * remaining size is smaller than the filesystem block size. Logical
- * block numbers for directories are in units of this remaining size!
- */
- if (gfs2_is_dir(ip)) {
- factor = sdp->sd_jbsize;
- arr = sdp->sd_jheightsize;
- }
+ down_read(&ip->i_rw_mutex);
- ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
+ ret = gfs2_meta_inode_buffer(ip, &mp->mp_bh[0]);
if (ret)
- goto out_release;
+ goto unlock;
height = ip->i_height;
- while ((lblock + 1) * factor > arr[height])
+ while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
height++;
- find_metapath(sdp, lblock, &mp, height);
+ find_metapath(sdp, lblock, mp, height);
if (height > ip->i_height || gfs2_is_stuffed(ip))
goto do_alloc;
- ret = lookup_metapath(ip, &mp);
+ ret = lookup_metapath(ip, mp);
if (ret)
- goto out_release;
+ goto unlock;
- if (mp.mp_aheight != ip->i_height)
+ if (mp->mp_aheight != ip->i_height)
goto do_alloc;
- ptr = metapointer(ip->i_height - 1, &mp);
+ ptr = metapointer(ip->i_height - 1, mp);
if (*ptr == 0)
goto do_alloc;
- iomap->type = IOMAP_MAPPED;
- iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
+ bh = mp->mp_bh[ip->i_height - 1];
+ len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, len, &eob);
- bh = mp.mp_bh[ip->i_height - 1];
- len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, lend - lblock, &eob);
+ iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
+ iomap->length = len << inode->i_blkbits;
+ iomap->type = IOMAP_MAPPED;
+ iomap->flags = IOMAP_F_MERGED;
if (eob)
iomap->flags |= IOMAP_F_BOUNDARY;
- iomap->length = (u64)len << inode->i_blkbits;
-out_release:
- release_metapath(&mp);
- bmap_unlock(ip, flags & IOMAP_WRITE);
out:
- trace_gfs2_iomap_end(ip, iomap, ret);
+ iomap->bdev = inode->i_sb->s_bdev;
+unlock:
+ up_read(&ip->i_rw_mutex);
return ret;
do_alloc:
- if (flags & IOMAP_WRITE) {
- ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
- } else if (flags & IOMAP_REPORT) {
+ iomap->addr = IOMAP_NULL_ADDR;
+ iomap->length = len << inode->i_blkbits;
+ iomap->type = IOMAP_HOLE;
+ iomap->flags = 0;
+ if (flags & IOMAP_REPORT) {
loff_t size = i_size_read(inode);
if (pos >= size)
ret = -ENOENT;
- else if (height <= ip->i_height)
- iomap->length = hole_size(inode, lblock, &mp);
+ else if (height == ip->i_height)
+ ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
else
iomap->length = size - pos;
}
- goto out_release;
+ goto out;
+}
+
+static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
+ unsigned flags, struct iomap *iomap)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct metapath mp = { .mp_aheight = 1, };
+ int ret;
+
+ trace_gfs2_iomap_start(ip, pos, length, flags);
+ if (flags & IOMAP_WRITE) {
+ ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
+ if (!ret && iomap->type == IOMAP_HOLE)
+ ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
+ release_metapath(&mp);
+ } else {
+ ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
+ release_metapath(&mp);
+ }
+ trace_gfs2_iomap_end(ip, iomap, ret);
+ return ret;
}
+const struct iomap_ops gfs2_iomap_ops = {
+ .iomap_begin = gfs2_iomap_begin,
+};
+
/**
* gfs2_block_map - Map one or more blocks of an inode to a disk block
* @inode: The inode
@@ -825,25 +921,34 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
struct buffer_head *bh_map, int create)
{
struct gfs2_inode *ip = GFS2_I(inode);
- struct iomap iomap;
- int ret, flags = 0;
+ loff_t pos = (loff_t)lblock << inode->i_blkbits;
+ loff_t length = bh_map->b_size;
+ struct metapath mp = { .mp_aheight = 1, };
+ struct iomap iomap = { };
+ int ret;
clear_buffer_mapped(bh_map);
clear_buffer_new(bh_map);
clear_buffer_boundary(bh_map);
trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
- if (create)
- flags |= IOMAP_WRITE;
- ret = gfs2_iomap_begin(inode, (loff_t)lblock << inode->i_blkbits,
- bh_map->b_size, flags, &iomap);
- if (ret) {
- if (!create && ret == -ENOENT) {
- /* Return unmapped buffer beyond the end of file. */
+ if (create) {
+ ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp);
+ if (!ret && iomap.type == IOMAP_HOLE)
+ ret = gfs2_iomap_alloc(inode, &iomap, IOMAP_WRITE, &mp);
+ release_metapath(&mp);
+ } else {
+ ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp);
+ release_metapath(&mp);
+
+ /* Return unmapped buffer beyond the end of file. */
+ if (ret == -ENOENT) {
ret = 0;
+ goto out;
}
- goto out;
}
+ if (ret)
+ goto out;
if (iomap.length > bh_map->b_size) {
iomap.length = bh_map->b_size;
@@ -945,8 +1050,10 @@ static int gfs2_block_zero_range(struct inode *inode, loff_t from,
err = 0;
}
- if (!gfs2_is_writeback(ip))
+ if (gfs2_is_jdata(ip))
gfs2_trans_add_data(ip->i_gl, bh);
+ else
+ gfs2_ordered_add_inode(ip);
zero_user(page, offset, length);
mark_buffer_dirty(bh);
@@ -1056,6 +1163,19 @@ out:
return error;
}
+int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
+ struct iomap *iomap)
+{
+ struct metapath mp = { .mp_aheight = 1, };
+ int ret;
+
+ ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
+ if (!ret && iomap->type == IOMAP_HOLE)
+ ret = gfs2_iomap_alloc(inode, iomap, IOMAP_WRITE, &mp);
+ release_metapath(&mp);
+ return ret;
+}
+
/**
* sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
* @ip: inode
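Note: the split above converges gfs2_iomap_begin(), gfs2_block_map() and gfs2_iomap_get_alloc() on one shape: map first, allocate only if the mapping came back as a hole, with the caller owning the metapath so lookup state carries straight into the allocation. A minimal sketch of that shared shape, assuming the gfs2_iomap_get()/gfs2_iomap_alloc() signatures shown in this patch:

static int gfs2_iomap_write_sketch(struct inode *inode, loff_t pos,
				   loff_t length, struct iomap *iomap)
{
	struct metapath mp = { .mp_aheight = 1, };
	int ret;

	/* Resolve the mapping without allocating anything. */
	ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
	/* Only a hole needs blocks; the metapath from the lookup is
	 * handed straight to the allocator. */
	if (!ret && iomap->type == IOMAP_HOLE)
		ret = gfs2_iomap_alloc(inode, iomap, IOMAP_WRITE, &mp);
	release_metapath(&mp);
	return ret;
}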
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index c3402fe00653..6b18fb323f0a 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -46,11 +46,13 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip,
}
}
+extern const struct iomap_ops gfs2_iomap_ops;
+
extern int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
extern int gfs2_block_map(struct inode *inode, sector_t lblock,
struct buffer_head *bh, int create);
-extern int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
- unsigned flags, struct iomap *iomap);
+extern int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
+ struct iomap *iomap);
extern int gfs2_extent_map(struct inode *inode, u64 lblock, int *new,
u64 *dblock, unsigned *extlen);
extern int gfs2_setattr_size(struct inode *inode, u64 size);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 4b71f021a9e2..7137db7b0119 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -733,7 +733,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
struct gfs2_inode *ip = GFS2_I(inode);
loff_t end = offset + len;
struct buffer_head *dibh;
- struct iomap iomap;
+ struct iomap iomap = { };
int error;
error = gfs2_meta_inode_buffer(ip, &dibh);
@@ -749,8 +749,8 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
}
while (offset < end) {
- error = gfs2_iomap_begin(inode, offset, end - offset,
- IOMAP_WRITE, &iomap);
+ error = gfs2_iomap_get_alloc(inode, offset, end - offset,
+ &iomap);
if (error)
goto out;
offset = iomap.offset + iomap.length;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 1b6b1e3f5caf..d2ad817e089f 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -116,6 +116,7 @@ static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm)
static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm)
{
+ BUG_ON(rbm->offset >= rbm->rgd->rd_data);
return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) +
rbm->offset;
}
@@ -696,8 +697,6 @@ struct gfs2_sbd {
u32 sd_max_dirres; /* Max blocks needed to add a directory entry */
u32 sd_max_height; /* Max height of a file's metadata tree */
u64 sd_heightsize[GFS2_MAX_META_HEIGHT + 1];
- u32 sd_max_jheight; /* Max height of journaled file's meta tree */
- u64 sd_jheightsize[GFS2_MAX_META_HEIGHT + 1];
u32 sd_max_dents_per_leaf; /* Max number of dirents in a leaf block */
struct gfs2_args sd_args; /* Mount arguments */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 8700eb815638..feda55f67050 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -2006,10 +2006,6 @@ static int gfs2_getattr(const struct path *path, struct kstat *stat,
return 0;
}
-const struct iomap_ops gfs2_iomap_ops = {
- .iomap_begin = gfs2_iomap_begin,
-};
-
static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 1862e310a067..20241436126d 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -14,6 +14,7 @@
#include <linux/spinlock.h>
#include <linux/writeback.h>
#include "incore.h"
+#include "inode.h"
/**
* gfs2_log_lock - acquire the right to mess with the log manager
@@ -50,8 +51,12 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
{
- struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ struct gfs2_sbd *sdp;
+ if (!gfs2_is_ordered(ip))
+ return;
+
+ sdp = GFS2_SB(&ip->i_inode);
if (!test_bit(GIF_ORDERED, &ip->i_flags)) {
spin_lock(&sdp->sd_ordered_lock);
if (!test_and_set_bit(GIF_ORDERED, &ip->i_flags))
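Note: with this guard, gfs2_ordered_add_inode() can be called unconditionally for non-journaled data, which is what lets the bmap.c hunk above (and the quota.c hunk below) reduce every data-buffer call site to the same two-way choice:

	/* Journaled data goes through the transaction machinery;
	 * everything else lands on the ordered-write list, which is
	 * now a no-op unless the inode is in ordered mode. */
	if (gfs2_is_jdata(ip))
		gfs2_trans_add_data(ip->i_gl, bh);
	else
		gfs2_ordered_add_inode(ip);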
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 3ba3f167641c..c2469833b4fb 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -335,25 +335,6 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
sdp->sd_heightsize[x] = ~0;
gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);
- sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
- sizeof(struct gfs2_dinode);
- sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
- for (x = 2;; x++) {
- u64 space, d;
- u32 m;
-
- space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
- d = space;
- m = do_div(d, sdp->sd_inptrs);
-
- if (d != sdp->sd_jheightsize[x - 1] || m)
- break;
- sdp->sd_jheightsize[x] = space;
- }
- sdp->sd_max_jheight = x;
- sdp->sd_jheightsize[x] = ~0;
- gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);
-
sdp->sd_max_dents_per_leaf = (sdp->sd_sb.sb_bsize -
sizeof(struct gfs2_leaf)) /
GFS2_MIN_DIRENT_SIZE;
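Note: the deleted loop was a byte-for-byte twin of the sd_heightsize computation that stays behind (directories no longer get their own table). Reconstructed from that symmetry, the surviving table is built roughly like this -- a sketch, not the verbatim kernel code:

	sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_dinode);
	sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
	for (x = 2;; x++) {
		u64 space, d;
		u32 m;

		space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
		d = space;
		m = do_div(d, sdp->sd_inptrs);

		/* Stop once the multiplication overflows u64. */
		if (d != sdp->sd_heightsize[x - 1] || m)
			break;
		sdp->sd_heightsize[x] = space;
	}
	sdp->sd_max_height = x;
	sdp->sd_heightsize[x] = ~0;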
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 7a98abd340ee..e8585dfd209f 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -735,7 +735,10 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index,
if (!buffer_uptodate(bh))
goto unlock_out;
}
- gfs2_trans_add_data(ip->i_gl, bh);
+ if (gfs2_is_jdata(ip))
+ gfs2_trans_add_data(ip->i_gl, bh);
+ else
+ gfs2_ordered_add_inode(ip);
/* If we need to write to the next block as well */
if (to_write > (bsize - boff)) {
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 8b683917a27e..6bc5cfe710d1 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -372,8 +372,8 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
start = bi->bi_bh->b_data;
if (bi->bi_clone)
start = bi->bi_clone;
- end = start + bi->bi_bh->b_size;
start += bi->bi_offset;
+ end = start + bi->bi_len;
BUG_ON(rbm.offset & 3);
start += (rbm.offset / GFS2_NBBY);
bytes = min_t(u32, len / GFS2_NBBY, (end - start));
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index c75cacaa349b..064c9a0ef046 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -143,32 +143,21 @@ static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
* @gl: The inode glock associated with the buffer
* @bh: The buffer to add
*
- * This is used in two distinct cases:
- * i) In ordered write mode
- * We put the data buffer on a list so that we can ensure that it's
- * synced to disk at the right time
- * ii) In journaled data mode
- * We need to journal the data block in the same way as metadata in
- * the functions above. The difference is that here we have a tag
- * which is two __be64's being the block number (as per meta data)
- * and a flag which says whether the data block needs escaping or
- * not. This means we need a new log entry for each 251 or so data
- * blocks, which isn't an enormous overhead but twice as much as
- * for normal metadata blocks.
+ * This is used in journaled data mode.
+ * We need to journal the data block in the same way as metadata in
+ * the functions above. The difference is that here we have a tag
+ * which is two __be64s: the block number (as for metadata) and a
+ * flag which says whether the data block needs escaping or not.
+ * This means we need a new log entry for each 251 or so data
+ * blocks, which isn't an enormous overhead but is twice as much as
+ * for normal metadata blocks.
*/
void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh)
{
struct gfs2_trans *tr = current->journal_info;
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- struct address_space *mapping = bh->b_page->mapping;
- struct gfs2_inode *ip = GFS2_I(mapping->host);
struct gfs2_bufdata *bd;
- if (!gfs2_is_jdata(ip)) {
- gfs2_ordered_add_inode(ip);
- return;
- }
-
lock_buffer(bh);
if (buffer_pinned(bh)) {
set_bit(TR_TOUCHED, &tr->tr_flags);
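Note: where the "251 or so" in the comment above comes from, as hedged back-of-the-envelope arithmetic for 4 KiB blocks (the 72-byte descriptor header and the one-__be64 metadata tag are assumptions about the on-disk log format, not stated in this patch):

/*
 *   data tag size  = 2 * sizeof(__be64)    = 16 bytes
 *   tags per block = (4096 - 72) / 16      = 251
 *
 * So journaled data costs one extra log descriptor block per ~251
 * data blocks. Metadata descriptors pack a single __be64 per block
 * (~503 per descriptor), which is consistent with the comment's
 * "twice as much as for normal metadata blocks".
 */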
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 75b254280ff6..3bf2ae0e467c 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -31,21 +31,15 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry,
hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name);
res = hfs_brec_read(&fd, &rec, sizeof(rec));
if (res) {
- hfs_find_exit(&fd);
- if (res == -ENOENT) {
- /* No such entry */
- inode = NULL;
- goto done;
- }
- return ERR_PTR(res);
+ if (res != -ENOENT)
+ inode = ERR_PTR(res);
+ } else {
+ inode = hfs_iget(dir->i_sb, &fd.search_key->cat, &rec);
+ if (!inode)
+ inode = ERR_PTR(-EACCES);
}
- inode = hfs_iget(dir->i_sb, &fd.search_key->cat, &rec);
hfs_find_exit(&fd);
- if (!inode)
- return ERR_PTR(-EACCES);
-done:
- d_add(dentry, inode);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
/*
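Note: hfs here -- and minix, omfs, openpromfs and orangefs further down -- collapse their ->lookup() error plumbing the same way. A hedged sketch of the shape they converge on (the foofs_* names are placeholders, not from the patch):

static struct dentry *foofs_lookup(struct inode *dir, struct dentry *dentry,
				   unsigned int flags)
{
	struct inode *inode = NULL;		/* NULL => negative dentry */
	ino_t ino = foofs_inode_by_name(dir, &dentry->d_name);

	if (ino)
		inode = foofs_iget(dir->i_sb, ino);	/* may be ERR_PTR */

	/* d_splice_alias() accepts NULL and ERR_PTR() inodes itself,
	 * so the ENOENT/error special cases all become this one line. */
	return d_splice_alias(inode, dentry);
}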
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 2538b49cc349..b3309b83371a 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -543,9 +543,9 @@ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry,
igrab(dir);
hlist_add_fake(&inode->i_hash);
mark_inode_dirty(inode);
+ dont_mount(dentry);
out:
- d_add(dentry, inode);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
void hfs_evict_inode(struct inode *inode)
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 15e06fb552da..b5254378f011 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -122,8 +122,7 @@ again:
if (S_ISREG(inode->i_mode))
HFSPLUS_I(inode)->linkid = linkid;
out:
- d_add(dentry, inode);
- return NULL;
+ return d_splice_alias(inode, dentry);
fail:
hfs_find_exit(&fd);
return ERR_PTR(err);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 513c357c734b..a6c0f54c48c3 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -588,6 +588,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
return 0;
out_put_hidden_dir:
+ cancel_delayed_work_sync(&sbi->sync_work);
iput(sbi->hidden_dir);
out_put_root:
dput(sb->s_root);
diff --git a/fs/inode.c b/fs/inode.c
index 13ceb98c3bd3..3b55391072f3 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -178,6 +178,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
mapping->a_ops = &empty_aops;
mapping->host = inode;
mapping->flags = 0;
+ mapping->wb_err = 0;
atomic_set(&mapping->i_mmap_writable, 0);
mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
mapping->private_data = NULL;
diff --git a/fs/internal.h b/fs/internal.h
index e08972db0303..980d005b21b4 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -125,6 +125,7 @@ int do_fchmodat(int dfd, const char __user *filename, umode_t mode);
int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
int flag);
+extern int open_check_o_direct(struct file *f);
extern int vfs_open(const struct path *, struct file *, const struct cred *);
extern struct file *filp_clone_open(struct file *);
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 0a754f38462e..e5a6deb38e1e 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -209,8 +209,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
__func__, inode->i_ino, inode->i_mode, inode->i_nlink,
f->inocache->pino_nlink, inode->i_mapping->nrpages);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
fail:
@@ -430,8 +429,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
mutex_unlock(&dir_f->sem);
jffs2_complete_reservation(c);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
fail:
@@ -575,8 +573,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode
mutex_unlock(&dir_f->sem);
jffs2_complete_reservation(c);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
fail:
@@ -747,8 +744,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode
mutex_unlock(&dir_f->sem);
jffs2_complete_reservation(c);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
fail:
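Note: jffs2 above -- and jfs, nilfs2 and orangefs below -- replace the unlock_new_inode() + d_instantiate() pair with d_instantiate_new(). The point is ordering: the old sequence made the dentry reachable while the inode was still marked I_NEW. A sketch of what the combined helper guarantees (illustrative, not the exact dcache code):

void d_instantiate_new(struct dentry *entry, struct inode *inode)
{
	security_d_instantiate(entry, inode);
	spin_lock(&inode->i_lock);
	__d_instantiate(entry, inode);	/* dentry becomes reachable...   */
	inode->i_state &= ~I_NEW;	/* ...with I_NEW cleared under   */
	wake_up_bit(&inode->i_state, __I_NEW);	/* the same i_lock       */
	spin_unlock(&inode->i_lock);
}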
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index a70907606025..35a5b2a81ae0 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -29,7 +29,6 @@
#ifdef PROC_FS_JFS /* see jfs_debug.h */
-static struct proc_dir_entry *base;
#ifdef CONFIG_JFS_DEBUG
static int jfs_loglevel_proc_show(struct seq_file *m, void *v)
{
@@ -66,43 +65,29 @@ static const struct file_operations jfs_loglevel_proc_fops = {
};
#endif
-static struct {
- const char *name;
- const struct file_operations *proc_fops;
-} Entries[] = {
-#ifdef CONFIG_JFS_STATISTICS
- { "lmstats", &jfs_lmstats_proc_fops, },
- { "txstats", &jfs_txstats_proc_fops, },
- { "xtstat", &jfs_xtstat_proc_fops, },
- { "mpstat", &jfs_mpstat_proc_fops, },
-#endif
-#ifdef CONFIG_JFS_DEBUG
- { "TxAnchor", &jfs_txanchor_proc_fops, },
- { "loglevel", &jfs_loglevel_proc_fops }
-#endif
-};
-#define NPROCENT ARRAY_SIZE(Entries)
-
void jfs_proc_init(void)
{
- int i;
+ struct proc_dir_entry *base;
- if (!(base = proc_mkdir("fs/jfs", NULL)))
+ base = proc_mkdir("fs/jfs", NULL);
+ if (!base)
return;
- for (i = 0; i < NPROCENT; i++)
- proc_create(Entries[i].name, 0, base, Entries[i].proc_fops);
+#ifdef CONFIG_JFS_STATISTICS
+ proc_create_single("lmstats", 0, base, jfs_lmstats_proc_show);
+ proc_create_single("txstats", 0, base, jfs_txstats_proc_show);
+ proc_create_single("xtstat", 0, base, jfs_xtstat_proc_show);
+ proc_create_single("mpstat", 0, base, jfs_mpstat_proc_show);
+#endif
+#ifdef CONFIG_JFS_DEBUG
+ proc_create_single("TxAnchor", 0, base, jfs_txanchor_proc_show);
+ proc_create("loglevel", 0, base, &jfs_loglevel_proc_fops);
+#endif
}
void jfs_proc_clean(void)
{
- int i;
-
- if (base) {
- for (i = 0; i < NPROCENT; i++)
- remove_proc_entry(Entries[i].name, base);
- remove_proc_entry("fs/jfs", NULL);
- }
+ remove_proc_subtree("fs/jfs", NULL);
}
#endif /* PROC_FS_JFS */
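Note: jfs is one of many conversions in this series (fs/locks.c, fs/nfs/client.c and the fs/proc files below follow) to the new procfs helpers, which absorb the single_open()/seq_open() boilerplate. A minimal self-contained sketch of the single-show variant ("example" is a placeholder, not from the patch):

#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int example_proc_show(struct seq_file *m, void *v)
{
	seq_puts(m, "hello\n");
	return 0;
}

static int __init example_proc_init(void)
{
	/* One call replaces the whole open/read/llseek/release fops
	 * block; proc_create_seq() and proc_create_net() are the
	 * analogues for full seq_operations and per-netns files. */
	proc_create_single("example", 0, NULL, example_proc_show);
	return 0;
}
fs_initcall(example_proc_init);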
diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h
index eafd1300a00b..0d9e35da8462 100644
--- a/fs/jfs/jfs_debug.h
+++ b/fs/jfs/jfs_debug.h
@@ -62,7 +62,7 @@ extern void jfs_proc_clean(void);
extern int jfsloglevel;
-extern const struct file_operations jfs_txanchor_proc_fops;
+int jfs_txanchor_proc_show(struct seq_file *m, void *v);
/* information message: e.g., configuration, major event */
#define jfs_info(fmt, arg...) do { \
@@ -105,10 +105,10 @@ extern const struct file_operations jfs_txanchor_proc_fops;
* ----------
*/
#ifdef CONFIG_JFS_STATISTICS
-extern const struct file_operations jfs_lmstats_proc_fops;
-extern const struct file_operations jfs_txstats_proc_fops;
-extern const struct file_operations jfs_mpstat_proc_fops;
-extern const struct file_operations jfs_xtstat_proc_fops;
+int jfs_lmstats_proc_show(struct seq_file *m, void *v);
+int jfs_txstats_proc_show(struct seq_file *m, void *v);
+int jfs_mpstat_proc_show(struct seq_file *m, void *v);
+int jfs_xtstat_proc_show(struct seq_file *m, void *v);
#define INCREMENT(x) ((x)++)
#define DECREMENT(x) ((x)--)
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 0e5d412c0b01..6b68df395892 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2493,7 +2493,7 @@ exit:
}
#ifdef CONFIG_JFS_STATISTICS
-static int jfs_lmstats_proc_show(struct seq_file *m, void *v)
+int jfs_lmstats_proc_show(struct seq_file *m, void *v)
{
seq_printf(m,
"JFS Logmgr stats\n"
@@ -2510,16 +2510,4 @@ static int jfs_lmstats_proc_show(struct seq_file *m, void *v)
lmStat.partial_page);
return 0;
}
-
-static int jfs_lmstats_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, jfs_lmstats_proc_show, NULL);
-}
-
-const struct file_operations jfs_lmstats_proc_fops = {
- .open = jfs_lmstats_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif /* CONFIG_JFS_STATISTICS */
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 1a3b0cc22ad3..fa2c6824c7f2 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -815,7 +815,7 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len)
}
#ifdef CONFIG_JFS_STATISTICS
-static int jfs_mpstat_proc_show(struct seq_file *m, void *v)
+int jfs_mpstat_proc_show(struct seq_file *m, void *v)
{
seq_printf(m,
"JFS Metapage statistics\n"
@@ -828,16 +828,4 @@ static int jfs_mpstat_proc_show(struct seq_file *m, void *v)
mpStat.lockwait);
return 0;
}
-
-static int jfs_mpstat_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, jfs_mpstat_proc_show, NULL);
-}
-
-const struct file_operations jfs_mpstat_proc_fops = {
- .open = jfs_mpstat_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 4d973524c887..a5663cb621d8 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -2998,7 +2998,7 @@ int jfs_sync(void *arg)
}
#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG)
-static int jfs_txanchor_proc_show(struct seq_file *m, void *v)
+int jfs_txanchor_proc_show(struct seq_file *m, void *v)
{
char *freewait;
char *freelockwait;
@@ -3032,22 +3032,10 @@ static int jfs_txanchor_proc_show(struct seq_file *m, void *v)
list_empty(&TxAnchor.unlock_queue) ? "" : "not ");
return 0;
}
-
-static int jfs_txanchor_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, jfs_txanchor_proc_show, NULL);
-}
-
-const struct file_operations jfs_txanchor_proc_fops = {
- .open = jfs_txanchor_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif
#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS)
-static int jfs_txstats_proc_show(struct seq_file *m, void *v)
+int jfs_txstats_proc_show(struct seq_file *m, void *v)
{
seq_printf(m,
"JFS TxStats\n"
@@ -3072,16 +3060,4 @@ static int jfs_txstats_proc_show(struct seq_file *m, void *v)
TxStat.txLockAlloc_freelock);
return 0;
}
-
-static int jfs_txstats_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, jfs_txstats_proc_show, NULL);
-}
-
-const struct file_operations jfs_txstats_proc_fops = {
- .open = jfs_txstats_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index 5cde6d2fcfca..2c200b5256a6 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -3874,7 +3874,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
}
#ifdef CONFIG_JFS_STATISTICS
-static int jfs_xtstat_proc_show(struct seq_file *m, void *v)
+int jfs_xtstat_proc_show(struct seq_file *m, void *v)
{
seq_printf(m,
"JFS Xtree statistics\n"
@@ -3887,16 +3887,4 @@ static int jfs_xtstat_proc_show(struct seq_file *m, void *v)
xtStat.split);
return 0;
}
-
-static int jfs_xtstat_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, jfs_xtstat_proc_show, NULL);
-}
-
-const struct file_operations jfs_xtstat_proc_fops = {
- .open = jfs_xtstat_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index b41596d71858..56c3fcbfe80e 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -178,8 +178,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
unlock_new_inode(ip);
iput(ip);
} else {
- unlock_new_inode(ip);
- d_instantiate(dentry, ip);
+ d_instantiate_new(dentry, ip);
}
out2:
@@ -313,8 +312,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
unlock_new_inode(ip);
iput(ip);
} else {
- unlock_new_inode(ip);
- d_instantiate(dentry, ip);
+ d_instantiate_new(dentry, ip);
}
out2:
@@ -1059,8 +1057,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
unlock_new_inode(ip);
iput(ip);
} else {
- unlock_new_inode(ip);
- d_instantiate(dentry, ip);
+ d_instantiate_new(dentry, ip);
}
out2:
@@ -1447,8 +1444,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
unlock_new_inode(ip);
iput(ip);
} else {
- unlock_new_inode(ip);
- d_instantiate(dentry, ip);
+ d_instantiate_new(dentry, ip);
}
out1:
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 26dd9a50f383..ff2716f9322e 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -316,6 +316,7 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
info->root = root;
info->ns = ns;
+ INIT_LIST_HEAD(&info->node);
sb = sget_userns(fs_type, kernfs_test_super, kernfs_set_super, flags,
&init_user_ns, info);
diff --git a/fs/locks.c b/fs/locks.c
index 62bbe8b31f26..05e211be8684 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2788,22 +2788,10 @@ static const struct seq_operations locks_seq_operations = {
.show = locks_show,
};
-static int locks_open(struct inode *inode, struct file *filp)
-{
- return seq_open_private(filp, &locks_seq_operations,
- sizeof(struct locks_iterator));
-}
-
-static const struct file_operations proc_locks_operations = {
- .open = locks_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-
static int __init proc_locks_init(void)
{
- proc_create("locks", 0, NULL, &proc_locks_operations);
+ proc_create_seq_private("locks", 0, NULL, &locks_seq_operations,
+ sizeof(struct locks_iterator), NULL);
return 0;
}
fs_initcall(proc_locks_init);
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index ccf0f00030bf..1a6084d2b02e 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -28,13 +28,9 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, un
return ERR_PTR(-ENAMETOOLONG);
ino = minix_inode_by_name(dentry);
- if (ino) {
+ if (ino)
inode = minix_iget(dir->i_sb, ino);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
- }
- d_add(dentry, inode);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
static int minix_mknod(struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev)
diff --git a/fs/namei.c b/fs/namei.c
index 20335896dcce..6df1f61855d6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1440,10 +1440,8 @@ static int path_parent_directory(struct path *path)
static int follow_dotdot(struct nameidata *nd)
{
while(1) {
- if (nd->path.dentry == nd->root.dentry &&
- nd->path.mnt == nd->root.mnt) {
+ if (path_equal(&nd->path, &nd->root))
break;
- }
if (nd->path.dentry != nd->path.mnt->mnt_root) {
int ret = path_parent_directory(&nd->path);
if (ret)
@@ -3374,7 +3372,9 @@ finish_open_created:
goto out;
*opened |= FILE_OPENED;
opened:
- error = ima_file_check(file, op->acc_mode, *opened);
+ error = open_check_o_direct(file);
+ if (!error)
+ error = ima_file_check(file, op->acc_mode, *opened);
if (!error && will_truncate)
error = handle_truncate(file);
out:
@@ -3454,6 +3454,9 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
error = finish_open(file, child, NULL, opened);
if (error)
goto out2;
+ error = open_check_o_direct(file);
+ if (error)
+ fput(file);
out2:
mnt_drop_write(path.mnt);
out:
@@ -3855,11 +3858,11 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
if (error)
goto out;
- shrink_dcache_parent(dentry);
error = dir->i_op->rmdir(dir, dentry);
if (error)
goto out;
+ shrink_dcache_parent(dentry);
dentry->d_inode->i_flags |= S_DEAD;
dont_mount(dentry);
detach_mounts(dentry);
@@ -4442,8 +4445,6 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_dir->i_nlink >= max_links)
goto out;
}
- if (is_dir && !(flags & RENAME_EXCHANGE) && target)
- shrink_dcache_parent(new_dentry);
if (!is_dir) {
error = try_break_deleg(source, delegated_inode);
if (error)
@@ -4460,8 +4461,10 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out;
if (!(flags & RENAME_EXCHANGE) && target) {
- if (is_dir)
+ if (is_dir) {
+ shrink_dcache_parent(new_dentry);
target->i_flags |= S_DEAD;
+ }
dont_mount(new_dentry);
detach_mounts(new_dentry);
}
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index b9129e2befea..bbc91d7ca1bd 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1067,7 +1067,6 @@ void nfs_clients_init(struct net *net)
}
#ifdef CONFIG_PROC_FS
-static int nfs_server_list_open(struct inode *inode, struct file *file);
static void *nfs_server_list_start(struct seq_file *p, loff_t *pos);
static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos);
static void nfs_server_list_stop(struct seq_file *p, void *v);
@@ -1080,14 +1079,6 @@ static const struct seq_operations nfs_server_list_ops = {
.show = nfs_server_list_show,
};
-static const struct file_operations nfs_server_list_fops = {
- .open = nfs_server_list_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
-static int nfs_volume_list_open(struct inode *inode, struct file *file);
static void *nfs_volume_list_start(struct seq_file *p, loff_t *pos);
static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos);
static void nfs_volume_list_stop(struct seq_file *p, void *v);
@@ -1100,23 +1091,6 @@ static const struct seq_operations nfs_volume_list_ops = {
.show = nfs_volume_list_show,
};
-static const struct file_operations nfs_volume_list_fops = {
- .open = nfs_volume_list_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
-/*
- * open "/proc/fs/nfsfs/servers" which provides a summary of servers with which
- * we're dealing
- */
-static int nfs_server_list_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &nfs_server_list_ops,
- sizeof(struct seq_net_private));
-}
-
/*
* set up the iterator to start reading from the server list and return the first item
*/
@@ -1185,15 +1159,6 @@ static int nfs_server_list_show(struct seq_file *m, void *v)
}
/*
- * open "/proc/fs/nfsfs/volumes" which provides a summary of extant volumes
- */
-static int nfs_volume_list_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &nfs_volume_list_ops,
- sizeof(struct seq_net_private));
-}
-
-/*
* set up the iterator to start reading from the volume list and return the first item
*/
static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
@@ -1278,14 +1243,14 @@ int nfs_fs_proc_net_init(struct net *net)
goto error_0;
/* a file of servers with which we're dealing */
- p = proc_create("servers", S_IFREG|S_IRUGO,
- nn->proc_nfsfs, &nfs_server_list_fops);
+ p = proc_create_net("servers", S_IFREG|S_IRUGO, nn->proc_nfsfs,
+ &nfs_server_list_ops, sizeof(struct seq_net_private));
if (!p)
goto error_1;
/* a file of volumes that we have mounted */
- p = proc_create("volumes", S_IFREG|S_IRUGO,
- nn->proc_nfsfs, &nfs_volume_list_fops);
+ p = proc_create_net("volumes", S_IFREG|S_IRUGO, nn->proc_nfsfs,
+ &nfs_volume_list_ops, sizeof(struct seq_net_private));
if (!p)
goto error_1;
return 0;
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 70b8bf781fce..a43dfedd69ec 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -227,7 +227,7 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
if (!buf)
return -ENOMEM;
- rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL);
+ rq = blk_get_request(q, REQ_OP_SCSI_IN, 0);
if (IS_ERR(rq)) {
error = -ENOMEM;
goto out_free_buf;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 2410b093a2e6..b0555d7d8200 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1201,6 +1201,28 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
break;
case S_IFDIR:
host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
+ if (!host_err && unlikely(d_unhashed(dchild))) {
+ struct dentry *d;
+ d = lookup_one_len(dchild->d_name.name,
+ dchild->d_parent,
+ dchild->d_name.len);
+ if (IS_ERR(d)) {
+ host_err = PTR_ERR(d);
+ break;
+ }
+ if (unlikely(d_is_negative(d))) {
+ dput(d);
+ err = nfserr_serverfault;
+ goto out;
+ }
+ dput(resfhp->fh_dentry);
+ resfhp->fh_dentry = dget(d);
+ err = fh_update(resfhp);
+ dput(dchild);
+ dchild = d;
+ if (err)
+ goto out;
+ }
break;
case S_IFCHR:
case S_IFBLK:
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 1a2894aa0194..dd52d3f82e8d 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -46,8 +46,7 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode)
int err = nilfs_add_link(dentry, inode);
if (!err) {
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
return 0;
}
inode_dec_link_count(inode);
@@ -243,8 +242,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
goto out_fail;
nilfs_mark_inode_dirty(inode);
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
out:
if (!err)
err = nilfs_transaction_commit(dir->i_sb);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 91a8889abf9b..ea8c551bcd7e 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -570,16 +570,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
current_page, vec_len, vec_start);
len = bio_add_page(bio, page, vec_len, vec_start);
- if (len != vec_len) {
- mlog(ML_ERROR, "Adding page[%d] to bio failed, "
- "page %p, len %d, vec_len %u, vec_start %u, "
- "bi_sector %llu\n", current_page, page, len,
- vec_len, vec_start,
- (unsigned long long)bio->bi_iter.bi_sector);
- bio_put(bio);
- bio = ERR_PTR(-EIO);
- return bio;
- }
+ if (len != vec_len)
+ break;
cs += vec_len / (PAGE_SIZE/spp);
vec_start = 0;
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 01c6b3894406..7869622af22a 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4250,10 +4250,11 @@ out:
static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
struct dentry *new_dentry, bool preserve)
{
- int error;
+ int error, had_lock;
struct inode *inode = d_inode(old_dentry);
struct buffer_head *old_bh = NULL;
struct inode *new_orphan_inode = NULL;
+ struct ocfs2_lock_holder oh;
if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
return -EOPNOTSUPP;
@@ -4295,6 +4296,14 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
goto out;
}
+ had_lock = ocfs2_inode_lock_tracker(new_orphan_inode, NULL, 1,
+ &oh);
+ if (had_lock < 0) {
+ error = had_lock;
+ mlog_errno(error);
+ goto out;
+ }
+
/* If the security isn't preserved, we need to re-initialize them. */
if (!preserve) {
error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
@@ -4302,14 +4311,15 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
if (error)
mlog_errno(error);
}
-out:
if (!error) {
error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
new_dentry);
if (error)
mlog_errno(error);
}
+ ocfs2_inode_unlock_tracker(new_orphan_inode, 1, &oh, had_lock);
+out:
if (new_orphan_inode) {
/*
* We need to open_unlock the inode no matter whether we
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index b7146526afff..4bee3a72b9f3 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -305,11 +305,10 @@ static struct dentry *omfs_lookup(struct inode *dir, struct dentry *dentry,
ino_t ino = be64_to_cpu(oi->i_head.h_self);
brelse(bh);
inode = omfs_iget(dir->i_sb, ino);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
+ } else if (bh != ERR_PTR(-ENOENT)) {
+ inode = ERR_CAST(bh);
}
- d_add(dentry, inode);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
/* sanity check block's self pointer */
diff --git a/fs/open.c b/fs/open.c
index c5ee7cd60424..d0e955b558ad 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -724,6 +724,16 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
return ksys_fchown(fd, user, group);
}
+int open_check_o_direct(struct file *f)
+{
+ /* NB: we're sure to have correct a_ops only after f_op->open */
+ if (f->f_flags & O_DIRECT) {
+ if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
+ return -EINVAL;
+ }
+ return 0;
+}
+
static int do_dentry_open(struct file *f,
struct inode *inode,
int (*open)(struct inode *, struct file *),
@@ -745,7 +755,7 @@ static int do_dentry_open(struct file *f,
if (unlikely(f->f_flags & O_PATH)) {
f->f_mode = FMODE_PATH;
f->f_op = &empty_fops;
- goto done;
+ return 0;
}
if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
@@ -798,12 +808,7 @@ static int do_dentry_open(struct file *f,
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
-done:
- /* NB: we're sure to have correct a_ops only after f_op->open */
- error = -EINVAL;
- if ((f->f_flags & O_DIRECT) &&
- (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO))
- goto out_fput;
+
return 0;
cleanup_all:
@@ -818,9 +823,6 @@ cleanup_file:
f->f_path.dentry = NULL;
f->f_inode = NULL;
return error;
-out_fput:
- fput(f);
- return error;
}
/**
@@ -918,14 +920,20 @@ struct file *dentry_open(const struct path *path, int flags,
BUG_ON(!path->mnt);
f = get_empty_filp();
- if (IS_ERR(f))
- return f;
-
- f->f_flags = flags;
- error = vfs_open(path, f, cred);
- if (error) {
- put_filp(f);
- return ERR_PTR(error);
+ if (!IS_ERR(f)) {
+ f->f_flags = flags;
+ error = vfs_open(path, f, cred);
+ if (!error) {
+ /* from now on we need fput() to dispose of f */
+ error = open_check_o_direct(f);
+ if (error) {
+ fput(f);
+ f = ERR_PTR(error);
+ }
+ } else {
+ put_filp(f);
+ f = ERR_PTR(error);
+ }
}
return f;
}
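Note: all three call sites (do_last() and do_tmpfile() above, dentry_open() here) now follow the same contract, which is worth spelling out because the cleanup primitive changes mid-sequence -- a hedged restatement of the code above, not new logic:

	error = vfs_open(path, f, cred);
	if (error) {
		put_filp(f);		/* ->open never ran: bare free  */
		return ERR_PTR(error);
	}
	error = open_check_o_direct(f);	/* a_ops are only final now     */
	if (error) {
		fput(f);		/* ->open ran: full teardown    */
		return ERR_PTR(error);
	}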
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 2200662a9bf1..607092f367ad 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -256,8 +256,7 @@ found:
break;
}
- d_add(dentry, inode);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
static int openpromfs_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c
index 6e3134e6d98a..365cd73d9109 100644
--- a/fs/orangefs/namei.c
+++ b/fs/orangefs/namei.c
@@ -75,8 +75,7 @@ static int orangefs_create(struct inode *dir,
get_khandle_from_ino(inode),
dentry);
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
orangefs_set_timeout(dentry);
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS;
@@ -111,7 +110,6 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry,
struct orangefs_inode_s *parent = ORANGEFS_I(dir);
struct orangefs_kernel_op_s *new_op;
struct inode *inode;
- struct dentry *res;
int ret = -EINVAL;
/*
@@ -159,65 +157,18 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry,
new_op->downcall.resp.lookup.refn.fs_id,
ret);
- if (ret < 0) {
- if (ret == -ENOENT) {
- /*
- * if no inode was found, add a negative dentry to
- * dcache anyway; if we don't, we don't hold expected
- * lookup semantics and we most noticeably break
- * during directory renames.
- *
- * however, if the operation failed or exited, do not
- * add the dentry (e.g. in the case that a touch is
- * issued on a file that already exists that was
- * interrupted during this lookup -- no need to add
- * another negative dentry for an existing file)
- */
-
- gossip_debug(GOSSIP_NAME_DEBUG,
- "orangefs_lookup: Adding *negative* dentry "
- "%p for %pd\n",
- dentry,
- dentry);
-
- d_add(dentry, NULL);
- res = NULL;
- goto out;
- }
-
+ if (ret >= 0) {
+ orangefs_set_timeout(dentry);
+ inode = orangefs_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn);
+ } else if (ret == -ENOENT) {
+ inode = NULL;
+ } else {
/* must be a non-recoverable error */
- res = ERR_PTR(ret);
- goto out;
- }
-
- orangefs_set_timeout(dentry);
-
- inode = orangefs_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn);
- if (IS_ERR(inode)) {
- gossip_debug(GOSSIP_NAME_DEBUG,
- "error %ld from iget\n", PTR_ERR(inode));
- res = ERR_CAST(inode);
- goto out;
+ inode = ERR_PTR(ret);
}
- gossip_debug(GOSSIP_NAME_DEBUG,
- "%s:%s:%d "
- "Found good inode [%lu] with count [%d]\n",
- __FILE__,
- __func__,
- __LINE__,
- inode->i_ino,
- (int)atomic_read(&inode->i_count));
-
- /* update dentry/inode pair into dcache */
- res = d_splice_alias(inode, dentry);
-
- gossip_debug(GOSSIP_NAME_DEBUG,
- "Lookup success (inode ct = %d)\n",
- (int)atomic_read(&inode->i_count));
-out:
op_release(new_op);
- return res;
+ return d_splice_alias(inode, dentry);
}
/* return 0 on success; non-zero otherwise */
@@ -332,8 +283,7 @@ static int orangefs_symlink(struct inode *dir,
"Assigned symlink inode new number of %pU\n",
get_khandle_from_ino(inode));
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
orangefs_set_timeout(dentry);
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS;
@@ -402,8 +352,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
"Assigned dir inode new number of %pU\n",
get_khandle_from_ino(inode));
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
orangefs_set_timeout(dentry);
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS;
diff --git a/fs/pipe.c b/fs/pipe.c
index 39d6f431da83..bb0840e234f3 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -509,19 +509,22 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
}
}
-/* No kernel lock held - fine */
-static __poll_t
-pipe_poll(struct file *filp, poll_table *wait)
+static struct wait_queue_head *
+pipe_get_poll_head(struct file *filp, __poll_t events)
{
- __poll_t mask;
struct pipe_inode_info *pipe = filp->private_data;
- int nrbufs;
- poll_wait(filp, &pipe->wait, wait);
+ return &pipe->wait;
+}
+
+/* No kernel lock held - fine */
+static __poll_t pipe_poll_mask(struct file *filp, __poll_t events)
+{
+ struct pipe_inode_info *pipe = filp->private_data;
+ int nrbufs = pipe->nrbufs;
+ __poll_t mask = 0;
/* Reading only -- no need for acquiring the semaphore. */
- nrbufs = pipe->nrbufs;
- mask = 0;
if (filp->f_mode & FMODE_READ) {
mask = (nrbufs > 0) ? EPOLLIN | EPOLLRDNORM : 0;
if (!pipe->writers && filp->f_version != pipe->w_counter)
@@ -1020,7 +1023,8 @@ const struct file_operations pipefifo_fops = {
.llseek = no_llseek,
.read_iter = pipe_read,
.write_iter = pipe_write,
- .poll = pipe_poll,
+ .get_poll_head = pipe_get_poll_head,
+ .poll_mask = pipe_poll_mask,
.unlocked_ioctl = pipe_ioctl,
.release = pipe_release,
.fasync = pipe_fasync,
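Note: the pipe conversion shows the whole of the new poll interface: ->get_poll_head() tells the core where to sleep, and ->poll_mask() computes readiness without ever calling poll_wait() itself. A hedged sketch for a made-up driver (myfifo is a placeholder):

struct myfifo {
	struct wait_queue_head wait;
	size_t len, capacity;
};

static struct wait_queue_head *
myfifo_get_poll_head(struct file *filp, __poll_t events)
{
	struct myfifo *fifo = filp->private_data;

	return &fifo->wait;	/* the core does the poll_wait() for us */
}

static __poll_t myfifo_poll_mask(struct file *filp, __poll_t events)
{
	struct myfifo *fifo = filp->private_data;
	__poll_t mask = 0;

	if (fifo->len > 0)
		mask |= EPOLLIN | EPOLLRDNORM;
	if (fifo->len < fifo->capacity)
		mask |= EPOLLOUT | EPOLLWRNORM;
	return mask;
}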
diff --git a/fs/proc/array.c b/fs/proc/array.c
index ae2c807fd719..e6d7f41b6684 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -85,6 +85,7 @@
#include <linux/delayacct.h>
#include <linux/seq_file.h>
#include <linux/pid_namespace.h>
+#include <linux/prctl.h>
#include <linux/ptrace.h>
#include <linux/tracehook.h>
#include <linux/string_helpers.h>
@@ -335,6 +336,30 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
#ifdef CONFIG_SECCOMP
seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode);
#endif
+ seq_printf(m, "\nSpeculation_Store_Bypass:\t");
+ switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) {
+ case -EINVAL:
+ seq_printf(m, "unknown");
+ break;
+ case PR_SPEC_NOT_AFFECTED:
+ seq_printf(m, "not vulnerable");
+ break;
+ case PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE:
+ seq_printf(m, "thread force mitigated");
+ break;
+ case PR_SPEC_PRCTL | PR_SPEC_DISABLE:
+ seq_printf(m, "thread mitigated");
+ break;
+ case PR_SPEC_PRCTL | PR_SPEC_ENABLE:
+ seq_printf(m, "thread vulnerable");
+ break;
+ case PR_SPEC_DISABLE:
+ seq_printf(m, "globally mitigated");
+ break;
+ default:
+ seq_printf(m, "vulnerable");
+ break;
+ }
seq_putc(m, '\n');
}
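Note: the values being decoded above are the same ones a task can query about itself via prctl(2); the /proc field just exposes them to observers. A hedged userspace illustration (error handling elided):

#include <stdio.h>
#include <sys/prctl.h>
#include <linux/prctl.h>

int main(void)
{
	int v = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
		      0, 0, 0);

	/* v decodes exactly like the switch above, e.g.
	 * (PR_SPEC_PRCTL | PR_SPEC_DISABLE) => "thread mitigated". */
	printf("ssb status: 0x%x\n", v);
	return 0;
}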
@@ -677,25 +702,22 @@ out:
static int children_seq_show(struct seq_file *seq, void *v)
{
- struct inode *inode = seq->private;
- pid_t pid;
-
- pid = pid_nr_ns(v, inode->i_sb->s_fs_info);
- seq_printf(seq, "%d ", pid);
+ struct inode *inode = file_inode(seq->file);
+ seq_printf(seq, "%d ", pid_nr_ns(v, proc_pid_ns(inode)));
return 0;
}
static void *children_seq_start(struct seq_file *seq, loff_t *pos)
{
- return get_children_pid(seq->private, NULL, *pos);
+ return get_children_pid(file_inode(seq->file), NULL, *pos);
}
static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct pid *pid;
- pid = get_children_pid(seq->private, v, *pos + 1);
+ pid = get_children_pid(file_inode(seq->file), v, *pos + 1);
put_pid(v);
++*pos;
@@ -716,17 +738,7 @@ static const struct seq_operations children_seq_ops = {
static int children_seq_open(struct inode *inode, struct file *file)
{
- struct seq_file *m;
- int ret;
-
- ret = seq_open(file, &children_seq_ops);
- if (ret)
- return ret;
-
- m = file->private_data;
- m->private = inode;
-
- return ret;
+ return seq_open(file, &children_seq_ops);
}
const struct file_operations proc_tid_children_operations = {
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1b2ede6abcdf..33ed1746927a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -261,7 +261,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
* Inherently racy -- command line shares address space
* with code and data.
*/
- rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0);
+ rv = access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON);
if (rv <= 0)
goto out_free_page;
@@ -279,7 +279,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
int nr_read;
_count = min3(count, len, PAGE_SIZE);
- nr_read = access_remote_vm(mm, p, page, _count, 0);
+ nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
if (nr_read < 0)
rv = nr_read;
if (nr_read <= 0)
@@ -325,7 +325,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
bool final;
_count = min3(count, len, PAGE_SIZE);
- nr_read = access_remote_vm(mm, p, page, _count, 0);
+ nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
if (nr_read < 0)
rv = nr_read;
if (nr_read <= 0)
@@ -698,7 +698,7 @@ static bool has_pid_permissions(struct pid_namespace *pid,
static int proc_pid_permission(struct inode *inode, int mask)
{
- struct pid_namespace *pid = inode->i_sb->s_fs_info;
+ struct pid_namespace *pid = proc_pid_ns(inode);
struct task_struct *task;
bool has_perms;
@@ -733,13 +733,11 @@ static const struct inode_operations proc_def_inode_operations = {
static int proc_single_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
- struct pid_namespace *ns;
- struct pid *pid;
+ struct pid_namespace *ns = proc_pid_ns(inode);
+ struct pid *pid = proc_pid(inode);
struct task_struct *task;
int ret;
- ns = inode->i_sb->s_fs_info;
- pid = proc_pid(inode);
task = get_pid_task(pid, PIDTYPE_PID);
if (!task)
return -ESRCH;
@@ -946,7 +944,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
max_len = min_t(size_t, PAGE_SIZE, count);
this_len = min(max_len, this_len);
- retval = access_remote_vm(mm, (env_start + src), page, this_len, 0);
+ retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON);
if (retval <= 0) {
ret = retval;
@@ -1410,7 +1408,7 @@ static const struct file_operations proc_fail_nth_operations = {
static int sched_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
- struct pid_namespace *ns = inode->i_sb->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(inode);
struct task_struct *p;
p = get_proc_task(inode);
@@ -1782,8 +1780,8 @@ int pid_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
+ struct pid_namespace *pid = proc_pid_ns(inode);
struct task_struct *task;
- struct pid_namespace *pid = path->dentry->d_sb->s_fs_info;
generic_fillattr(inode, stat);
@@ -1809,15 +1807,22 @@ int pid_getattr(const struct path *path, struct kstat *stat,
/* dentry stuff */
/*
- * Exceptional case: normally we are not allowed to unhash a busy
- * directory. In this case, however, we can do it - no aliasing problems
- * due to the way we treat inodes.
- *
+ * Set <pid>/... inode ownership (can change due to setuid(), etc.)
+ */
+void pid_update_inode(struct task_struct *task, struct inode *inode)
+{
+ task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
+
+ inode->i_mode &= ~(S_ISUID | S_ISGID);
+ security_task_to_inode(task, inode);
+}
+
+/*
* Rewrite the inode's ownerships here because the owning task may have
* performed a setuid(), etc.
*
*/
-int pid_revalidate(struct dentry *dentry, unsigned int flags)
+static int pid_revalidate(struct dentry *dentry, unsigned int flags)
{
struct inode *inode;
struct task_struct *task;
@@ -1829,10 +1834,7 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags)
task = get_proc_task(inode);
if (task) {
- task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
-
- inode->i_mode &= ~(S_ISUID | S_ISGID);
- security_task_to_inode(task, inode);
+ pid_update_inode(task, inode);
put_task_struct(task);
return 1;
}
@@ -1880,8 +1882,8 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx,
struct dentry *child, *dir = file->f_path.dentry;
struct qstr qname = QSTR_INIT(name, len);
struct inode *inode;
- unsigned type;
- ino_t ino;
+ unsigned type = DT_UNKNOWN;
+ ino_t ino = 1;
child = d_hash_and_lookup(dir, &qname);
if (!child) {
@@ -1890,22 +1892,23 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx,
if (IS_ERR(child))
goto end_instantiate;
if (d_in_lookup(child)) {
- int err = instantiate(d_inode(dir), child, task, ptr);
+ struct dentry *res;
+ res = instantiate(child, task, ptr);
d_lookup_done(child);
- if (err < 0) {
- dput(child);
+ if (IS_ERR(res))
goto end_instantiate;
+ if (unlikely(res)) {
+ dput(child);
+ child = res;
}
}
}
inode = d_inode(child);
ino = inode->i_ino;
type = inode->i_mode >> 12;
+end_instantiate:
dput(child);
return dir_emit(ctx, name, len, ino, type);
-
-end_instantiate:
- return dir_emit(ctx, name, len, 1, DT_UNKNOWN);
}
/*
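Note: every proc *_instantiate() converted below follows the same new contract; a sketch of it (the typedef shape matches what fs/proc uses after this series, but treat it as illustrative):

typedef struct dentry *instantiate_t(struct dentry *dentry,
				     struct task_struct *task,
				     const void *ptr);
/*
 * Return values, by way of d_splice_alias():
 *   NULL        - the passed-in dentry was bound and should be used;
 *   a dentry    - an existing alias to use instead (the caller dput()s
 *                 the original, as proc_fill_cache() above does);
 *   ERR_PTR(-E) - propagate the error.
 */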
@@ -2067,19 +2070,19 @@ static const struct inode_operations proc_map_files_link_inode_operations = {
.setattr = proc_setattr,
};
-static int
-proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
+static struct dentry *
+proc_map_files_instantiate(struct dentry *dentry,
struct task_struct *task, const void *ptr)
{
fmode_t mode = (fmode_t)(unsigned long)ptr;
struct proc_inode *ei;
struct inode *inode;
- inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK |
+ inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK |
((mode & FMODE_READ ) ? S_IRUSR : 0) |
((mode & FMODE_WRITE) ? S_IWUSR : 0));
if (!inode)
- return -ENOENT;
+ return ERR_PTR(-ENOENT);
ei = PROC_I(inode);
ei->op.proc_get_link = map_files_get_link;
@@ -2088,9 +2091,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
inode->i_size = 64;
d_set_d_op(dentry, &tid_map_files_dentry_operations);
- d_add(dentry, inode);
-
- return 0;
+ return d_splice_alias(inode, dentry);
}
static struct dentry *proc_map_files_lookup(struct inode *dir,
@@ -2099,19 +2100,19 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
unsigned long vm_start, vm_end;
struct vm_area_struct *vma;
struct task_struct *task;
- int result;
+ struct dentry *result;
struct mm_struct *mm;
- result = -ENOENT;
+ result = ERR_PTR(-ENOENT);
task = get_proc_task(dir);
if (!task)
goto out;
- result = -EACCES;
+ result = ERR_PTR(-EACCES);
if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
goto out_put_task;
- result = -ENOENT;
+ result = ERR_PTR(-ENOENT);
if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
goto out_put_task;
@@ -2125,7 +2126,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
goto out_no_vma;
if (vma->vm_file)
- result = proc_map_files_instantiate(dir, dentry, task,
+ result = proc_map_files_instantiate(dentry, task,
(void *)(unsigned long)vma->vm_file->f_mode);
out_no_vma:
@@ -2134,7 +2135,7 @@ out_no_vma:
out_put_task:
put_task_struct(task);
out:
- return ERR_PTR(result);
+ return result;
}
static const struct inode_operations proc_map_files_inode_operations = {
@@ -2337,7 +2338,7 @@ static int proc_timers_open(struct inode *inode, struct file *file)
return -ENOMEM;
tp->pid = proc_pid(inode);
- tp->ns = inode->i_sb->s_fs_info;
+ tp->ns = proc_pid_ns(inode);
return 0;
}
@@ -2435,16 +2436,16 @@ static const struct file_operations proc_pid_set_timerslack_ns_operations = {
.release = single_release,
};
-static int proc_pident_instantiate(struct inode *dir,
- struct dentry *dentry, struct task_struct *task, const void *ptr)
+static struct dentry *proc_pident_instantiate(struct dentry *dentry,
+ struct task_struct *task, const void *ptr)
{
const struct pid_entry *p = ptr;
struct inode *inode;
struct proc_inode *ei;
- inode = proc_pid_make_inode(dir->i_sb, task, p->mode);
+ inode = proc_pid_make_inode(dentry->d_sb, task, p->mode);
if (!inode)
- goto out;
+ return ERR_PTR(-ENOENT);
ei = PROC_I(inode);
if (S_ISDIR(inode->i_mode))
@@ -2454,13 +2455,9 @@ static int proc_pident_instantiate(struct inode *dir,
if (p->fop)
inode->i_fop = p->fop;
ei->op = p->op;
+ pid_update_inode(task, inode);
d_set_d_op(dentry, &pid_dentry_operations);
- d_add(dentry, inode);
- /* Close the race of the process dying before we return the dentry */
- if (pid_revalidate(dentry, 0))
- return 0;
-out:
- return -ENOENT;
+ return d_splice_alias(inode, dentry);
}
static struct dentry *proc_pident_lookup(struct inode *dir,
@@ -2468,11 +2465,9 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
const struct pid_entry *ents,
unsigned int nents)
{
- int error;
struct task_struct *task = get_proc_task(dir);
const struct pid_entry *p, *last;
-
- error = -ENOENT;
+ struct dentry *res = ERR_PTR(-ENOENT);
if (!task)
goto out_no_task;
@@ -2491,11 +2486,11 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
if (p >= last)
goto out;
- error = proc_pident_instantiate(dir, dentry, task, p);
+ res = proc_pident_instantiate(dentry, task, p);
out:
put_task_struct(task);
out_no_task:
- return ERR_PTR(error);
+ return res;
}
static int proc_pident_readdir(struct file *file, struct dir_context *ctx,
@@ -3138,38 +3133,32 @@ void proc_flush_task(struct task_struct *task)
}
}
-static int proc_pid_instantiate(struct inode *dir,
- struct dentry * dentry,
+static struct dentry *proc_pid_instantiate(struct dentry * dentry,
struct task_struct *task, const void *ptr)
{
struct inode *inode;
- inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
+ inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
if (!inode)
- goto out;
+ return ERR_PTR(-ENOENT);
inode->i_op = &proc_tgid_base_inode_operations;
inode->i_fop = &proc_tgid_base_operations;
inode->i_flags|=S_IMMUTABLE;
set_nlink(inode, nlink_tgid);
+ pid_update_inode(task, inode);
d_set_d_op(dentry, &pid_dentry_operations);
-
- d_add(dentry, inode);
- /* Close the race of the process dying before we return the dentry */
- if (pid_revalidate(dentry, 0))
- return 0;
-out:
- return -ENOENT;
+ return d_splice_alias(inode, dentry);
}
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
{
- int result = -ENOENT;
struct task_struct *task;
unsigned tgid;
struct pid_namespace *ns;
+ struct dentry *result = ERR_PTR(-ENOENT);
tgid = name_to_int(&dentry->d_name);
if (tgid == ~0U)
@@ -3184,10 +3173,10 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsign
if (!task)
goto out;
- result = proc_pid_instantiate(dir, dentry, task, NULL);
+ result = proc_pid_instantiate(dentry, task, NULL);
put_task_struct(task);
out:
- return ERR_PTR(result);
+ return result;
}
/*
@@ -3239,7 +3228,7 @@ retry:
int proc_pid_readdir(struct file *file, struct dir_context *ctx)
{
struct tgid_iter iter;
- struct pid_namespace *ns = file_inode(file)->i_sb->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(file_inode(file));
loff_t pos = ctx->pos;
if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
@@ -3435,37 +3424,32 @@ static const struct inode_operations proc_tid_base_inode_operations = {
.setattr = proc_setattr,
};
-static int proc_task_instantiate(struct inode *dir,
- struct dentry *dentry, struct task_struct *task, const void *ptr)
+static struct dentry *proc_task_instantiate(struct dentry *dentry,
+ struct task_struct *task, const void *ptr)
{
struct inode *inode;
- inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
-
+ inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
if (!inode)
- goto out;
+ return ERR_PTR(-ENOENT);
+
inode->i_op = &proc_tid_base_inode_operations;
inode->i_fop = &proc_tid_base_operations;
- inode->i_flags|=S_IMMUTABLE;
+ inode->i_flags |= S_IMMUTABLE;
set_nlink(inode, nlink_tid);
+ pid_update_inode(task, inode);
d_set_d_op(dentry, &pid_dentry_operations);
-
- d_add(dentry, inode);
- /* Close the race of the process dying before we return the dentry */
- if (pid_revalidate(dentry, 0))
- return 0;
-out:
- return -ENOENT;
+ return d_splice_alias(inode, dentry);
}
static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
{
- int result = -ENOENT;
struct task_struct *task;
struct task_struct *leader = get_proc_task(dir);
unsigned tid;
struct pid_namespace *ns;
+ struct dentry *result = ERR_PTR(-ENOENT);
if (!leader)
goto out_no_task;
@@ -3485,13 +3469,13 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
if (!same_thread_group(leader, task))
goto out_drop_task;
- result = proc_task_instantiate(dir, dentry, task, NULL);
+ result = proc_task_instantiate(dentry, task, NULL);
out_drop_task:
put_task_struct(task);
out:
put_task_struct(leader);
out_no_task:
- return ERR_PTR(result);
+ return result;
}
/*
@@ -3588,7 +3572,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
/* f_version caches the tgid value that the last readdir call couldn't
* return. lseek aka telldir automagically resets f_version to 0.
*/
- ns = inode->i_sb->s_fs_info;
+ ns = proc_pid_ns(inode);
tid = (int)file->f_version;
file->f_version = 0;
for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c
index 8233e7af9389..fa762c5fbcb2 100644
--- a/fs/proc/cmdline.c
+++ b/fs/proc/cmdline.c
@@ -11,21 +11,9 @@ static int cmdline_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int cmdline_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, cmdline_proc_show, NULL);
-}
-
-static const struct file_operations cmdline_proc_fops = {
- .open = cmdline_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_cmdline_init(void)
{
- proc_create("cmdline", 0, NULL, &cmdline_proc_fops);
+ proc_create_single("cmdline", 0, NULL, cmdline_proc_show);
return 0;
}
fs_initcall(proc_cmdline_init);
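
This is the first of many identical conversions: proc_create_single() folds the whole single_open() open/read/llseek/release block into the proc core (see the proc_single_fops machinery added to fs/proc/generic.c further down). For reference, the new idiom in a minimal out-of-tree module would look roughly like this; the demo names are hypothetical:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int demo_single_show(struct seq_file *m, void *v)
{
	seq_puts(m, "hello from /proc/demo\n");
	return 0;
}

static int __init demo_init(void)
{
	/* one call where four file_operations lines used to live */
	return proc_create_single("demo", 0, NULL, demo_single_show) ?
		0 : -ENOMEM;
}

static void __exit demo_exit(void)
{
	remove_proc_entry("demo", NULL);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");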
diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c
index a8ac48aebd59..954caf0b7fee 100644
--- a/fs/proc/consoles.c
+++ b/fs/proc/consoles.c
@@ -91,21 +91,9 @@ static const struct seq_operations consoles_op = {
.show = show_console_dev
};
-static int consoles_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &consoles_op);
-}
-
-static const struct file_operations proc_consoles_operations = {
- .open = consoles_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int __init proc_consoles_init(void)
{
- proc_create("consoles", 0, NULL, &proc_consoles_operations);
+ proc_create_seq("consoles", 0, NULL, &consoles_op);
return 0;
}
fs_initcall(proc_consoles_init);
diff --git a/fs/proc/devices.c b/fs/proc/devices.c
index 2c7f22b14489..37d38697eaf8 100644
--- a/fs/proc/devices.c
+++ b/fs/proc/devices.c
@@ -51,21 +51,9 @@ static const struct seq_operations devinfo_ops = {
.show = devinfo_show
};
-static int devinfo_open(struct inode *inode, struct file *filp)
-{
- return seq_open(filp, &devinfo_ops);
-}
-
-static const struct file_operations proc_devinfo_operations = {
- .open = devinfo_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int __init proc_devices_init(void)
{
- proc_create("devices", 0, NULL, &proc_devinfo_operations);
+ proc_create_seq("devices", 0, NULL, &devinfo_ops);
return 0;
}
fs_initcall(proc_devices_init);
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 6b80cd1e419a..05b9893e9a22 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -81,9 +81,41 @@ static const struct file_operations proc_fdinfo_file_operations = {
.release = single_release,
};
+static bool tid_fd_mode(struct task_struct *task, unsigned fd, fmode_t *mode)
+{
+ struct files_struct *files = get_files_struct(task);
+ struct file *file;
+
+ if (!files)
+ return false;
+
+ rcu_read_lock();
+ file = fcheck_files(files, fd);
+ if (file)
+ *mode = file->f_mode;
+ rcu_read_unlock();
+ put_files_struct(files);
+ return !!file;
+}
+
+static void tid_fd_update_inode(struct task_struct *task, struct inode *inode,
+ fmode_t f_mode)
+{
+ task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
+
+ if (S_ISLNK(inode->i_mode)) {
+ unsigned i_mode = S_IFLNK;
+ if (f_mode & FMODE_READ)
+ i_mode |= S_IRUSR | S_IXUSR;
+ if (f_mode & FMODE_WRITE)
+ i_mode |= S_IWUSR | S_IXUSR;
+ inode->i_mode = i_mode;
+ }
+ security_task_to_inode(task, inode);
+}
+
static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
{
- struct files_struct *files;
struct task_struct *task;
struct inode *inode;
unsigned int fd;
@@ -96,35 +128,11 @@ static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
fd = proc_fd(inode);
if (task) {
- files = get_files_struct(task);
- if (files) {
- struct file *file;
-
- rcu_read_lock();
- file = fcheck_files(files, fd);
- if (file) {
- unsigned f_mode = file->f_mode;
-
- rcu_read_unlock();
- put_files_struct(files);
-
- task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
-
- if (S_ISLNK(inode->i_mode)) {
- unsigned i_mode = S_IFLNK;
- if (f_mode & FMODE_READ)
- i_mode |= S_IRUSR | S_IXUSR;
- if (f_mode & FMODE_WRITE)
- i_mode |= S_IWUSR | S_IXUSR;
- inode->i_mode = i_mode;
- }
-
- security_task_to_inode(task, inode);
- put_task_struct(task);
- return 1;
- }
- rcu_read_unlock();
- put_files_struct(files);
+ fmode_t f_mode;
+ if (tid_fd_mode(task, fd, &f_mode)) {
+ tid_fd_update_inode(task, inode, f_mode);
+ put_task_struct(task);
+ return 1;
}
put_task_struct(task);
}
@@ -166,34 +174,33 @@ static int proc_fd_link(struct dentry *dentry, struct path *path)
return ret;
}
-static int
-proc_fd_instantiate(struct inode *dir, struct dentry *dentry,
- struct task_struct *task, const void *ptr)
+struct fd_data {
+ fmode_t mode;
+ unsigned fd;
+};
+
+static struct dentry *proc_fd_instantiate(struct dentry *dentry,
+ struct task_struct *task, const void *ptr)
{
- unsigned fd = (unsigned long)ptr;
+ const struct fd_data *data = ptr;
struct proc_inode *ei;
struct inode *inode;
- inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK);
+ inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK);
if (!inode)
- goto out;
+ return ERR_PTR(-ENOENT);
ei = PROC_I(inode);
- ei->fd = fd;
+ ei->fd = data->fd;
inode->i_op = &proc_pid_link_inode_operations;
inode->i_size = 64;
ei->op.proc_get_link = proc_fd_link;
+ tid_fd_update_inode(task, inode, data->mode);
d_set_d_op(dentry, &tid_fd_dentry_operations);
- d_add(dentry, inode);
-
- /* Close the race of the process dying before we return the dentry */
- if (tid_fd_revalidate(dentry, 0))
- return 0;
- out:
- return -ENOENT;
+ return d_splice_alias(inode, dentry);
}
static struct dentry *proc_lookupfd_common(struct inode *dir,
@@ -201,19 +208,21 @@ static struct dentry *proc_lookupfd_common(struct inode *dir,
instantiate_t instantiate)
{
struct task_struct *task = get_proc_task(dir);
- int result = -ENOENT;
- unsigned fd = name_to_int(&dentry->d_name);
+ struct fd_data data = {.fd = name_to_int(&dentry->d_name)};
+ struct dentry *result = ERR_PTR(-ENOENT);
if (!task)
goto out_no_task;
- if (fd == ~0U)
+ if (data.fd == ~0U)
+ goto out;
+ if (!tid_fd_mode(task, data.fd, &data.mode))
goto out;
- result = instantiate(dir, dentry, task, (void *)(unsigned long)fd);
+ result = instantiate(dentry, task, &data);
out:
put_task_struct(task);
out_no_task:
- return ERR_PTR(result);
+ return result;
}
static int proc_readfd_common(struct file *file, struct dir_context *ctx,
@@ -236,17 +245,22 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
for (fd = ctx->pos - 2;
fd < files_fdtable(files)->max_fds;
fd++, ctx->pos++) {
+ struct file *f;
+ struct fd_data data;
char name[10 + 1];
int len;
- if (!fcheck_files(files, fd))
+ f = fcheck_files(files, fd);
+ if (!f)
continue;
+ data.mode = f->f_mode;
rcu_read_unlock();
+ data.fd = fd;
len = snprintf(name, sizeof(name), "%u", fd);
if (!proc_fill_cache(file, ctx,
name, len, instantiate, p,
- (void *)(unsigned long)fd))
+ &data))
goto out_fd_loop;
cond_resched();
rcu_read_lock();
@@ -304,31 +318,25 @@ const struct inode_operations proc_fd_inode_operations = {
.setattr = proc_setattr,
};
-static int
-proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry,
- struct task_struct *task, const void *ptr)
+static struct dentry *proc_fdinfo_instantiate(struct dentry *dentry,
+ struct task_struct *task, const void *ptr)
{
- unsigned fd = (unsigned long)ptr;
+ const struct fd_data *data = ptr;
struct proc_inode *ei;
struct inode *inode;
- inode = proc_pid_make_inode(dir->i_sb, task, S_IFREG | S_IRUSR);
+ inode = proc_pid_make_inode(dentry->d_sb, task, S_IFREG | S_IRUSR);
if (!inode)
- goto out;
+ return ERR_PTR(-ENOENT);
ei = PROC_I(inode);
- ei->fd = fd;
+ ei->fd = data->fd;
inode->i_fop = &proc_fdinfo_file_operations;
+ tid_fd_update_inode(task, inode, 0);
d_set_d_op(dentry, &tid_fd_dentry_operations);
- d_add(dentry, inode);
-
- /* Close the race of the process dying before we return the dentry */
- if (tid_fd_revalidate(dentry, 0))
- return 0;
- out:
- return -ENOENT;
+ return d_splice_alias(inode, dentry);
}
static struct dentry *
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 2078e70e1595..7b4d9714f248 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -25,6 +25,7 @@
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/uaccess.h>
+#include <linux/seq_file.h>
#include "internal.h"
@@ -256,8 +257,7 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry,
if (!inode)
return ERR_PTR(-ENOMEM);
d_set_d_op(dentry, &proc_misc_dentry_ops);
- d_add(dentry, inode);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
read_unlock(&proc_subdir_lock);
return ERR_PTR(-ENOENT);
@@ -346,13 +346,12 @@ static const struct inode_operations proc_dir_inode_operations = {
.setattr = proc_notify_change,
};
-static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
+/* returns the registered entry, or frees dp and returns NULL on failure */
+struct proc_dir_entry *proc_register(struct proc_dir_entry *dir,
+ struct proc_dir_entry *dp)
{
- int ret;
-
- ret = proc_alloc_inum(&dp->low_ino);
- if (ret)
- return ret;
+ if (proc_alloc_inum(&dp->low_ino))
+ goto out_free_entry;
write_lock(&proc_subdir_lock);
dp->parent = dir;
@@ -360,12 +359,16 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
WARN(1, "proc_dir_entry '%s/%s' already registered\n",
dir->name, dp->name);
write_unlock(&proc_subdir_lock);
- proc_free_inum(dp->low_ino);
- return -EEXIST;
+ goto out_free_inum;
}
write_unlock(&proc_subdir_lock);
- return 0;
+ return dp;
+out_free_inum:
+ proc_free_inum(dp->low_ino);
+out_free_entry:
+ pde_free(dp);
+ return NULL;
}
static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
@@ -443,10 +446,7 @@ struct proc_dir_entry *proc_symlink(const char *name,
if (ent->data) {
strcpy((char*)ent->data,dest);
ent->proc_iops = &proc_link_inode_operations;
- if (proc_register(parent, ent) < 0) {
- pde_free(ent);
- ent = NULL;
- }
+ ent = proc_register(parent, ent);
} else {
pde_free(ent);
ent = NULL;
@@ -470,11 +470,9 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode,
ent->proc_fops = &proc_dir_operations;
ent->proc_iops = &proc_dir_inode_operations;
parent->nlink++;
- if (proc_register(parent, ent) < 0) {
- pde_free(ent);
+ ent = proc_register(parent, ent);
+ if (!ent)
parent->nlink--;
- ent = NULL;
- }
}
return ent;
}
@@ -505,47 +503,47 @@ struct proc_dir_entry *proc_create_mount_point(const char *name)
ent->proc_fops = NULL;
ent->proc_iops = NULL;
parent->nlink++;
- if (proc_register(parent, ent) < 0) {
- pde_free(ent);
+ ent = proc_register(parent, ent);
+ if (!ent)
parent->nlink--;
- ent = NULL;
- }
}
return ent;
}
EXPORT_SYMBOL(proc_create_mount_point);
-struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
- struct proc_dir_entry *parent,
- const struct file_operations *proc_fops,
- void *data)
+struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode,
+ struct proc_dir_entry **parent, void *data)
{
- struct proc_dir_entry *pde;
+ struct proc_dir_entry *p;
+
if ((mode & S_IFMT) == 0)
mode |= S_IFREG;
-
- if (!S_ISREG(mode)) {
- WARN_ON(1); /* use proc_mkdir() */
+ if ((mode & S_IALLUGO) == 0)
+ mode |= S_IRUGO;
+ if (WARN_ON_ONCE(!S_ISREG(mode)))
return NULL;
+
+ p = __proc_create(parent, name, mode, 1);
+ if (p) {
+ p->proc_iops = &proc_file_inode_operations;
+ p->data = data;
}
+ return p;
+}
+
+struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
+ struct proc_dir_entry *parent,
+ const struct file_operations *proc_fops, void *data)
+{
+ struct proc_dir_entry *p;
BUG_ON(proc_fops == NULL);
- if ((mode & S_IALLUGO) == 0)
- mode |= S_IRUGO;
- pde = __proc_create(&parent, name, mode, 1);
- if (!pde)
- goto out;
- pde->proc_fops = proc_fops;
- pde->data = data;
- pde->proc_iops = &proc_file_inode_operations;
- if (proc_register(parent, pde) < 0)
- goto out_free;
- return pde;
-out_free:
- pde_free(pde);
-out:
- return NULL;
+ p = proc_create_reg(name, mode, &parent, data);
+ if (!p)
+ return NULL;
+ p->proc_fops = proc_fops;
+ return proc_register(parent, p);
}
EXPORT_SYMBOL(proc_create_data);
@@ -557,6 +555,67 @@ struct proc_dir_entry *proc_create(const char *name, umode_t mode,
}
EXPORT_SYMBOL(proc_create);
+static int proc_seq_open(struct inode *inode, struct file *file)
+{
+ struct proc_dir_entry *de = PDE(inode);
+
+ if (de->state_size)
+ return seq_open_private(file, de->seq_ops, de->state_size);
+ return seq_open(file, de->seq_ops);
+}
+
+static const struct file_operations proc_seq_fops = {
+ .open = proc_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode,
+ struct proc_dir_entry *parent, const struct seq_operations *ops,
+ unsigned int state_size, void *data)
+{
+ struct proc_dir_entry *p;
+
+ p = proc_create_reg(name, mode, &parent, data);
+ if (!p)
+ return NULL;
+ p->proc_fops = &proc_seq_fops;
+ p->seq_ops = ops;
+ p->state_size = state_size;
+ return proc_register(parent, p);
+}
+EXPORT_SYMBOL(proc_create_seq_private);
+
+static int proc_single_open(struct inode *inode, struct file *file)
+{
+ struct proc_dir_entry *de = PDE(inode);
+
+ return single_open(file, de->single_show, de->data);
+}
+
+static const struct file_operations proc_single_fops = {
+ .open = proc_single_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode,
+ struct proc_dir_entry *parent,
+ int (*show)(struct seq_file *, void *), void *data)
+{
+ struct proc_dir_entry *p;
+
+ p = proc_create_reg(name, mode, &parent, data);
+ if (!p)
+ return NULL;
+ p->proc_fops = &proc_single_fops;
+ p->single_show = show;
+ return proc_register(parent, p);
+}
+EXPORT_SYMBOL(proc_create_single_data);
+
void proc_set_size(struct proc_dir_entry *de, loff_t size)
{
de->size = size;
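
proc_create_seq(), used by the consoles/devices/interrupts conversions above, is the state_size == 0 case of the proc_create_seq_private() helper just added: proc_seq_open() then falls back to plain seq_open(). A hedged sketch of a caller, iterating a static table (all demo_seq_* names are made up for illustration):

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static const char * const demo_items[] = { "alpha", "beta", "gamma" };

static void *demo_seq_start(struct seq_file *m, loff_t *pos)
{
	if (*pos >= ARRAY_SIZE(demo_items))
		return NULL;
	return (void *)&demo_items[*pos];
}

static void *demo_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	++*pos;
	return demo_seq_start(m, pos);
}

static void demo_seq_stop(struct seq_file *m, void *v)
{
}

static int demo_seq_show(struct seq_file *m, void *v)
{
	const char * const *item = v;

	seq_printf(m, "%s\n", *item);
	return 0;
}

static const struct seq_operations demo_seq_ops = {
	.start	= demo_seq_start,
	.next	= demo_seq_next,
	.stop	= demo_seq_stop,
	.show	= demo_seq_show,
};

static int __init demo_init(void)
{
	/* mode 0 gets S_IRUGO filled in by proc_create_reg() above */
	return proc_create_seq("demo_seq", 0, NULL, &demo_seq_ops) ?
		0 : -ENOMEM;
}

static void __exit demo_exit(void)
{
	remove_proc_entry("demo_seq", NULL);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");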
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 0f1692e63cb6..43c70c9e6b62 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -44,7 +44,12 @@ struct proc_dir_entry {
struct completion *pde_unload_completion;
const struct inode_operations *proc_iops;
const struct file_operations *proc_fops;
+ union {
+ const struct seq_operations *seq_ops;
+ int (*single_show)(struct seq_file *, void *);
+ };
void *data;
+ unsigned int state_size;
unsigned int low_ino;
nlink_t nlink;
kuid_t uid;
@@ -57,9 +62,9 @@ struct proc_dir_entry {
umode_t mode;
u8 namelen;
#ifdef CONFIG_64BIT
-#define SIZEOF_PDE_INLINE_NAME (192-139)
+#define SIZEOF_PDE_INLINE_NAME (192-155)
#else
-#define SIZEOF_PDE_INLINE_NAME (128-87)
+#define SIZEOF_PDE_INLINE_NAME (128-95)
#endif
char inline_name[SIZEOF_PDE_INLINE_NAME];
} __randomize_layout;
@@ -147,14 +152,14 @@ extern const struct dentry_operations pid_dentry_operations;
extern int pid_getattr(const struct path *, struct kstat *, u32, unsigned int);
extern int proc_setattr(struct dentry *, struct iattr *);
extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t);
-extern int pid_revalidate(struct dentry *, unsigned int);
+extern void pid_update_inode(struct task_struct *, struct inode *);
extern int pid_delete_dentry(const struct dentry *);
extern int proc_pid_readdir(struct file *, struct dir_context *);
extern struct dentry *proc_pid_lookup(struct inode *, struct dentry *, unsigned int);
extern loff_t mem_lseek(struct file *, loff_t, int);
/* Lookups */
-typedef int instantiate_t(struct inode *, struct dentry *,
+typedef struct dentry *instantiate_t(struct dentry *,
struct task_struct *, const void *);
extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, int,
instantiate_t, struct task_struct *, const void *);
@@ -162,6 +167,10 @@ extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, i
/*
* generic.c
*/
+struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode,
+ struct proc_dir_entry **parent, void *data);
+struct proc_dir_entry *proc_register(struct proc_dir_entry *dir,
+ struct proc_dir_entry *dp);
extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
struct dentry *proc_lookup_de(struct inode *, struct dentry *, struct proc_dir_entry *);
extern int proc_readdir(struct file *, struct dir_context *);
diff --git a/fs/proc/interrupts.c b/fs/proc/interrupts.c
index 6a6bee9c603c..cb0edc7cbf09 100644
--- a/fs/proc/interrupts.c
+++ b/fs/proc/interrupts.c
@@ -34,21 +34,9 @@ static const struct seq_operations int_seq_ops = {
.show = show_interrupts
};
-static int interrupts_open(struct inode *inode, struct file *filp)
-{
- return seq_open(filp, &int_seq_ops);
-}
-
-static const struct file_operations proc_interrupts_operations = {
- .open = interrupts_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int __init proc_interrupts_init(void)
{
- proc_create("interrupts", 0, NULL, &proc_interrupts_operations);
+ proc_create_seq("interrupts", 0, NULL, &int_seq_ops);
return 0;
}
fs_initcall(proc_interrupts_init);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index d1e82761de81..e64ecb9f2720 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -209,25 +209,34 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
{
struct list_head *head = (struct list_head *)arg;
struct kcore_list *ent;
+ struct page *p;
+
+ if (!pfn_valid(pfn))
+ return 1;
+
+ p = pfn_to_page(pfn);
+ if (!memmap_valid_within(pfn, p, page_zone(p)))
+ return 1;
ent = kmalloc(sizeof(*ent), GFP_KERNEL);
if (!ent)
return -ENOMEM;
- ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
+ ent->addr = (unsigned long)page_to_virt(p);
ent->size = nr_pages << PAGE_SHIFT;
- /* Sanity check: Can happen in 32bit arch...maybe */
- if (ent->addr < (unsigned long) __va(0))
+ if (!virt_addr_valid(ent->addr))
goto free_out;
/* cut not-mapped area. ....from ppc-32 code. */
if (ULONG_MAX - ent->addr < ent->size)
ent->size = ULONG_MAX - ent->addr;
- /* cut when vmalloc() area is higher than direct-map area */
- if (VMALLOC_START > (unsigned long)__va(0)) {
- if (ent->addr > VMALLOC_START)
- goto free_out;
+ /*
+ * We've already checked virt_addr_valid(), so we know this address
+ * is a valid pointer; we can therefore compare against it to
+ * determine whether we need to trim.
+ */
+ if (VMALLOC_START > ent->addr) {
if (VMALLOC_START - ent->addr < ent->size)
ent->size = VMALLOC_START - ent->addr;
}
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index b572cc865b92..d06694757201 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -28,21 +28,9 @@ static int loadavg_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int loadavg_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, loadavg_proc_show, NULL);
-}
-
-static const struct file_operations loadavg_proc_fops = {
- .open = loadavg_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_loadavg_init(void)
{
- proc_create("loadavg", 0, NULL, &loadavg_proc_fops);
+ proc_create_single("loadavg", 0, NULL, loadavg_proc_show);
return 0;
}
fs_initcall(proc_loadavg_init);
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 65a72ab57471..2fb04846ed11 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -149,21 +149,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int meminfo_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, meminfo_proc_show, NULL);
-}
-
-static const struct file_operations meminfo_proc_fops = {
- .open = meminfo_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_meminfo_init(void)
{
- proc_create("meminfo", 0, NULL, &meminfo_proc_fops);
+ proc_create_single("meminfo", 0, NULL, meminfo_proc_show);
return 0;
}
fs_initcall(proc_meminfo_init);
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 59b17e509f46..dd2b35f78b09 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -87,28 +87,24 @@ static const struct inode_operations proc_ns_link_inode_operations = {
.setattr = proc_setattr,
};
-static int proc_ns_instantiate(struct inode *dir,
- struct dentry *dentry, struct task_struct *task, const void *ptr)
+static struct dentry *proc_ns_instantiate(struct dentry *dentry,
+ struct task_struct *task, const void *ptr)
{
const struct proc_ns_operations *ns_ops = ptr;
struct inode *inode;
struct proc_inode *ei;
- inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK | S_IRWXUGO);
+ inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK | S_IRWXUGO);
if (!inode)
- goto out;
+ return ERR_PTR(-ENOENT);
ei = PROC_I(inode);
inode->i_op = &proc_ns_link_inode_operations;
ei->ns_ops = ns_ops;
+ pid_update_inode(task, inode);
d_set_d_op(dentry, &pid_dentry_operations);
- d_add(dentry, inode);
- /* Close the race of the process dying before we return the dentry */
- if (pid_revalidate(dentry, 0))
- return 0;
-out:
- return -ENOENT;
+ return d_splice_alias(inode, dentry);
}
static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx)
@@ -147,12 +143,10 @@ const struct file_operations proc_ns_dir_operations = {
static struct dentry *proc_ns_dir_lookup(struct inode *dir,
struct dentry *dentry, unsigned int flags)
{
- int error;
struct task_struct *task = get_proc_task(dir);
const struct proc_ns_operations **entry, **last;
unsigned int len = dentry->d_name.len;
-
- error = -ENOENT;
+ struct dentry *res = ERR_PTR(-ENOENT);
if (!task)
goto out_no_task;
@@ -167,11 +161,11 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir,
if (entry == last)
goto out;
- error = proc_ns_instantiate(dir, dentry, task, *entry);
+ res = proc_ns_instantiate(dentry, task, *entry);
out:
put_task_struct(task);
out_no_task:
- return ERR_PTR(error);
+ return res;
}
const struct inode_operations proc_ns_dir_inode_operations = {
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 75634379f82e..3b63be64e436 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -113,21 +113,9 @@ static const struct seq_operations proc_nommu_region_list_seqop = {
.show = nommu_region_list_show
};
-static int proc_nommu_region_list_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &proc_nommu_region_list_seqop);
-}
-
-static const struct file_operations proc_nommu_region_list_operations = {
- .open = proc_nommu_region_list_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int __init proc_nommu_init(void)
{
- proc_create("maps", S_IRUGO, NULL, &proc_nommu_region_list_operations);
+ proc_create_seq("maps", S_IRUGO, NULL, &proc_nommu_region_list_seqop);
return 0;
}
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 1763f370489d..7d94fa005b0d 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -38,20 +38,20 @@ static struct net *get_proc_net(const struct inode *inode)
return maybe_get_net(PDE_NET(PDE(inode)));
}
-int seq_open_net(struct inode *ino, struct file *f,
- const struct seq_operations *ops, int size)
+static int seq_open_net(struct inode *inode, struct file *file)
{
- struct net *net;
+ unsigned int state_size = PDE(inode)->state_size;
struct seq_net_private *p;
+ struct net *net;
- BUG_ON(size < sizeof(*p));
+ WARN_ON_ONCE(state_size < sizeof(*p));
- net = get_proc_net(ino);
- if (net == NULL)
+ net = get_proc_net(inode);
+ if (!net)
return -ENXIO;
- p = __seq_open_private(f, ops, size);
- if (p == NULL) {
+ p = __seq_open_private(file, PDE(inode)->seq_ops, state_size);
+ if (!p) {
put_net(net);
return -ENOMEM;
}
@@ -60,51 +60,83 @@ int seq_open_net(struct inode *ino, struct file *f,
#endif
return 0;
}
-EXPORT_SYMBOL_GPL(seq_open_net);
-int single_open_net(struct inode *inode, struct file *file,
- int (*show)(struct seq_file *, void *))
+static int seq_release_net(struct inode *ino, struct file *f)
{
- int err;
- struct net *net;
-
- err = -ENXIO;
- net = get_proc_net(inode);
- if (net == NULL)
- goto err_net;
-
- err = single_open(file, show, net);
- if (err < 0)
- goto err_open;
+ struct seq_file *seq = f->private_data;
+ put_net(seq_file_net(seq));
+ seq_release_private(ino, f);
return 0;
+}
-err_open:
- put_net(net);
-err_net:
- return err;
+static const struct file_operations proc_net_seq_fops = {
+ .open = seq_open_net,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
+ struct proc_dir_entry *parent, const struct seq_operations *ops,
+ unsigned int state_size, void *data)
+{
+ struct proc_dir_entry *p;
+
+ p = proc_create_reg(name, mode, &parent, data);
+ if (!p)
+ return NULL;
+ p->proc_fops = &proc_net_seq_fops;
+ p->seq_ops = ops;
+ p->state_size = state_size;
+ return proc_register(parent, p);
}
-EXPORT_SYMBOL_GPL(single_open_net);
+EXPORT_SYMBOL_GPL(proc_create_net_data);
-int seq_release_net(struct inode *ino, struct file *f)
+static int single_open_net(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
+ struct proc_dir_entry *de = PDE(inode);
+ struct net *net;
+ int err;
- seq = f->private_data;
+ net = get_proc_net(inode);
+ if (!net)
+ return -ENXIO;
- put_net(seq_file_net(seq));
- seq_release_private(ino, f);
- return 0;
+ err = single_open(file, de->single_show, net);
+ if (err)
+ put_net(net);
+ return err;
}
-EXPORT_SYMBOL_GPL(seq_release_net);
-int single_release_net(struct inode *ino, struct file *f)
+static int single_release_net(struct inode *ino, struct file *f)
{
struct seq_file *seq = f->private_data;
put_net(seq->private);
return single_release(ino, f);
}
-EXPORT_SYMBOL_GPL(single_release_net);
+
+static const struct file_operations proc_net_single_fops = {
+ .open = single_open_net,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release_net,
+};
+
+struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode,
+ struct proc_dir_entry *parent,
+ int (*show)(struct seq_file *, void *), void *data)
+{
+ struct proc_dir_entry *p;
+
+ p = proc_create_reg(name, mode, &parent, data);
+ if (!p)
+ return NULL;
+ p->proc_fops = &proc_net_single_fops;
+ p->single_show = show;
+ return proc_register(parent, p);
+}
+EXPORT_SYMBOL_GPL(proc_create_net_single);
static struct net *get_proc_task_net(struct inode *dir)
{
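
proc_create_net_data() is the per-netns twin of proc_create_seq_private(): seq_open_net() grabs the net from the inode and stashes it in the iterator state, which is why that state must begin with struct seq_net_private (the WARN_ON_ONCE above enforces it). A sketch of a consumer, with every demo_* name hypothetical:

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file_net.h>
#include <net/net_namespace.h>

struct demo_net_iter {
	struct seq_net_private p;	/* must come first for seq_file_net() */
	int bucket;			/* private cursor, unused here */
};

static void *demo_net_start(struct seq_file *m, loff_t *pos)
{
	return *pos ? NULL : SEQ_START_TOKEN;	/* emit a single record */
}

static void *demo_net_next(struct seq_file *m, void *v, loff_t *pos)
{
	++*pos;
	return NULL;
}

static void demo_net_stop(struct seq_file *m, void *v)
{
}

static int demo_net_show(struct seq_file *m, void *v)
{
	struct net *net = seq_file_net(m);	/* via the seq_net_private */

	seq_printf(m, "ns inum %u\n", net->ns.inum);
	return 0;
}

static const struct seq_operations demo_net_seq_ops = {
	.start	= demo_net_start,
	.next	= demo_net_next,
	.stop	= demo_net_stop,
	.show	= demo_net_show,
};

static int __net_init demo_net_init(struct net *net)
{
	if (!proc_create_net_data("demo", 0444, net->proc_net,
				  &demo_net_seq_ops,
				  sizeof(struct demo_net_iter), NULL))
		return -ENOMEM;
	return 0;
}

static void __net_exit demo_net_exit(struct net *net)
{
	remove_proc_entry("demo", net->proc_net);
}

static struct pernet_operations demo_net_ops = {
	.init	= demo_net_init,
	.exit	= demo_net_exit,
};

static int __init demo_init(void)
{
	return register_pernet_subsys(&demo_net_ops);
}

static void __exit demo_exit(void)
{
	unregister_pernet_subsys(&demo_net_ops);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");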
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 8989936f2995..4d765e5e91ed 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -554,9 +554,8 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
if (!inode)
goto out;
- err = NULL;
d_set_d_op(dentry, &proc_sys_dentry_operations);
- d_add(dentry, inode);
+ err = d_splice_alias(inode, dentry);
out:
if (h)
@@ -684,6 +683,7 @@ static bool proc_sys_fill_cache(struct file *file,
if (IS_ERR(child))
return false;
if (d_in_lookup(child)) {
+ struct dentry *res;
inode = proc_sys_make_inode(dir->d_sb, head, table);
if (!inode) {
d_lookup_done(child);
@@ -691,7 +691,16 @@ static bool proc_sys_fill_cache(struct file *file,
return false;
}
d_set_d_op(child, &proc_sys_dentry_operations);
- d_add(child, inode);
+ res = d_splice_alias(inode, child);
+ d_lookup_done(child);
+ if (unlikely(res)) {
+ if (IS_ERR(res)) {
+ dput(child);
+ return false;
+ }
+ dput(child);
+ child = res;
+ }
}
}
inode = d_inode(child);
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index d0cf1c50bb6c..c69ff191e5d8 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -126,18 +126,6 @@ static const struct seq_operations tty_drivers_op = {
.show = show_tty_driver
};
-static int tty_drivers_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &tty_drivers_op);
-}
-
-static const struct file_operations proc_tty_drivers_operations = {
- .open = tty_drivers_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
/*
* This function is called by tty_register_driver() to handle
* registering the driver's /proc handler into /proc/tty/driver/<foo>
@@ -147,11 +135,11 @@ void proc_tty_register_driver(struct tty_driver *driver)
struct proc_dir_entry *ent;
if (!driver->driver_name || driver->proc_entry ||
- !driver->ops->proc_fops)
+ !driver->ops->proc_show)
return;
- ent = proc_create_data(driver->driver_name, 0, proc_tty_driver,
- driver->ops->proc_fops, driver);
+ ent = proc_create_single_data(driver->driver_name, 0, proc_tty_driver,
+ driver->ops->proc_show, driver);
driver->proc_entry = ent;
}
@@ -186,6 +174,6 @@ void __init proc_tty_init(void)
* entry.
*/
proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR|S_IXUSR, NULL);
- proc_create("tty/ldiscs", 0, NULL, &tty_ldiscs_proc_fops);
- proc_create("tty/drivers", 0, NULL, &proc_tty_drivers_operations);
+ proc_create_seq("tty/ldiscs", 0, NULL, &tty_ldiscs_seq_ops);
+ proc_create_seq("tty/drivers", 0, NULL, &tty_drivers_op);
}
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 4d7d061696b3..127265e5c55f 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -12,7 +12,7 @@ static const char *proc_self_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
- struct pid_namespace *ns = inode->i_sb->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(inode);
pid_t tgid = task_tgid_nr_ns(current, ns);
char *name;
@@ -36,7 +36,7 @@ static unsigned self_inum __ro_after_init;
int proc_setup_self(struct super_block *s)
{
struct inode *root_inode = d_inode(s->s_root);
- struct pid_namespace *ns = s->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(root_inode);
struct dentry *self;
inode_lock(root_inode);
diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c
index 24072cc06e65..12901dcf57e2 100644
--- a/fs/proc/softirqs.c
+++ b/fs/proc/softirqs.c
@@ -25,21 +25,9 @@ static int show_softirqs(struct seq_file *p, void *v)
return 0;
}
-static int softirqs_open(struct inode *inode, struct file *file)
-{
- return single_open(file, show_softirqs, NULL);
-}
-
-static const struct file_operations proc_softirqs_operations = {
- .open = softirqs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_softirqs_init(void)
{
- proc_create("softirqs", 0, NULL, &proc_softirqs_operations);
+ proc_create_single("softirqs", 0, NULL, show_softirqs);
return 0;
}
fs_initcall(proc_softirqs_init);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index c486ad4b43f0..a20c6e495bb2 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -937,7 +937,7 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
/*
* The soft-dirty tracker uses #PF-s to catch writes
* to pages, so write-protect the pte as well. See the
- * Documentation/vm/soft-dirty.txt for full description
+ * Documentation/admin-guide/mm/soft-dirty.rst for full description
* of how soft-dirty works.
*/
pte_t ptent = *pte;
@@ -1421,7 +1421,7 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
* Bits 0-54 page frame number (PFN) if present
* Bits 0-4 swap type if swapped
* Bits 5-54 swap offset if swapped
- * Bit 55 pte is soft-dirty (see Documentation/vm/soft-dirty.txt)
+ * Bit 55 pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst)
* Bit 56 page exclusively mapped
* Bits 57-60 zero
* Bit 61 page is file-page or shared-anon
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index 9d2efaca499f..b905010ca9eb 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -12,7 +12,7 @@ static const char *proc_thread_self_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
- struct pid_namespace *ns = inode->i_sb->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(inode);
pid_t tgid = task_tgid_nr_ns(current, ns);
pid_t pid = task_pid_nr_ns(current, ns);
char *name;
@@ -36,7 +36,7 @@ static unsigned thread_self_inum __ro_after_init;
int proc_setup_thread_self(struct super_block *s)
{
struct inode *root_inode = d_inode(s->s_root);
- struct pid_namespace *ns = s->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(root_inode);
struct dentry *thread_self;
inode_lock(root_inode);
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 95a708d83721..3bd12f955867 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -30,21 +30,9 @@ static int uptime_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int uptime_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, uptime_proc_show, NULL);
-}
-
-static const struct file_operations uptime_proc_fops = {
- .open = uptime_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_uptime_init(void)
{
- proc_create("uptime", 0, NULL, &uptime_proc_fops);
+ proc_create_single("uptime", 0, NULL, uptime_proc_show);
return 0;
}
fs_initcall(proc_uptime_init);
diff --git a/fs/proc/version.c b/fs/proc/version.c
index 94901e8e700d..b449f186577f 100644
--- a/fs/proc/version.c
+++ b/fs/proc/version.c
@@ -15,21 +15,9 @@ static int version_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int version_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, version_proc_show, NULL);
-}
-
-static const struct file_operations version_proc_fops = {
- .open = version_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_version_init(void)
{
- proc_create("version", 0, NULL, &version_proc_fops);
+ proc_create_single("version", 0, NULL, version_proc_show);
return 0;
}
fs_initcall(proc_version_init);
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index eca27878079d..8d72221735d7 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -114,13 +114,9 @@ struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry, unsigned i
brelse(bh);
foundinode = qnx4_iget(dir->i_sb, ino);
- if (IS_ERR(foundinode)) {
+ if (IS_ERR(foundinode))
QNX4DEBUG((KERN_ERR "qnx4: lookup->iget -> error %ld\n",
PTR_ERR(foundinode)));
- return ERR_CAST(foundinode);
- }
out:
- d_add(dentry, foundinode);
-
- return NULL;
+ return d_splice_alias(foundinode, dentry);
}
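
qnx4 is one of several lookups in this series collapsed onto d_splice_alias(), which absorbs all three outcomes the old code handled by hand. The resulting canonical shape, with demo_* placeholders standing in for the filesystem's own helpers:

static struct dentry *demo_lookup(struct inode *dir, struct dentry *dentry,
				  unsigned int flags)
{
	struct inode *inode = NULL;
	ino_t ino;

	ino = demo_inode_by_name(dir, &dentry->d_name);	/* hypothetical */
	if (ino)
		inode = demo_iget(dir->i_sb, ino);	/* may be ERR_PTR() */
	/*
	 * d_splice_alias() does the rest:
	 *   NULL inode    -> hashes a negative dentry, returns NULL
	 *   IS_ERR(inode) -> returns ERR_CAST(inode)
	 *   valid inode   -> attaches it, or hands back an existing alias
	 */
	return d_splice_alias(inode, dentry);
}

The same shape appears below in qnx6, sysv, romfs, ubifs, and xfs_vn_lookup().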
diff --git a/fs/qnx6/namei.c b/fs/qnx6/namei.c
index 72c2770830be..e2e98e653b8d 100644
--- a/fs/qnx6/namei.c
+++ b/fs/qnx6/namei.c
@@ -29,15 +29,11 @@ struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
if (ino) {
foundinode = qnx6_iget(dir->i_sb, ino);
qnx6_put_page(page);
- if (IS_ERR(foundinode)) {
+ if (IS_ERR(foundinode))
pr_debug("lookup->iget -> error %ld\n",
PTR_ERR(foundinode));
- return ERR_CAST(foundinode);
- }
} else {
pr_debug("%s(): not found %s\n", __func__, name);
- return NULL;
}
- d_add(dentry, foundinode);
- return NULL;
+ return d_splice_alias(foundinode, dentry);
}
diff --git a/fs/read_write.c b/fs/read_write.c
index c4eabbfc90df..e83bd9744b5d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -2023,7 +2023,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
ret = mnt_want_write_file(dst_file);
if (ret) {
info->status = ret;
- goto next_loop;
+ goto next_fdput;
}
dst_off = info->dest_offset;
@@ -2058,9 +2058,9 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
next_file:
mnt_drop_write_file(dst_file);
-next_loop:
+next_fdput:
fdput(dst_fd);
-
+next_loop:
if (fatal_signal_pending(current))
goto out;
}
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index bd39a998843d..5089dac02660 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -687,8 +687,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod
reiserfs_update_inode_transaction(inode);
reiserfs_update_inode_transaction(dir);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
retval = journal_end(&th);
out_failed:
@@ -771,8 +770,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode
goto out_failed;
}
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
retval = journal_end(&th);
out_failed:
@@ -871,8 +869,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
/* the above add_entry did not update dir's stat data */
reiserfs_update_sd(&th, dir);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
retval = journal_end(&th);
out_failed:
reiserfs_write_unlock(dir->i_sb);
@@ -1187,8 +1184,7 @@ static int reiserfs_symlink(struct inode *parent_dir,
goto out_failed;
}
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
retval = journal_end(&th);
out_failed:
reiserfs_write_unlock(parent_dir->i_sb);
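
All four reiserfs call sites above swap the unlock_new_inode()/d_instantiate() pair for d_instantiate_new(). As I read the helper, it attaches the dentry first and only then clears I_NEW under inode->i_lock, so there is no window in which the new inode is findable in the icache while still anonymous; the old order left exactly that window open. At the call sites the change is purely mechanical:

	/* old order: inode becomes visible (I_NEW cleared) before it
	 * has a dentry attached */
	unlock_new_inode(inode);
	d_instantiate(dentry, inode);

	/* new helper: instantiate, then clear I_NEW and wake waiters,
	 * in one place and in the safe order */
	d_instantiate_new(dentry, inode);

The udf and ufs hunks further down are the same substitution.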
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index fe999157dd97..e39b3910d24d 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -389,27 +389,13 @@ static int show_journal(struct seq_file *m, void *unused)
return 0;
}
-static int r_open(struct inode *inode, struct file *file)
-{
- return single_open(file, PDE_DATA(inode),
- proc_get_parent_data(inode));
-}
-
-static const struct file_operations r_file_operations = {
- .open = r_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static struct proc_dir_entry *proc_info_root = NULL;
static const char proc_info_root_name[] = "fs/reiserfs";
static void add_file(struct super_block *sb, char *name,
int (*func) (struct seq_file *, void *))
{
- proc_create_data(name, 0, REISERFS_SB(sb)->procdir,
- &r_file_operations, func);
+ proc_create_single_data(name, 0, REISERFS_SB(sb)->procdir, func, sb);
}
int reiserfs_proc_info_init(struct super_block *sb)
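
Note where the extra sb argument ends up: proc_single_open() (added in fs/proc/generic.c above) hands the stored data pointer to single_open(), which makes it the seq_file's private field. So a show routine bound through proc_create_single_data() recovers its context like this (struct demo_ctx and demo_show are illustrative):

struct demo_ctx {
	int value;
};

static int demo_show(struct seq_file *m, void *v)
{
	/* the pointer passed as 'data' to proc_create_single_data() */
	struct demo_ctx *ctx = m->private;

	seq_printf(m, "value=%d\n", ctx->value);
	return 0;
}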
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 8f06fd1f3d69..6ccb51993a76 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -213,7 +213,7 @@ static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
unsigned long offset, maxoff;
- struct inode *inode;
+ struct inode *inode = NULL;
struct romfs_inode ri;
const char *name; /* got from dentry */
int len, ret;
@@ -233,7 +233,7 @@ static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry,
for (;;) {
if (!offset || offset >= maxoff)
- goto out0;
+ break;
ret = romfs_dev_read(dir->i_sb, offset, &ri, sizeof(ri));
if (ret < 0)
@@ -244,37 +244,19 @@ static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry,
len);
if (ret < 0)
goto error;
- if (ret == 1)
+ if (ret == 1) {
+ /* Hard link handling */
+ if ((be32_to_cpu(ri.next) & ROMFH_TYPE) == ROMFH_HRD)
+ offset = be32_to_cpu(ri.spec) & ROMFH_MASK;
+ inode = romfs_iget(dir->i_sb, offset);
break;
+ }
/* next entry */
offset = be32_to_cpu(ri.next) & ROMFH_MASK;
}
- /* Hard link handling */
- if ((be32_to_cpu(ri.next) & ROMFH_TYPE) == ROMFH_HRD)
- offset = be32_to_cpu(ri.spec) & ROMFH_MASK;
-
- inode = romfs_iget(dir->i_sb, offset);
- if (IS_ERR(inode)) {
- ret = PTR_ERR(inode);
- goto error;
- }
- goto outi;
-
- /*
- * it's a bit funky, _lookup needs to return an error code
- * (negative) or a NULL, both as a dentry. ENOENT should not
- * be returned, instead we need to create a negative dentry by
- * d_add(dentry, NULL); and return 0 as no error.
- * (Although as I see, it only matters on writable file
- * systems).
- */
-out0:
- inode = NULL;
-outi:
- d_add(dentry, inode);
- ret = 0;
+ return d_splice_alias(inode, dentry);
error:
return ERR_PTR(ret);
}
diff --git a/fs/select.c b/fs/select.c
index ba879c51288f..bc3cc0f98896 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -34,6 +34,29 @@
#include <linux/uaccess.h>
+__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt)
+{
+ if (file->f_op->poll) {
+ return file->f_op->poll(file, pt);
+ } else if (file_has_poll_mask(file)) {
+ unsigned int events = poll_requested_events(pt);
+ struct wait_queue_head *head;
+
+ if (pt && pt->_qproc) {
+ head = file->f_op->get_poll_head(file, events);
+ if (!head)
+ return DEFAULT_POLLMASK;
+ if (IS_ERR(head))
+ return EPOLLERR;
+ pt->_qproc(file, head, pt);
+ }
+
+ return file->f_op->poll_mask(file, events);
+ } else {
+ return DEFAULT_POLLMASK;
+ }
+}
+EXPORT_SYMBOL_GPL(vfs_poll);
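
vfs_poll() becomes the one place that understands both polling interfaces: the classic ->poll method, and the ->get_poll_head()/->poll_mask() pair that the timerfd conversion near the end of this series adopts. A driver opting into the new pair separates "where to sleep" from "what is ready", roughly as follows (struct demo_ctx and the demo_* ops are hypothetical):

struct demo_ctx {
	struct wait_queue_head wqh;
	bool ready;
};

static struct wait_queue_head *demo_get_poll_head(struct file *file,
						  __poll_t events)
{
	struct demo_ctx *ctx = file->private_data;

	/* only name the waitqueue; the core does the registration */
	return &ctx->wqh;
}

static __poll_t demo_poll_mask(struct file *file, __poll_t events)
{
	struct demo_ctx *ctx = file->private_data;

	/* report readiness only; note there is no poll_wait() here */
	return READ_ONCE(ctx->ready) ? EPOLLIN | EPOLLRDNORM : 0;
}

static const struct file_operations demo_fops = {
	.get_poll_head	= demo_get_poll_head,
	.poll_mask	= demo_poll_mask,
};

When the caller passes a NULL _qproc (a non-blocking poll), vfs_poll() skips the queueing step entirely and goes straight to ->poll_mask().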
/*
* Estimate expected accuracy in ns from a timeval.
@@ -233,7 +256,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
add_wait_queue(wait_address, &entry->wait);
}
-int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
+static int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
ktime_t *expires, unsigned long slack)
{
int rc = -EINTR;
@@ -258,7 +281,6 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
return rc;
}
-EXPORT_SYMBOL(poll_schedule_timeout);
/**
* poll_select_set_timeout - helper function to setup the timeout value
@@ -503,14 +525,10 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
continue;
f = fdget(i);
if (f.file) {
- const struct file_operations *f_op;
- f_op = f.file->f_op;
- mask = DEFAULT_POLLMASK;
- if (f_op->poll) {
- wait_key_set(wait, in, out,
- bit, busy_flag);
- mask = (*f_op->poll)(f.file, wait);
- }
+ wait_key_set(wait, in, out, bit,
+ busy_flag);
+ mask = vfs_poll(f.file, wait);
+
fdput(f);
if ((mask & POLLIN_SET) && (in & bit)) {
res_in |= bit;
@@ -813,34 +831,29 @@ static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait,
bool *can_busy_poll,
__poll_t busy_flag)
{
- __poll_t mask;
- int fd;
-
- mask = 0;
- fd = pollfd->fd;
- if (fd >= 0) {
- struct fd f = fdget(fd);
- mask = EPOLLNVAL;
- if (f.file) {
- /* userland u16 ->events contains POLL... bitmap */
- __poll_t filter = demangle_poll(pollfd->events) |
- EPOLLERR | EPOLLHUP;
- mask = DEFAULT_POLLMASK;
- if (f.file->f_op->poll) {
- pwait->_key = filter;
- pwait->_key |= busy_flag;
- mask = f.file->f_op->poll(f.file, pwait);
- if (mask & busy_flag)
- *can_busy_poll = true;
- }
- /* Mask out unneeded events. */
- mask &= filter;
- fdput(f);
- }
- }
+ int fd = pollfd->fd;
+ __poll_t mask = 0, filter;
+ struct fd f;
+
+ if (fd < 0)
+ goto out;
+ mask = EPOLLNVAL;
+ f = fdget(fd);
+ if (!f.file)
+ goto out;
+
+ /* userland u16 ->events contains POLL... bitmap */
+ filter = demangle_poll(pollfd->events) | EPOLLERR | EPOLLHUP;
+ pwait->_key = filter | busy_flag;
+ mask = vfs_poll(f.file, pwait);
+ if (mask & busy_flag)
+ *can_busy_poll = true;
+ mask &= filter; /* Mask out unneeded events. */
+ fdput(f);
+
+out:
/* ... and so does ->revents */
pollfd->revents = mangle_poll(mask);
-
return mask;
}
diff --git a/fs/seq_file.c b/fs/seq_file.c
index c6c27f1f9c98..4cc090b50cc5 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -709,11 +709,6 @@ void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
if (m->count + width >= m->size)
goto overflow;
- if (num < 10) {
- m->buf[m->count++] = num + '0';
- return;
- }
-
len = num_to_str(m->buf + m->count, m->size - m->count, num, width);
if (!len)
goto overflow;
diff --git a/fs/super.c b/fs/super.c
index 122c402049a2..50728d9c1a05 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -121,13 +121,23 @@ static unsigned long super_cache_count(struct shrinker *shrink,
sb = container_of(shrink, struct super_block, s_shrink);
/*
- * Don't call trylock_super as it is a potential
- * scalability bottleneck. The counts could get updated
- * between super_cache_count and super_cache_scan anyway.
- * Call to super_cache_count with shrinker_rwsem held
- * ensures the safety of call to list_lru_shrink_count() and
- * s_op->nr_cached_objects().
+ * We don't call trylock_super() here as it is a scalability bottleneck,
+ * so we're exposed to partial setup state. The shrinker rwsem does not
+ * protect filesystem operations backing list_lru_shrink_count() or
+ * s_op->nr_cached_objects(). Counts can change between
+ * super_cache_count and super_cache_scan, so we really don't need locks
+ * here.
+ *
+ * However, if we are currently mounting the superblock, the underlying
+ * filesystem might be in a state of partial construction and hence it
+ * is dangerous to access it. trylock_super() uses a SB_BORN check to
+ * avoid this situation, so do the same here. The memory barrier is
+ * matched with the one in mount_fs() as we don't hold locks here.
*/
+ if (!(sb->s_flags & SB_BORN))
+ return 0;
+ smp_rmb();
+
if (sb->s_op && sb->s_op->nr_cached_objects)
total_objects = sb->s_op->nr_cached_objects(sb, sc);
@@ -937,7 +947,7 @@ void emergency_remount(void)
static void do_thaw_all_callback(struct super_block *sb)
{
down_write(&sb->s_umount);
- if (sb->s_root && sb->s_flags & MS_BORN) {
+ if (sb->s_root && sb->s_flags & SB_BORN) {
emergency_thaw_bdev(sb);
thaw_super_locked(sb);
} else {
@@ -1272,6 +1282,14 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
sb = root->d_sb;
BUG_ON(!sb);
WARN_ON(!sb->s_bdi);
+
+ /*
+ * Write barrier is for super_cache_count(). We place it before setting
+ * SB_BORN as the data dependency between the two functions is the
+ * superblock structure contents that we just set up, not the SB_BORN
+ * flag.
+ */
+ smp_wmb();
sb->s_flags |= SB_BORN;
error = security_sb_kern_mount(sb, flags, secdata);
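
Taken together, the super.c hunks implement a standard publish/subscribe barrier pairing: mount_fs() finishes constructing the superblock, issues smp_wmb(), then sets SB_BORN; super_cache_count() tests SB_BORN and issues smp_rmb() before dereferencing anything the flag guards. Reduced to its skeleton (names illustrative; the patch itself uses plain accesses on sb->s_flags):

	/* writer (mount path) */
	init_everything(&obj);		/* set up the guarded fields */
	smp_wmb();			/* order fields before flag */
	WRITE_ONCE(obj.born, true);

	/* reader (shrinker path) */
	if (!READ_ONCE(obj.born))	/* not born yet: touch nothing */
		return 0;
	smp_rmb();			/* pairs with the smp_wmb() */
	use_everything(&obj);		/* fields guaranteed visible */

Without the smp_rmb(), the reader could observe SB_BORN set while still seeing stale values for fields written before it.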
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index b428d317ae92..92682fcc41f6 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -25,7 +25,7 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
{
struct dentry *root;
void *ns;
- bool new_sb;
+ bool new_sb = false;
if (!(flags & SB_KERNMOUNT)) {
if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET))
@@ -35,9 +35,9 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
root = kernfs_mount_ns(fs_type, flags, sysfs_root,
SYSFS_MAGIC, &new_sb, ns);
- if (IS_ERR(root) || !new_sb)
+ if (!new_sb)
kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
- else if (new_sb)
+ else if (!IS_ERR(root))
root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE;
return root;
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 250b0755b908..4d5d20491ffd 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -51,14 +51,9 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, un
if (dentry->d_name.len > SYSV_NAMELEN)
return ERR_PTR(-ENAMETOOLONG);
ino = sysv_inode_by_name(dentry);
-
- if (ino) {
+ if (ino)
inode = sysv_iget(dir->i_sb, ino);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
- }
- d_add(dentry, inode);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
static int sysv_mknod(struct inode * dir, struct dentry * dentry, umode_t mode, dev_t rdev)
diff --git a/fs/timerfd.c b/fs/timerfd.c
index cdad49da3ff7..d84a2bee4f82 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -226,21 +226,20 @@ static int timerfd_release(struct inode *inode, struct file *file)
kfree_rcu(ctx, rcu);
return 0;
}
-
-static __poll_t timerfd_poll(struct file *file, poll_table *wait)
+
+static struct wait_queue_head *timerfd_get_poll_head(struct file *file,
+ __poll_t eventmask)
{
struct timerfd_ctx *ctx = file->private_data;
- __poll_t events = 0;
- unsigned long flags;
- poll_wait(file, &ctx->wqh, wait);
+ return &ctx->wqh;
+}
- spin_lock_irqsave(&ctx->wqh.lock, flags);
- if (ctx->ticks)
- events |= EPOLLIN;
- spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+static __poll_t timerfd_poll_mask(struct file *file, __poll_t eventmask)
+{
+ struct timerfd_ctx *ctx = file->private_data;
- return events;
+ return ctx->ticks ? EPOLLIN : 0;
}
static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
@@ -364,7 +363,8 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg
static const struct file_operations timerfd_fops = {
.release = timerfd_release,
- .poll = timerfd_poll,
+ .get_poll_head = timerfd_get_poll_head,
+ .poll_mask = timerfd_poll_mask,
.read = timerfd_read,
.llseek = noop_llseek,
.show_fdinfo = timerfd_show,
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 9d7fb88e172e..4e267cc21c77 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -214,7 +214,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
int err;
union ubifs_key key;
struct inode *inode = NULL;
- struct ubifs_dent_node *dent;
+ struct ubifs_dent_node *dent = NULL;
struct ubifs_info *c = dir->i_sb->s_fs_info;
struct fscrypt_name nm;
@@ -229,14 +229,14 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
return ERR_PTR(err);
if (fname_len(&nm) > UBIFS_MAX_NLEN) {
- err = -ENAMETOOLONG;
- goto out_fname;
+ inode = ERR_PTR(-ENAMETOOLONG);
+ goto done;
}
dent = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS);
if (!dent) {
- err = -ENOMEM;
- goto out_fname;
+ inode = ERR_PTR(-ENOMEM);
+ goto done;
}
if (nm.hash) {
@@ -250,16 +250,16 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
}
if (err) {
- if (err == -ENOENT) {
+ if (err == -ENOENT)
dbg_gen("not found");
- goto done;
- }
- goto out_dent;
+ else
+ inode = ERR_PTR(err);
+ goto done;
}
if (dbg_check_name(c, dent, &nm)) {
- err = -EINVAL;
- goto out_dent;
+ inode = ERR_PTR(-EINVAL);
+ goto done;
}
inode = ubifs_iget(dir->i_sb, le64_to_cpu(dent->inum));
@@ -272,7 +272,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
ubifs_err(c, "dead directory entry '%pd', error %d",
dentry, err);
ubifs_ro_mode(c, err);
- goto out_dent;
+ goto done;
}
if (ubifs_crypt_is_encrypted(dir) &&
@@ -280,27 +280,14 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
!fscrypt_has_permitted_context(dir, inode)) {
ubifs_warn(c, "Inconsistent encryption contexts: %lu/%lu",
dir->i_ino, inode->i_ino);
- err = -EPERM;
- goto out_inode;
+ iput(inode);
+ inode = ERR_PTR(-EPERM);
}
done:
kfree(dent);
fscrypt_free_filename(&nm);
- /*
- * Note, d_splice_alias() would be required instead if we supported
- * NFS.
- */
- d_add(dentry, inode);
- return NULL;
-
-out_inode:
- iput(inode);
-out_dent:
- kfree(dent);
-out_fname:
- fscrypt_free_filename(&nm);
- return ERR_PTR(err);
+ return d_splice_alias(inode, dentry);
}
static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 0458dd47e105..c586026508db 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -622,8 +622,7 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
if (fibh.sbh != fibh.ebh)
brelse(fibh.ebh);
brelse(fibh.sbh);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
}
@@ -733,8 +732,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
inc_nlink(dir);
dir->i_ctime = dir->i_mtime = current_time(dir);
mark_inode_dirty(dir);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
if (fibh.sbh != fibh.ebh)
brelse(fibh.ebh);
brelse(fibh.sbh);
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 32545cd00ceb..d5f43ba76c59 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -39,8 +39,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode)
{
int err = ufs_add_link(dentry, inode);
if (!err) {
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
}
inode_dec_link_count(inode);
@@ -193,8 +192,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
if (err)
goto out_fail;
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
out_fail:
diff --git a/fs/xattr.c b/fs/xattr.c
index 61cd28ba25f3..f9cb1db187b7 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -229,7 +229,7 @@ out:
}
EXPORT_SYMBOL_GPL(vfs_setxattr);
-ssize_t
+static ssize_t
xattr_getsecurity(struct inode *inode, const char *name, void *value,
size_t size)
{
@@ -254,7 +254,6 @@ out:
out_noalloc:
return len;
}
-EXPORT_SYMBOL_GPL(xattr_getsecurity);
/*
* vfs_getxattr_alloc - allocate memory, if necessary, before calling getxattr
@@ -354,7 +353,6 @@ vfs_listxattr(struct dentry *dentry, char *list, size_t size)
if (error)
return error;
if (inode->i_op->listxattr && (inode->i_opflags & IOP_XATTR)) {
- error = -EOPNOTSUPP;
error = inode->i_op->listxattr(dentry, list, size);
} else {
error = security_inode_listsecurity(inode, list, size);
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index ce4a34a2751d..35a124400d60 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -511,7 +511,14 @@ xfs_attr_shortform_addname(xfs_da_args_t *args)
if (args->flags & ATTR_CREATE)
return retval;
retval = xfs_attr_shortform_remove(args);
- ASSERT(retval == 0);
+ if (retval)
+ return retval;
+ /*
+ * Since we have removed the old attr, clear ATTR_REPLACE so
+ * that the leaf format add routine won't trip over the attr
+ * not being around.
+ */
+ args->flags &= ~ATTR_REPLACE;
}
if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 6a7c2f03ea11..040eeda8426f 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -725,12 +725,16 @@ xfs_bmap_extents_to_btree(
*logflagsp = 0;
if ((error = xfs_alloc_vextent(&args))) {
xfs_iroot_realloc(ip, -1, whichfork);
+ ASSERT(ifp->if_broot == NULL);
+ XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
return error;
}
if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
xfs_iroot_realloc(ip, -1, whichfork);
+ ASSERT(ifp->if_broot == NULL);
+ XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
return -ENOSPC;
}
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index ef68b1de006a..1201107eabc6 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -466,6 +466,8 @@ xfs_dinode_verify(
return __this_address;
if (di_size > XFS_DFORK_DSIZE(dip, mp))
return __this_address;
+ if (dip->di_nextents)
+ return __this_address;
/* fall through */
case XFS_DINODE_FMT_EXTENTS:
case XFS_DINODE_FMT_BTREE:
@@ -484,12 +486,31 @@ xfs_dinode_verify(
if (XFS_DFORK_Q(dip)) {
switch (dip->di_aformat) {
case XFS_DINODE_FMT_LOCAL:
+ if (dip->di_anextents)
+ return __this_address;
+ /* fall through */
case XFS_DINODE_FMT_EXTENTS:
case XFS_DINODE_FMT_BTREE:
break;
default:
return __this_address;
}
+ } else {
+ /*
+ * If there is no fork offset, this may be a freshly-made inode
+ * in a new disk cluster, in which case di_aformat is zeroed.
+ * Otherwise, such an inode must be in EXTENTS format; this goes
+ * for freed inodes as well.
+ */
+ switch (dip->di_aformat) {
+ case 0:
+ case XFS_DINODE_FMT_EXTENTS:
+ break;
+ default:
+ return __this_address;
+ }
+ if (dip->di_anextents)
+ return __this_address;
}
/* only version 3 or greater inodes are extensively verified here */
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 0ab824f574ed..102463543db3 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -594,7 +594,7 @@ xfs_alloc_ioend(
struct xfs_ioend *ioend;
struct bio *bio;
- bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, xfs_ioend_bioset);
+ bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset);
xfs_init_bio_from_bh(bio, bh);
ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 69346d460dfa..694c85b03813 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -18,7 +18,7 @@
#ifndef __XFS_AOPS_H__
#define __XFS_AOPS_H__
-extern struct bio_set *xfs_ioend_bioset;
+extern struct bio_set xfs_ioend_bioset;
/*
* Types of I/O for bmap clustering and I/O completion tracking.
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 299aee4b7b0b..e70fb8ccecea 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -778,22 +778,26 @@ xfs_file_fallocate(
if (error)
goto out_unlock;
} else if (mode & FALLOC_FL_INSERT_RANGE) {
- unsigned int blksize_mask = i_blocksize(inode) - 1;
+ unsigned int blksize_mask = i_blocksize(inode) - 1;
+ loff_t isize = i_size_read(inode);
- new_size = i_size_read(inode) + len;
if (offset & blksize_mask || len & blksize_mask) {
error = -EINVAL;
goto out_unlock;
}
- /* check the new inode size does not wrap through zero */
- if (new_size > inode->i_sb->s_maxbytes) {
+ /*
+ * New inode size must not exceed ->s_maxbytes, accounting for
+ * possible signed overflow.
+ */
+ if (inode->i_sb->s_maxbytes - isize < len) {
error = -EFBIG;
goto out_unlock;
}
+ new_size = isize + len;
/* Offset should be less than i_size */
- if (offset >= i_size_read(inode)) {
+ if (offset >= isize) {
error = -EINVAL;
goto out_unlock;
}
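
[Note: the rewritten check matters because new_size = isize + len can overflow a signed loff_t when both operands are large, yielding a negative value that slips past a "new_size > s_maxbytes" test. Rearranged as "s_maxbytes - isize < len", both sides stay in range, since isize never exceeds s_maxbytes. A userspace demonstration, with the wrap computed in unsigned arithmetic to avoid undefined behaviour:

#include <limits.h>
#include <stdio.h>

int main(void)
{
	long long maxbytes = LLONG_MAX;		/* e.g. MAX_LFS_FILESIZE */
	long long isize = LLONG_MAX / 2 + 1000;	/* current i_size */
	long long len = LLONG_MAX / 2 + 1000;	/* insert-range length */

	/* what "isize + len" wraps to (computed unsigned, then cast) */
	long long new_size = (long long)((unsigned long long)isize +
					 (unsigned long long)len);

	printf("wrapped new_size: %lld\n", new_size);		/* negative */
	printf("old check rejects: %d\n", new_size > maxbytes);	/* 0: bug */
	printf("new check rejects: %d\n", maxbytes - isize < len); /* 1 */
	return 0;
}
]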
@@ -876,8 +880,18 @@ xfs_file_dedupe_range(
struct file *dst_file,
u64 dst_loff)
{
+ struct inode *srci = file_inode(src_file);
+ u64 max_dedupe;
int error;
+ /*
+ * Since we have to read all these pages in to compare them, cut
+ * it off at MAX_RW_COUNT/2 rounded down to the nearest block.
+ * That means we won't do more than MAX_RW_COUNT IO per request.
+ */
+ max_dedupe = (MAX_RW_COUNT >> 1) & ~(i_blocksize(srci) - 1);
+ if (len > max_dedupe)
+ len = max_dedupe;
error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
len, true);
if (error)
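
[Note: the clamp halves MAX_RW_COUNT because a dedupe must read both the source and the destination ranges to compare them, keeping total I/O per request within the usual read/write cap; the mask rounds down to a block boundary, which assumes a power-of-two block size (true for XFS). The arithmetic in isolation, with max_rw_count standing in for MAX_RW_COUNT:

static unsigned long long clamp_dedupe_len(unsigned long long len,
					   unsigned int blocksize,
					   unsigned long long max_rw_count)
{
	unsigned long long max_dedupe;

	/* half the cap (two files are read), block-aligned downward */
	max_dedupe = (max_rw_count >> 1) &
		     ~(unsigned long long)(blocksize - 1);
	return len > max_dedupe ? max_dedupe : len;
}
]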
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index a3ed3c811dfa..df42e4cb4dc4 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -260,6 +260,7 @@ xfs_vn_lookup(
struct dentry *dentry,
unsigned int flags)
{
+ struct inode *inode;
struct xfs_inode *cip;
struct xfs_name name;
int error;
@@ -269,14 +270,13 @@ xfs_vn_lookup(
xfs_dentry_to_name(&name, dentry);
error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
- if (unlikely(error)) {
- if (unlikely(error != -ENOENT))
- return ERR_PTR(error);
- d_add(dentry, NULL);
- return NULL;
- }
-
- return d_splice_alias(VFS_I(cip), dentry);
+ if (likely(!error))
+ inode = VFS_I(cip);
+ else if (likely(error == -ENOENT))
+ inode = NULL;
+ else
+ inode = ERR_PTR(error);
+ return d_splice_alias(inode, dentry);
}
STATIC struct dentry *
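
[Note: the conversion above relies on d_splice_alias() accepting all three outcomes directly: a real inode is spliced in, a NULL inode behaves like d_add(dentry, NULL) and yields a NULL return (a negative dentry), and an ERR_PTR inode comes back as the matching ERR_PTR dentry. The resulting generic ->lookup() shape, with foofs_* as hypothetical names and foofs_dir_find() an assumed helper returning 0, -ENOENT, or another errno:

#include <linux/fs.h>
#include <linux/dcache.h>
#include <linux/err.h>

static struct dentry *foofs_lookup(struct inode *dir,
				   struct dentry *dentry,
				   unsigned int flags)
{
	struct inode *inode;
	int error;

	/* hypothetical directory search: 0 / -ENOENT / -errno */
	error = foofs_dir_find(dir, &dentry->d_name, &inode);
	if (error == -ENOENT)
		inode = NULL;		/* instantiate a negative dentry */
	else if (error)
		inode = ERR_PTR(error);	/* returned as an ERR_PTR dentry */
	return d_splice_alias(inode, dentry);
}
]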
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index 056e12b421eb..1cc79907b377 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -113,6 +113,7 @@ void xfs_stats_clearall(struct xfsstats __percpu *stats)
}
}
+#ifdef CONFIG_PROC_FS
/* legacy quota interfaces */
#ifdef CONFIG_XFS_QUOTA
static int xqm_proc_show(struct seq_file *m, void *v)
@@ -124,18 +125,6 @@ static int xqm_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int xqm_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, xqm_proc_show, NULL);
-}
-
-static const struct file_operations xqm_proc_fops = {
- .open = xqm_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/* legacy quota stats interface no 2 */
static int xqmstat_proc_show(struct seq_file *m, void *v)
{
@@ -147,22 +136,8 @@ static int xqmstat_proc_show(struct seq_file *m, void *v)
seq_putc(m, '\n');
return 0;
}
-
-static int xqmstat_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, xqmstat_proc_show, NULL);
-}
-
-static const struct file_operations xqmstat_proc_fops = {
- .owner = THIS_MODULE,
- .open = xqmstat_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif /* CONFIG_XFS_QUOTA */
-#ifdef CONFIG_PROC_FS
int
xfs_init_procfs(void)
{
@@ -174,11 +149,9 @@ xfs_init_procfs(void)
goto out;
#ifdef CONFIG_XFS_QUOTA
- if (!proc_create("fs/xfs/xqmstat", 0, NULL,
- &xqmstat_proc_fops))
+ if (!proc_create_single("fs/xfs/xqmstat", 0, NULL, xqmstat_proc_show))
goto out;
- if (!proc_create("fs/xfs/xqm", 0, NULL,
- &xqm_proc_fops))
+ if (!proc_create_single("fs/xfs/xqm", 0, NULL, xqm_proc_show))
goto out;
#endif
return 0;
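
[Note: proc_create_single() folds the single_open()/seq_read()/single_release() file_operations boilerplate into procfs itself, so only the show callback survives the conversion; procfs also pins the owning module, which is why the .owner assignment can go. The pattern in miniature, with a hypothetical entry name:

#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int foo_stats_show(struct seq_file *m, void *v)
{
	seq_puts(m, "example stats\n");
	return 0;
}

static int __init foo_init_procfs(void)
{
	if (!proc_create_single("fs/foo_stats", 0, NULL, foo_stats_show))
		return -ENOMEM;
	return 0;
}
]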
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index d71424052917..f643d76db516 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -63,7 +63,7 @@
#include <linux/parser.h>
static const struct super_operations xfs_super_operations;
-struct bio_set *xfs_ioend_bioset;
+struct bio_set xfs_ioend_bioset;
static struct kset *xfs_kset; /* top-level xfs sysfs dir */
#ifdef DEBUG
@@ -1845,10 +1845,9 @@ MODULE_ALIAS_FS("xfs");
STATIC int __init
xfs_init_zones(void)
{
- xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE,
+ if (bioset_init(&xfs_ioend_bioset, 4 * MAX_BUF_PER_PAGE,
offsetof(struct xfs_ioend, io_inline_bio),
- BIOSET_NEED_BVECS);
- if (!xfs_ioend_bioset)
+ BIOSET_NEED_BVECS))
goto out;
xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
@@ -1997,7 +1996,7 @@ xfs_init_zones(void)
out_destroy_log_ticket_zone:
kmem_zone_destroy(xfs_log_ticket_zone);
out_free_ioend_bioset:
- bioset_free(xfs_ioend_bioset);
+ bioset_exit(&xfs_ioend_bioset);
out:
return -ENOMEM;
}
@@ -2029,7 +2028,7 @@ xfs_destroy_zones(void)
kmem_zone_destroy(xfs_btree_cur_zone);
kmem_zone_destroy(xfs_bmap_free_item_zone);
kmem_zone_destroy(xfs_log_ticket_zone);
- bioset_free(xfs_ioend_bioset);
+ bioset_exit(&xfs_ioend_bioset);
}
STATIC int __init
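
[Note: this follows the tree-wide move from bioset_create(), which returns a pointer or NULL, to an embedded bio_set initialized with bioset_init(), which returns 0 or an errno, and torn down with bioset_exit(); embedding removes a separate allocation and a pointer chase on every bio. The minimal shape of the pattern, with foo_* as stand-in names:

#include <linux/bio.h>
#include <linux/init.h>

static struct bio_set foo_bioset;	/* embedded, not a pointer */

static int __init foo_init(void)
{
	/* front_pad 0 here; XFS passes offsetof(struct xfs_ioend,
	 * io_inline_bio) so each ioend embeds its first bio */
	return bioset_init(&foo_bioset, BIO_MAX_PAGES, 0,
			   BIOSET_NEED_BVECS);
}

static void __exit foo_exit(void)
{
	bioset_exit(&foo_bioset);
}
]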