aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig1
-rw-r--r--fs/Kconfig.binfmt18
-rw-r--r--fs/afs/Makefile1
-rw-r--r--fs/afs/addr_list.c4
-rw-r--r--fs/afs/callback.c24
-rw-r--r--fs/afs/cmservice.c29
-rw-r--r--fs/afs/dir.c21
-rw-r--r--fs/afs/dir_silly.c5
-rw-r--r--fs/afs/dynroot.c8
-rw-r--r--fs/afs/file.c6
-rw-r--r--fs/afs/fsclient.c2
-rw-r--r--fs/afs/inode.c48
-rw-r--r--fs/afs/internal.h41
-rw-r--r--fs/afs/misc.c48
-rw-r--r--fs/afs/netdevices.c48
-rw-r--r--fs/afs/protocol_uae.h132
-rw-r--r--fs/afs/rxrpc.c2
-rw-r--r--fs/afs/server.c39
-rw-r--r--fs/afs/server_list.c6
-rw-r--r--fs/afs/volume.c1
-rw-r--r--fs/afs/write.c3
-rw-r--r--fs/aio.c28
-rw-r--r--fs/binfmt_flat.c122
-rw-r--r--fs/block_dev.c19
-rw-r--r--fs/btrfs/ioctl.c4
-rw-r--r--fs/buffer.c62
-rw-r--r--fs/ceph/file.c23
-rw-r--r--fs/ceph/mds_client.c3
-rw-r--r--fs/cifs/Kconfig2
-rw-r--r--fs/cifs/cifsencrypt.c62
-rw-r--r--fs/cifs/cifsfs.c5
-rw-r--r--fs/cifs/connect.c2
-rw-r--r--fs/cifs/dns_resolve.c3
-rw-r--r--fs/cifs/smb2ops.c64
-rw-r--r--fs/cifs/smb2pdu.h14
-rw-r--r--fs/configfs/dir.c3
-rw-r--r--fs/crypto/Kconfig1
-rw-r--r--fs/crypto/bio.c73
-rw-r--r--fs/crypto/crypto.c299
-rw-r--r--fs/crypto/fname.c1
-rw-r--r--fs/crypto/fscrypt_private.h15
-rw-r--r--fs/crypto/hooks.c1
-rw-r--r--fs/crypto/keyinfo.c1
-rw-r--r--fs/crypto/policy.c2
-rw-r--r--fs/dax.c11
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/debugfs/inode.c21
-rw-r--r--fs/devpts/inode.c1
-rw-r--r--fs/direct-io.c15
-rw-r--r--fs/eventpoll.c4
-rw-r--r--fs/exec.c2
-rw-r--r--fs/ext2/balloc.c3
-rw-r--r--fs/ext2/ialloc.c5
-rw-r--r--fs/ext2/inode.c7
-rw-r--r--fs/ext2/super.c17
-rw-r--r--fs/ext2/xattr.c164
-rw-r--r--fs/ext4/balloc.c4
-rw-r--r--fs/ext4/dir.c27
-rw-r--r--fs/ext4/ext4.h65
-rw-r--r--fs/ext4/ext4_jbd2.h12
-rw-r--r--fs/ext4/extents.c4
-rw-r--r--fs/ext4/extents_status.c1
-rw-r--r--fs/ext4/file.c4
-rw-r--r--fs/ext4/indirect.c22
-rw-r--r--fs/ext4/inline.c21
-rw-r--r--fs/ext4/inode.c130
-rw-r--r--fs/ext4/ioctl.c48
-rw-r--r--fs/ext4/mballoc.c5
-rw-r--r--fs/ext4/move_extent.c15
-rw-r--r--fs/ext4/namei.c213
-rw-r--r--fs/ext4/page-io.c44
-rw-r--r--fs/ext4/sysfs.c6
-rw-r--r--fs/f2fs/data.c17
-rw-r--r--fs/fs-writeback.c8
-rw-r--r--fs/fuse/file.c29
-rw-r--r--fs/gfs2/aops.c110
-rw-r--r--fs/gfs2/aops.h4
-rw-r--r--fs/gfs2/bmap.c16
-rw-r--r--fs/gfs2/dir.c4
-rw-r--r--fs/gfs2/file.c37
-rw-r--r--fs/gfs2/glock.c42
-rw-r--r--fs/gfs2/glock.h11
-rw-r--r--fs/gfs2/glops.c12
-rw-r--r--fs/gfs2/incore.h6
-rw-r--r--fs/gfs2/inode.c2
-rw-r--r--fs/gfs2/log.c3
-rw-r--r--fs/gfs2/lops.c22
-rw-r--r--fs/gfs2/meta_io.c6
-rw-r--r--fs/gfs2/ops_fstype.c27
-rw-r--r--fs/gfs2/quota.c2
-rw-r--r--fs/gfs2/recovery.c3
-rw-r--r--fs/gfs2/rgrp.c48
-rw-r--r--fs/gfs2/rgrp.h3
-rw-r--r--fs/gfs2/super.c43
-rw-r--r--fs/gfs2/super.h2
-rw-r--r--fs/gfs2/sys.c5
-rw-r--r--fs/gfs2/trans.c6
-rw-r--r--fs/gfs2/util.c8
-rw-r--r--fs/inode.c22
-rw-r--r--fs/internal.h2
-rw-r--r--fs/io_uring.c15
-rw-r--r--fs/iomap.c27
-rw-r--r--fs/jbd2/commit.c25
-rw-r--r--fs/jbd2/journal.c25
-rw-r--r--fs/jbd2/transaction.c49
-rw-r--r--fs/lockd/clntproc.c21
-rw-r--r--fs/lockd/svc4proc.c14
-rw-r--r--fs/lockd/svclock.c118
-rw-r--r--fs/lockd/svcproc.c14
-rw-r--r--fs/lockd/svcsubs.c2
-rw-r--r--fs/lockd/xdr.c3
-rw-r--r--fs/lockd/xdr4.c3
-rw-r--r--fs/locks.c67
-rw-r--r--fs/namei.c2
-rw-r--r--fs/namespace.c7
-rw-r--r--fs/nfs/dns_resolve.c3
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c2
-rw-r--r--fs/nfs/nfs4file.c23
-rw-r--r--fs/nfs/nfs4idmap.c2
-rw-r--r--fs/nfs/unlink.c6
-rw-r--r--fs/nfsd/blocklayout.c8
-rw-r--r--fs/nfsd/cache.h5
-rw-r--r--fs/nfsd/netns.h44
-rw-r--r--fs/nfsd/nfs4idmap.c2
-rw-r--r--fs/nfsd/nfs4state.c455
-rw-r--r--fs/nfsd/nfs4xdr.c38
-rw-r--r--fs/nfsd/nfscache.c236
-rw-r--r--fs/nfsd/nfsctl.c233
-rw-r--r--fs/nfsd/nfsd.h11
-rw-r--r--fs/nfsd/state.h11
-rw-r--r--fs/nfsd/vfs.c2
-rw-r--r--fs/nfsd/xdr4.h5
-rw-r--r--fs/notify/fanotify/fanotify_user.c22
-rw-r--r--fs/notify/fsnotify.c41
-rw-r--r--fs/proc/Kconfig4
-rw-r--r--fs/proc/array.c6
-rw-r--r--fs/proc/base.c9
-rw-r--r--fs/proc/root.c2
-rw-r--r--fs/proc/vmcore.c6
-rw-r--r--fs/pstore/ftrace.c18
-rw-r--r--fs/pstore/inode.c13
-rw-r--r--fs/pstore/ram.c21
-rw-r--r--fs/quota/dquot.c11
-rw-r--r--fs/quota/quota.c38
-rw-r--r--fs/read_write.c124
-rw-r--r--fs/select.c18
-rw-r--r--fs/seq_file.c11
-rw-r--r--fs/sysfs/group.c54
-rw-r--r--fs/tracefs/inode.c3
-rw-r--r--fs/ubifs/crypto.c19
-rw-r--r--fs/udf/inode.c93
-rw-r--r--fs/unicode/utf8-core.c28
-rw-r--r--fs/userfaultfd.c42
-rw-r--r--fs/xfs/xfs_aops.c2
-rw-r--r--fs/xfs/xfs_file.c15
155 files changed, 2988 insertions, 1759 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index f1046cf6ad85..bfb1c6095c7a 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -11,7 +11,6 @@ config DCACHE_WORD_ACCESS
config VALIDATE_FS_PARSER
bool "Validate filesystem parameter description"
- default y
help
Enable this to perform validation of the parameter description for a
filesystem when it is registered.
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index f87ddd1b6d72..62dc4f577ba1 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -91,12 +91,28 @@ config BINFMT_SCRIPT
Most systems will not boot if you say M or N here. If unsure, say Y.
+config ARCH_HAS_BINFMT_FLAT
+ bool
+
config BINFMT_FLAT
bool "Kernel support for flat binaries"
- depends on !MMU || ARM || M68K
+ depends on ARCH_HAS_BINFMT_FLAT
help
Support uClinux FLAT format binaries.
+config BINFMT_FLAT_ARGVP_ENVP_ON_STACK
+ bool
+
+config BINFMT_FLAT_OLD_ALWAYS_RAM
+ bool
+
+config BINFMT_FLAT_OLD
+ bool "Enable support for very old legacy flat binaries"
+ depends on BINFMT_FLAT
+ help
+ Support decade old uClinux FLAT format binaries. Unless you know
+ you have some of those say N here.
+
config BINFMT_ZFLAT
bool "Enable ZFLAT support"
depends on BINFMT_FLAT
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index cbf31f6cd177..10359bea7070 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -29,7 +29,6 @@ kafs-y := \
server.o \
server_list.o \
super.o \
- netdevices.o \
vlclient.o \
vl_list.o \
vl_probe.o \
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index 86da532c192f..df415c05939e 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -246,8 +246,8 @@ struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry
_enter("%s", cell->name);
- ret = dns_query("afsdb", cell->name, cell->name_len, "srv=1",
- &result, _expiry, true);
+ ret = dns_query(cell->net->net, "afsdb", cell->name, cell->name_len,
+ "srv=1", &result, _expiry, true);
if (ret < 0) {
_leave(" = %d [dns]", ret);
return ERR_PTR(ret);
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index d441bef72163..6cdd7047c809 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -48,7 +48,7 @@ static struct afs_cb_interest *afs_create_interest(struct afs_server *server,
refcount_set(&new->usage, 1);
new->sb = vnode->vfs_inode.i_sb;
new->vid = vnode->volume->vid;
- new->server = afs_get_server(server);
+ new->server = afs_get_server(server, afs_server_trace_get_new_cbi);
INIT_HLIST_NODE(&new->cb_vlink);
write_lock(&server->cb_break_lock);
@@ -195,7 +195,7 @@ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi)
write_unlock(&cbi->server->cb_break_lock);
if (vi)
kfree_rcu(vi, rcu);
- afs_put_server(net, cbi->server);
+ afs_put_server(net, cbi->server, afs_server_trace_put_cbi);
}
kfree_rcu(cbi, rcu);
}
@@ -212,7 +212,7 @@ void afs_init_callback_state(struct afs_server *server)
/*
* actually break a callback
*/
-void __afs_break_callback(struct afs_vnode *vnode)
+void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reason)
{
_enter("");
@@ -223,13 +223,17 @@ void __afs_break_callback(struct afs_vnode *vnode)
if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB)
afs_lock_may_be_available(vnode);
+
+ trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, true);
+ } else {
+ trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, false);
}
}
-void afs_break_callback(struct afs_vnode *vnode)
+void afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reason)
{
write_seqlock(&vnode->cb_lock);
- __afs_break_callback(vnode);
+ __afs_break_callback(vnode, reason);
write_sequnlock(&vnode->cb_lock);
}
@@ -275,9 +279,11 @@ static void afs_break_one_callback(struct afs_server *server,
struct afs_super_info *as = AFS_FS_S(cbi->sb);
struct afs_volume *volume = as->volume;
- write_lock(&volume->cb_break_lock);
+ write_lock(&volume->cb_v_break_lock);
volume->cb_v_break++;
- write_unlock(&volume->cb_break_lock);
+ trace_afs_cb_break(fid, volume->cb_v_break,
+ afs_cb_break_for_volume_callback, false);
+ write_unlock(&volume->cb_v_break_lock);
} else {
data.volume = NULL;
data.fid = *fid;
@@ -285,8 +291,10 @@ static void afs_break_one_callback(struct afs_server *server,
afs_iget5_test, &data);
if (inode) {
vnode = AFS_FS_I(inode);
- afs_break_callback(vnode);
+ afs_break_callback(vnode, afs_cb_break_for_callback);
iput(inode);
+ } else {
+ trace_afs_cb_miss(fid, afs_cb_break_for_callback);
}
}
}
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 3451be03667f..4f1b6f466ff5 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -256,8 +256,11 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
* server holds up change visibility till it receives our reply so as
* to maintain cache coherency.
*/
- if (call->server)
+ if (call->server) {
+ trace_afs_server(call->server, atomic_read(&call->server->usage),
+ afs_server_trace_callback);
afs_break_callbacks(call->server, call->count, call->request);
+ }
afs_send_empty_reply(call);
afs_put_call(call);
@@ -580,9 +583,8 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
*/
static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
{
- struct afs_interface *ifs;
struct afs_call *call = container_of(work, struct afs_call, work);
- int loop, nifs;
+ int loop;
struct {
struct /* InterfaceAddr */ {
@@ -600,19 +602,7 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
_enter("");
- nifs = 0;
- ifs = kcalloc(32, sizeof(*ifs), GFP_KERNEL);
- if (ifs) {
- nifs = afs_get_ipv4_interfaces(call->net, ifs, 32, false);
- if (nifs < 0) {
- kfree(ifs);
- ifs = NULL;
- nifs = 0;
- }
- }
-
memset(&reply, 0, sizeof(reply));
- reply.ia.nifs = htonl(nifs);
reply.ia.uuid[0] = call->net->uuid.time_low;
reply.ia.uuid[1] = htonl(ntohs(call->net->uuid.time_mid));
@@ -622,15 +612,6 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
for (loop = 0; loop < 6; loop++)
reply.ia.uuid[loop + 5] = htonl((s8) call->net->uuid.node[loop]);
- if (ifs) {
- for (loop = 0; loop < nifs; loop++) {
- reply.ia.ifaddr[loop] = ifs[loop].address.s_addr;
- reply.ia.netmask[loop] = ifs[loop].netmask.s_addr;
- reply.ia.mtu[loop] = htonl(ifs[loop].mtu);
- }
- kfree(ifs);
- }
-
reply.cap.capcount = htonl(1);
reply.cap.caps[0] = htonl(AFS_CAP_ERROR_TRANSLATION);
afs_send_simple_reply(call, &reply, sizeof(reply));
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index da9563d62b32..e640d67274be 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -238,8 +238,7 @@ retry:
if (nr_inline > (PAGE_SIZE - sizeof(*req)) / sizeof(struct page *))
nr_inline = 0;
- req = kzalloc(sizeof(*req) + sizeof(struct page *) * nr_inline,
- GFP_KERNEL);
+ req = kzalloc(struct_size(req, array, nr_inline), GFP_KERNEL);
if (!req)
return ERR_PTR(-ENOMEM);
@@ -1363,12 +1362,12 @@ static int afs_dir_remove_link(struct afs_vnode *dvnode, struct dentry *dentry,
drop_nlink(&vnode->vfs_inode);
if (vnode->vfs_inode.i_nlink == 0) {
set_bit(AFS_VNODE_DELETED, &vnode->flags);
- __afs_break_callback(vnode);
+ __afs_break_callback(vnode, afs_cb_break_for_unlink);
}
write_sequnlock(&vnode->cb_lock);
ret = 0;
} else {
- afs_break_callback(vnode);
+ afs_break_callback(vnode, afs_cb_break_for_unlink);
if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
kdebug("AFS_VNODE_DELETED");
@@ -1390,7 +1389,8 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
{
struct afs_fs_cursor fc;
struct afs_status_cb *scb;
- struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
+ struct afs_vnode *dvnode = AFS_FS_I(dir);
+ struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
struct key *key;
bool need_rehash = false;
int ret;
@@ -1413,15 +1413,12 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
}
/* Try to make sure we have a callback promise on the victim. */
- if (d_really_is_positive(dentry)) {
- vnode = AFS_FS_I(d_inode(dentry));
- ret = afs_validate(vnode, key);
- if (ret < 0)
- goto error_key;
- }
+ ret = afs_validate(vnode, key);
+ if (ret < 0)
+ goto error_key;
spin_lock(&dentry->d_lock);
- if (vnode && d_count(dentry) > 1) {
+ if (d_count(dentry) > 1) {
spin_unlock(&dentry->d_lock);
/* Start asynchronous writeout of the inode */
write_inode_now(d_inode(dentry), 0);
diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
index 057b8d322422..361088a5edb9 100644
--- a/fs/afs/dir_silly.c
+++ b/fs/afs/dir_silly.c
@@ -60,11 +60,6 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
afs_edit_dir_add(dvnode, &new->d_name,
&vnode->fid, afs_edit_dir_for_silly_1);
-
- /* vfs_unlink and the like do not issue this when a file is
- * sillyrenamed, so do it here.
- */
- fsnotify_nameremove(old, 0);
}
kfree(scb);
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index 9b3b2f1f1fc0..bcd1bafb0278 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -24,6 +24,7 @@ const struct file_operations afs_dynroot_file_operations = {
static int afs_probe_cell_name(struct dentry *dentry)
{
struct afs_cell *cell;
+ struct afs_net *net = afs_d2net(dentry);
const char *name = dentry->d_name.name;
size_t len = dentry->d_name.len;
int ret;
@@ -36,13 +37,14 @@ static int afs_probe_cell_name(struct dentry *dentry)
len--;
}
- cell = afs_lookup_cell_rcu(afs_d2net(dentry), name, len);
+ cell = afs_lookup_cell_rcu(net, name, len);
if (!IS_ERR(cell)) {
- afs_put_cell(afs_d2net(dentry), cell);
+ afs_put_cell(net, cell);
return 0;
}
- ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL, false);
+ ret = dns_query(net->net, "afsdb", name, len, "srv=1",
+ NULL, NULL, false);
if (ret == -ENODATA)
ret = -EDESTADDRREQ;
return ret;
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 8fd7d3b9a1b1..56b69576274d 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -310,8 +310,7 @@ int afs_page_filler(void *data, struct page *page)
/* fall through */
default:
go_on:
- req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *),
- GFP_KERNEL);
+ req = kzalloc(struct_size(req, array, 1), GFP_KERNEL);
if (!req)
goto enomem;
@@ -461,8 +460,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
n++;
}
- req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *) * n,
- GFP_NOFS);
+ req = kzalloc(struct_size(req, array, n), GFP_NOFS);
if (!req)
return -ENOMEM;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index a1ef0266422a..1ce73e014139 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -1911,7 +1911,7 @@ struct afs_call *afs_fs_get_capabilities(struct afs_net *net,
return ERR_PTR(-ENOMEM);
call->key = key;
- call->server = afs_get_server(server);
+ call->server = afs_get_server(server, afs_server_trace_get_caps);
call->server_index = server_index;
call->upgrade = true;
call->async = true;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index b42d9d09669c..7b1c18c32f48 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -56,6 +56,16 @@ static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *paren
}
/*
+ * Set the file size and block count. Estimate the number of 512 bytes blocks
+ * used, rounded up to nearest 1K for consistency with other AFS clients.
+ */
+static void afs_set_i_size(struct afs_vnode *vnode, u64 size)
+{
+ i_size_write(&vnode->vfs_inode, size);
+ vnode->vfs_inode.i_blocks = ((size + 1023) >> 10) << 1;
+}
+
+/*
* Initialise an inode from the vnode status.
*/
static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key,
@@ -124,12 +134,7 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key,
return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type);
}
- /*
- * Estimate 512 bytes blocks used, rounded up to nearest 1K
- * for consistency with other AFS clients.
- */
- inode->i_blocks = ((i_size_read(inode) + 1023) >> 10) << 1;
- i_size_write(&vnode->vfs_inode, status->size);
+ afs_set_i_size(vnode, status->size);
vnode->invalid_before = status->data_version;
inode_set_iversion_raw(&vnode->vfs_inode, status->data_version);
@@ -207,11 +212,13 @@ static void afs_apply_status(struct afs_fs_cursor *fc,
if (expected_version &&
*expected_version != status->data_version) {
- kdebug("vnode modified %llx on {%llx:%llu} [exp %llx] %s",
- (unsigned long long) status->data_version,
- vnode->fid.vid, vnode->fid.vnode,
- (unsigned long long) *expected_version,
- fc->type ? fc->type->name : "???");
+ if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
+ pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s\n",
+ vnode->fid.vid, vnode->fid.vnode,
+ (unsigned long long)*expected_version,
+ (unsigned long long)status->data_version,
+ fc->type ? fc->type->name : "???");
+
vnode->invalid_before = status->data_version;
if (vnode->status.type == AFS_FTYPE_DIR) {
if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
@@ -230,7 +237,7 @@ static void afs_apply_status(struct afs_fs_cursor *fc,
if (data_changed) {
inode_set_iversion_raw(&vnode->vfs_inode, status->data_version);
- i_size_write(&vnode->vfs_inode, status->size);
+ afs_set_i_size(vnode, status->size);
}
}
@@ -276,7 +283,7 @@ void afs_vnode_commit_status(struct afs_fs_cursor *fc,
if (scb->status.abort_code == VNOVNODE) {
set_bit(AFS_VNODE_DELETED, &vnode->flags);
clear_nlink(&vnode->vfs_inode);
- __afs_break_callback(vnode);
+ __afs_break_callback(vnode, afs_cb_break_for_deleted);
}
} else {
if (scb->have_status)
@@ -587,8 +594,9 @@ bool afs_check_validity(struct afs_vnode *vnode)
struct afs_cb_interest *cbi;
struct afs_server *server;
struct afs_volume *volume = vnode->volume;
+ enum afs_cb_break_reason need_clear = afs_cb_break_no_break;
time64_t now = ktime_get_real_seconds();
- bool valid, need_clear = false;
+ bool valid;
unsigned int cb_break, cb_s_break, cb_v_break;
int seq = 0;
@@ -606,13 +614,13 @@ bool afs_check_validity(struct afs_vnode *vnode)
vnode->cb_v_break != cb_v_break) {
vnode->cb_s_break = cb_s_break;
vnode->cb_v_break = cb_v_break;
- need_clear = true;
+ need_clear = afs_cb_break_for_vsbreak;
valid = false;
} else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
- need_clear = true;
+ need_clear = afs_cb_break_for_zap;
valid = false;
} else if (vnode->cb_expires_at - 10 <= now) {
- need_clear = true;
+ need_clear = afs_cb_break_for_lapsed;
valid = false;
} else {
valid = true;
@@ -628,10 +636,12 @@ bool afs_check_validity(struct afs_vnode *vnode)
done_seqretry(&vnode->cb_lock, seq);
- if (need_clear) {
+ if (need_clear != afs_cb_break_no_break) {
write_seqlock(&vnode->cb_lock);
if (cb_break == vnode->cb_break)
- __afs_break_callback(vnode);
+ __afs_break_callback(vnode, need_clear);
+ else
+ trace_afs_cb_miss(&vnode->fid, need_clear);
write_sequnlock(&vnode->cb_lock);
valid = false;
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 8a67bf741880..f66a3be12fd6 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -109,10 +109,8 @@ struct afs_call {
struct rxrpc_call *rxcall; /* RxRPC call handle */
struct key *key; /* security for this call */
struct afs_net *net; /* The network namespace */
- union {
- struct afs_server *server;
- struct afs_vlserver *vlserver;
- };
+ struct afs_server *server; /* The fileserver record if fs op (pins ref) */
+ struct afs_vlserver *vlserver; /* The vlserver record if vl op */
struct afs_cb_interest *cbi; /* Callback interest for server used */
struct afs_vnode *lvnode; /* vnode being locked */
void *request; /* request data (first part) */
@@ -516,6 +514,7 @@ struct afs_server {
atomic_t usage;
u32 addr_version; /* Address list version */
u32 cm_epoch; /* Server RxRPC epoch */
+ unsigned int debug_id; /* Debugging ID for traces */
/* file service access */
rwlock_t fs_lock; /* access lock */
@@ -616,7 +615,7 @@ struct afs_volume {
unsigned int servers_seq; /* Incremented each time ->servers changes */
unsigned cb_v_break; /* Break-everything counter. */
- rwlock_t cb_break_lock;
+ rwlock_t cb_v_break_lock;
afs_voltype_t type; /* type of volume */
short error;
@@ -721,15 +720,6 @@ struct afs_permits {
};
/*
- * record of one of a system's set of network interfaces
- */
-struct afs_interface {
- struct in_addr address; /* IPv4 address bound to interface */
- struct in_addr netmask; /* netmask applied to address */
- unsigned mtu; /* MTU of interface */
-};
-
-/*
* Error prioritisation and accumulation.
*/
struct afs_error {
@@ -846,9 +836,9 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def;
* callback.c
*/
extern void afs_init_callback_state(struct afs_server *);
-extern void __afs_break_callback(struct afs_vnode *);
-extern void afs_break_callback(struct afs_vnode *);
-extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break*);
+extern void __afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason);
+extern void afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason);
+extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break *);
extern int afs_register_server_cb_interest(struct afs_vnode *,
struct afs_server_list *, unsigned int);
@@ -1092,12 +1082,6 @@ extern struct vfsmount *afs_d_automount(struct path *);
extern void afs_mntpt_kill_timer(void);
/*
- * netdevices.c
- */
-extern int afs_get_ipv4_interfaces(struct afs_net *, struct afs_interface *,
- size_t, bool);
-
-/*
* proc.c
*/
#ifdef CONFIG_PROC_FS
@@ -1242,17 +1226,12 @@ extern void __exit afs_clean_up_permit_cache(void);
*/
extern spinlock_t afs_server_peer_lock;
-static inline struct afs_server *afs_get_server(struct afs_server *server)
-{
- atomic_inc(&server->usage);
- return server;
-}
-
extern struct afs_server *afs_find_server(struct afs_net *,
const struct sockaddr_rxrpc *);
extern struct afs_server *afs_find_server_by_uuid(struct afs_net *, const uuid_t *);
extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *);
-extern void afs_put_server(struct afs_net *, struct afs_server *);
+extern struct afs_server *afs_get_server(struct afs_server *, enum afs_server_trace);
+extern void afs_put_server(struct afs_net *, struct afs_server *, enum afs_server_trace);
extern void afs_manage_servers(struct work_struct *);
extern void afs_servers_timer(struct timer_list *);
extern void __net_exit afs_purge_servers(struct afs_net *);
@@ -1436,7 +1415,7 @@ static inline void afs_check_for_remote_deletion(struct afs_fs_cursor *fc,
{
if (fc->ac.error == -ENOENT) {
set_bit(AFS_VNODE_DELETED, &vnode->flags);
- afs_break_callback(vnode);
+ afs_break_callback(vnode, afs_cb_break_for_deleted);
}
}
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 5497ab38f585..52b19e9c1535 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -10,6 +10,7 @@
#include <linux/errno.h>
#include "internal.h"
#include "afs_fs.h"
+#include "protocol_uae.h"
/*
* convert an AFS abort code to a Linux error number
@@ -65,34 +66,25 @@ int afs_abort_to_error(u32 abort_code)
case AFSVL_PERM: return -EACCES;
case AFSVL_NOMEM: return -EREMOTEIO;
- /* Unified AFS error table; ET "uae" == 0x2f6df00 */
- case 0x2f6df00: return -EPERM;
- case 0x2f6df01: return -ENOENT;
- case 0x2f6df04: return -EIO;
- case 0x2f6df0a: return -EAGAIN;
- case 0x2f6df0b: return -ENOMEM;
- case 0x2f6df0c: return -EACCES;
- case 0x2f6df0f: return -EBUSY;
- case 0x2f6df10: return -EEXIST;
- case 0x2f6df11: return -EXDEV;
- case 0x2f6df12: return -ENODEV;
- case 0x2f6df13: return -ENOTDIR;
- case 0x2f6df14: return -EISDIR;
- case 0x2f6df15: return -EINVAL;
- case 0x2f6df1a: return -EFBIG;
- case 0x2f6df1b: return -ENOSPC;
- case 0x2f6df1d: return -EROFS;
- case 0x2f6df1e: return -EMLINK;
- case 0x2f6df20: return -EDOM;
- case 0x2f6df21: return -ERANGE;
- case 0x2f6df22: return -EDEADLK;
- case 0x2f6df23: return -ENAMETOOLONG;
- case 0x2f6df24: return -ENOLCK;
- case 0x2f6df26: return -ENOTEMPTY;
- case 0x2f6df28: return -EWOULDBLOCK;
- case 0x2f6df69: return -ENOTCONN;
- case 0x2f6df6c: return -ETIMEDOUT;
- case 0x2f6df78: return -EDQUOT;
+ /* Unified AFS error table */
+ case UAEPERM: return -EPERM;
+ case UAENOENT: return -ENOENT;
+ case UAEACCES: return -EACCES;
+ case UAEBUSY: return -EBUSY;
+ case UAEEXIST: return -EEXIST;
+ case UAENOTDIR: return -ENOTDIR;
+ case UAEISDIR: return -EISDIR;
+ case UAEFBIG: return -EFBIG;
+ case UAENOSPC: return -ENOSPC;
+ case UAEROFS: return -EROFS;
+ case UAEMLINK: return -EMLINK;
+ case UAEDEADLK: return -EDEADLK;
+ case UAENAMETOOLONG: return -ENAMETOOLONG;
+ case UAENOLCK: return -ENOLCK;
+ case UAENOTEMPTY: return -ENOTEMPTY;
+ case UAELOOP: return -ELOOP;
+ case UAENOMEDIUM: return -ENOMEDIUM;
+ case UAEDQUOT: return -EDQUOT;
/* RXKAD abort codes; from include/rxrpc/packet.h. ET "RXK" == 0x1260B00 */
case RXKADINCONSISTENCY: return -EPROTO;
diff --git a/fs/afs/netdevices.c b/fs/afs/netdevices.c
deleted file mode 100644
index 2a009d1939d7..000000000000
--- a/fs/afs/netdevices.c
+++ /dev/null
@@ -1,48 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* AFS network device helpers
- *
- * Copyright (c) 2007 Patrick McHardy <kaber@trash.net>
- */
-
-#include <linux/string.h>
-#include <linux/rtnetlink.h>
-#include <linux/inetdevice.h>
-#include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <net/net_namespace.h>
-#include "internal.h"
-
-/*
- * get a list of this system's interface IPv4 addresses, netmasks and MTUs
- * - maxbufs must be at least 1
- * - returns the number of interface records in the buffer
- */
-int afs_get_ipv4_interfaces(struct afs_net *net, struct afs_interface *bufs,
- size_t maxbufs, bool wantloopback)
-{
- struct net_device *dev;
- struct in_device *idev;
- int n = 0;
-
- ASSERT(maxbufs > 0);
-
- rtnl_lock();
- for_each_netdev(net->net, dev) {
- if (dev->type == ARPHRD_LOOPBACK && !wantloopback)
- continue;
- idev = __in_dev_get_rtnl(dev);
- if (!idev)
- continue;
- for_primary_ifa(idev) {
- bufs[n].address.s_addr = ifa->ifa_address;
- bufs[n].netmask.s_addr = ifa->ifa_mask;
- bufs[n].mtu = dev->mtu;
- n++;
- if (n >= maxbufs)
- goto out;
- } endfor_ifa(idev);
- }
-out:
- rtnl_unlock();
- return n;
-}
diff --git a/fs/afs/protocol_uae.h b/fs/afs/protocol_uae.h
new file mode 100644
index 000000000000..1b3d1060bd34
--- /dev/null
+++ b/fs/afs/protocol_uae.h
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Universal AFS Error codes (UAE).
+ *
+ * Copyright (C) 2003, Daria Phoebe Brashear
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ */
+
+enum {
+ UAEPERM = 0x2f6df00, /* Operation not permitted */
+ UAENOENT = 0x2f6df01, /* No such file or directory */
+ UAESRCH = 0x2f6df02, /* No such process */
+ UAEINTR = 0x2f6df03, /* Interrupted system call */
+ UAEIO = 0x2f6df04, /* I/O error */
+ UAENXIO = 0x2f6df05, /* No such device or address */
+ UAE2BIG = 0x2f6df06, /* Arg list too long */
+ UAENOEXEC = 0x2f6df07, /* Exec format error */
+ UAEBADF = 0x2f6df08, /* Bad file number */
+ UAECHILD = 0x2f6df09, /* No child processes */
+ UAEAGAIN = 0x2f6df0a, /* Try again */
+ UAENOMEM = 0x2f6df0b, /* Out of memory */
+ UAEACCES = 0x2f6df0c, /* Permission denied */
+ UAEFAULT = 0x2f6df0d, /* Bad address */
+ UAENOTBLK = 0x2f6df0e, /* Block device required */
+ UAEBUSY = 0x2f6df0f, /* Device or resource busy */
+ UAEEXIST = 0x2f6df10, /* File exists */
+ UAEXDEV = 0x2f6df11, /* Cross-device link */
+ UAENODEV = 0x2f6df12, /* No such device */
+ UAENOTDIR = 0x2f6df13, /* Not a directory */
+ UAEISDIR = 0x2f6df14, /* Is a directory */
+ UAEINVAL = 0x2f6df15, /* Invalid argument */
+ UAENFILE = 0x2f6df16, /* File table overflow */
+ UAEMFILE = 0x2f6df17, /* Too many open files */
+ UAENOTTY = 0x2f6df18, /* Not a typewriter */
+ UAETXTBSY = 0x2f6df19, /* Text file busy */
+ UAEFBIG = 0x2f6df1a, /* File too large */
+ UAENOSPC = 0x2f6df1b, /* No space left on device */
+ UAESPIPE = 0x2f6df1c, /* Illegal seek */
+ UAEROFS = 0x2f6df1d, /* Read-only file system */
+ UAEMLINK = 0x2f6df1e, /* Too many links */
+ UAEPIPE = 0x2f6df1f, /* Broken pipe */
+ UAEDOM = 0x2f6df20, /* Math argument out of domain of func */
+ UAERANGE = 0x2f6df21, /* Math result not representable */
+ UAEDEADLK = 0x2f6df22, /* Resource deadlock would occur */
+ UAENAMETOOLONG = 0x2f6df23, /* File name too long */
+ UAENOLCK = 0x2f6df24, /* No record locks available */
+ UAENOSYS = 0x2f6df25, /* Function not implemented */
+ UAENOTEMPTY = 0x2f6df26, /* Directory not empty */
+ UAELOOP = 0x2f6df27, /* Too many symbolic links encountered */
+ UAEWOULDBLOCK = 0x2f6df28, /* Operation would block */
+ UAENOMSG = 0x2f6df29, /* No message of desired type */
+ UAEIDRM = 0x2f6df2a, /* Identifier removed */
+ UAECHRNG = 0x2f6df2b, /* Channel number out of range */
+ UAEL2NSYNC = 0x2f6df2c, /* Level 2 not synchronized */
+ UAEL3HLT = 0x2f6df2d, /* Level 3 halted */
+ UAEL3RST = 0x2f6df2e, /* Level 3 reset */
+ UAELNRNG = 0x2f6df2f, /* Link number out of range */
+ UAEUNATCH = 0x2f6df30, /* Protocol driver not attached */
+ UAENOCSI = 0x2f6df31, /* No CSI structure available */
+ UAEL2HLT = 0x2f6df32, /* Level 2 halted */
+ UAEBADE = 0x2f6df33, /* Invalid exchange */
+ UAEBADR = 0x2f6df34, /* Invalid request descriptor */
+ UAEXFULL = 0x2f6df35, /* Exchange full */
+ UAENOANO = 0x2f6df36, /* No anode */
+ UAEBADRQC = 0x2f6df37, /* Invalid request code */
+ UAEBADSLT = 0x2f6df38, /* Invalid slot */
+ UAEBFONT = 0x2f6df39, /* Bad font file format */
+ UAENOSTR = 0x2f6df3a, /* Device not a stream */
+ UAENODATA = 0x2f6df3b, /* No data available */
+ UAETIME = 0x2f6df3c, /* Timer expired */
+ UAENOSR = 0x2f6df3d, /* Out of streams resources */
+ UAENONET = 0x2f6df3e, /* Machine is not on the network */
+ UAENOPKG = 0x2f6df3f, /* Package not installed */
+ UAEREMOTE = 0x2f6df40, /* Object is remote */
+ UAENOLINK = 0x2f6df41, /* Link has been severed */
+ UAEADV = 0x2f6df42, /* Advertise error */
+ UAESRMNT = 0x2f6df43, /* Srmount error */
+ UAECOMM = 0x2f6df44, /* Communication error on send */
+ UAEPROTO = 0x2f6df45, /* Protocol error */
+ UAEMULTIHOP = 0x2f6df46, /* Multihop attempted */
+ UAEDOTDOT = 0x2f6df47, /* RFS specific error */
+ UAEBADMSG = 0x2f6df48, /* Not a data message */
+ UAEOVERFLOW = 0x2f6df49, /* Value too large for defined data type */
+ UAENOTUNIQ = 0x2f6df4a, /* Name not unique on network */
+ UAEBADFD = 0x2f6df4b, /* File descriptor in bad state */
+ UAEREMCHG = 0x2f6df4c, /* Remote address changed */
+ UAELIBACC = 0x2f6df4d, /* Can not access a needed shared library */
+ UAELIBBAD = 0x2f6df4e, /* Accessing a corrupted shared library */
+ UAELIBSCN = 0x2f6df4f, /* .lib section in a.out corrupted */
+ UAELIBMAX = 0x2f6df50, /* Attempting to link in too many shared libraries */
+ UAELIBEXEC = 0x2f6df51, /* Cannot exec a shared library directly */
+ UAEILSEQ = 0x2f6df52, /* Illegal byte sequence */
+ UAERESTART = 0x2f6df53, /* Interrupted system call should be restarted */
+ UAESTRPIPE = 0x2f6df54, /* Streams pipe error */
+ UAEUSERS = 0x2f6df55, /* Too many users */
+ UAENOTSOCK = 0x2f6df56, /* Socket operation on non-socket */
+ UAEDESTADDRREQ = 0x2f6df57, /* Destination address required */
+ UAEMSGSIZE = 0x2f6df58, /* Message too long */
+ UAEPROTOTYPE = 0x2f6df59, /* Protocol wrong type for socket */
+ UAENOPROTOOPT = 0x2f6df5a, /* Protocol not available */
+ UAEPROTONOSUPPORT = 0x2f6df5b, /* Protocol not supported */
+ UAESOCKTNOSUPPORT = 0x2f6df5c, /* Socket type not supported */
+ UAEOPNOTSUPP = 0x2f6df5d, /* Operation not supported on transport endpoint */
+ UAEPFNOSUPPORT = 0x2f6df5e, /* Protocol family not supported */
+ UAEAFNOSUPPORT = 0x2f6df5f, /* Address family not supported by protocol */
+ UAEADDRINUSE = 0x2f6df60, /* Address already in use */
+ UAEADDRNOTAVAIL = 0x2f6df61, /* Cannot assign requested address */
+ UAENETDOWN = 0x2f6df62, /* Network is down */
+ UAENETUNREACH = 0x2f6df63, /* Network is unreachable */
+ UAENETRESET = 0x2f6df64, /* Network dropped connection because of reset */
+ UAECONNABORTED = 0x2f6df65, /* Software caused connection abort */
+ UAECONNRESET = 0x2f6df66, /* Connection reset by peer */
+ UAENOBUFS = 0x2f6df67, /* No buffer space available */
+ UAEISCONN = 0x2f6df68, /* Transport endpoint is already connected */
+ UAENOTCONN = 0x2f6df69, /* Transport endpoint is not connected */
+ UAESHUTDOWN = 0x2f6df6a, /* Cannot send after transport endpoint shutdown */
+ UAETOOMANYREFS = 0x2f6df6b, /* Too many references: cannot splice */
+ UAETIMEDOUT = 0x2f6df6c, /* Connection timed out */
+ UAECONNREFUSED = 0x2f6df6d, /* Connection refused */
+ UAEHOSTDOWN = 0x2f6df6e, /* Host is down */
+ UAEHOSTUNREACH = 0x2f6df6f, /* No route to host */
+ UAEALREADY = 0x2f6df70, /* Operation already in progress */
+ UAEINPROGRESS = 0x2f6df71, /* Operation now in progress */
+ UAESTALE = 0x2f6df72, /* Stale NFS file handle */
+ UAEUCLEAN = 0x2f6df73, /* Structure needs cleaning */
+ UAENOTNAM = 0x2f6df74, /* Not a XENIX named type file */
+ UAENAVAIL = 0x2f6df75, /* No XENIX semaphores available */
+ UAEISNAM = 0x2f6df76, /* Is a named type file */
+ UAEREMOTEIO = 0x2f6df77, /* Remote I/O error */
+ UAEDQUOT = 0x2f6df78, /* Quota exceeded */
+ UAENOMEDIUM = 0x2f6df79, /* No medium found */
+ UAEMEDIUMTYPE = 0x2f6df7a, /* Wrong medium type */
+};
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index d1dde2834b6d..0e5269374ac1 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -184,7 +184,7 @@ void afs_put_call(struct afs_call *call)
if (call->type->destructor)
call->type->destructor(call);
- afs_put_server(call->net, call->server);
+ afs_put_server(call->net, call->server, afs_server_trace_put_call);
afs_put_cb_interest(call->net, call->cbi);
afs_put_addrlist(call->alist);
kfree(call->request);
diff --git a/fs/afs/server.c b/fs/afs/server.c
index e900cd74361b..64d440aaabc0 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -13,6 +13,7 @@
static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */
static unsigned afs_server_update_delay = 30; /* Time till VLDB recheck in secs */
+static atomic_t afs_server_debug_id;
static void afs_inc_servers_outstanding(struct afs_net *net)
{
@@ -47,7 +48,7 @@ struct afs_server *afs_find_server(struct afs_net *net,
do {
if (server)
- afs_put_server(net, server);
+ afs_put_server(net, server, afs_server_trace_put_find_rsq);
server = NULL;
read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
@@ -112,7 +113,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu
* changes.
*/
if (server)
- afs_put_server(net, server);
+ afs_put_server(net, server, afs_server_trace_put_uuid_rsq);
server = NULL;
read_seqbegin_or_lock(&net->fs_lock, &seq);
@@ -127,7 +128,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu
} else if (diff > 0) {
p = p->rb_right;
} else {
- afs_get_server(server);
+ afs_get_server(server, afs_server_trace_get_by_uuid);
break;
}
@@ -198,7 +199,7 @@ static struct afs_server *afs_install_server(struct afs_net *net,
ret = 0;
exists:
- afs_get_server(server);
+ afs_get_server(server, afs_server_trace_get_install);
write_sequnlock(&net->fs_lock);
return server;
}
@@ -219,6 +220,7 @@ static struct afs_server *afs_alloc_server(struct afs_net *net,
goto enomem;
atomic_set(&server->usage, 1);
+ server->debug_id = atomic_inc_return(&afs_server_debug_id);
RCU_INIT_POINTER(server->addresses, alist);
server->addr_version = alist->version;
server->uuid = *uuid;
@@ -230,6 +232,7 @@ static struct afs_server *afs_alloc_server(struct afs_net *net,
spin_lock_init(&server->probe_lock);
afs_inc_servers_outstanding(net);
+ trace_afs_server(server, 1, afs_server_trace_alloc);
_leave(" = %p", server);
return server;
@@ -325,9 +328,22 @@ void afs_servers_timer(struct timer_list *timer)
}
/*
+ * Get a reference on a server object.
+ */
+struct afs_server *afs_get_server(struct afs_server *server,
+ enum afs_server_trace reason)
+{
+ unsigned int u = atomic_inc_return(&server->usage);
+
+ trace_afs_server(server, u, reason);
+ return server;
+}
+
+/*
* Release a reference on a server record.
*/
-void afs_put_server(struct afs_net *net, struct afs_server *server)
+void afs_put_server(struct afs_net *net, struct afs_server *server,
+ enum afs_server_trace reason)
{
unsigned int usage;
@@ -338,7 +354,7 @@ void afs_put_server(struct afs_net *net, struct afs_server *server)
usage = atomic_dec_return(&server->usage);
- _enter("{%u}", usage);
+ trace_afs_server(server, usage, reason);
if (likely(usage > 0))
return;
@@ -350,6 +366,8 @@ static void afs_server_rcu(struct rcu_head *rcu)
{
struct afs_server *server = container_of(rcu, struct afs_server, rcu);
+ trace_afs_server(server, atomic_read(&server->usage),
+ afs_server_trace_free);
afs_put_addrlist(rcu_access_pointer(server->addresses));
kfree(server);
}
@@ -365,7 +383,9 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
.index = alist->preferred,
.error = 0,
};
- _enter("%p", server);
+
+ trace_afs_server(server, atomic_read(&server->usage),
+ afs_server_trace_give_up_cb);
if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
@@ -373,6 +393,8 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
wait_var_event(&server->probe_outstanding,
atomic_read(&server->probe_outstanding) == 0);
+ trace_afs_server(server, atomic_read(&server->usage),
+ afs_server_trace_destroy);
call_rcu(&server->rcu, afs_server_rcu);
afs_dec_servers_outstanding(net);
}
@@ -392,6 +414,7 @@ static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
write_seqlock(&net->fs_lock);
usage = 1;
deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
+ trace_afs_server(server, usage, afs_server_trace_gc);
if (deleted) {
rb_erase(&server->uuid_rb, &net->fs_servers);
hlist_del_rcu(&server->proc_link);
@@ -514,6 +537,8 @@ static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct a
_enter("");
+ trace_afs_server(server, atomic_read(&server->usage), afs_server_trace_update);
+
alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
&server->uuid);
if (IS_ERR(alist)) {
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index b4988bc8e6f2..888d91d195d9 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -16,7 +16,8 @@ void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist)
if (slist && refcount_dec_and_test(&slist->usage)) {
for (i = 0; i < slist->nr_servers; i++) {
afs_put_cb_interest(net, slist->servers[i].cb_interest);
- afs_put_server(net, slist->servers[i].server);
+ afs_put_server(net, slist->servers[i].server,
+ afs_server_trace_put_slist);
}
kfree(slist);
}
@@ -67,7 +68,8 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
break;
if (j < slist->nr_servers) {
if (slist->servers[j].server == server) {
- afs_put_server(cell->net, server);
+ afs_put_server(cell->net, server,
+ afs_server_trace_put_slist_isort);
continue;
}
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 08fdb3951c49..1a414300b654 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -43,6 +43,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
atomic_set(&volume->usage, 1);
INIT_LIST_HEAD(&volume->proc_link);
rwlock_init(&volume->servers_lock);
+ rwlock_init(&volume->cb_v_break_lock);
memcpy(volume->name, vldb->name, vldb->name_len + 1);
slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 98eb7adbce91..cb76566763db 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -44,8 +44,7 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
return 0;
}
- req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *),
- GFP_KERNEL);
+ req = kzalloc(struct_size(req, array, 1), GFP_KERNEL);
if (!req)
return -ENOMEM;
diff --git a/fs/aio.c b/fs/aio.c
index 3490d1fa0e16..c1e581dd32f5 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -2095,6 +2095,7 @@ SYSCALL_DEFINE6(io_pgetevents,
struct __aio_sigset ksig = { NULL, };
sigset_t ksigmask, sigsaved;
struct timespec64 ts;
+ bool interrupted;
int ret;
if (timeout && unlikely(get_timespec64(&ts, timeout)))
@@ -2108,8 +2109,10 @@ SYSCALL_DEFINE6(io_pgetevents,
return ret;
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
- restore_user_sigmask(ksig.sigmask, &sigsaved);
- if (signal_pending(current) && !ret)
+
+ interrupted = signal_pending(current);
+ restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
+ if (interrupted && !ret)
ret = -ERESTARTNOHAND;
return ret;
@@ -2128,6 +2131,7 @@ SYSCALL_DEFINE6(io_pgetevents_time32,
struct __aio_sigset ksig = { NULL, };
sigset_t ksigmask, sigsaved;
struct timespec64 ts;
+ bool interrupted;
int ret;
if (timeout && unlikely(get_old_timespec32(&ts, timeout)))
@@ -2142,8 +2146,10 @@ SYSCALL_DEFINE6(io_pgetevents_time32,
return ret;
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
- restore_user_sigmask(ksig.sigmask, &sigsaved);
- if (signal_pending(current) && !ret)
+
+ interrupted = signal_pending(current);
+ restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
+ if (interrupted && !ret)
ret = -ERESTARTNOHAND;
return ret;
@@ -2193,6 +2199,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents,
struct __compat_aio_sigset ksig = { NULL, };
sigset_t ksigmask, sigsaved;
struct timespec64 t;
+ bool interrupted;
int ret;
if (timeout && get_old_timespec32(&t, timeout))
@@ -2206,8 +2213,10 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents,
return ret;
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
- restore_user_sigmask(ksig.sigmask, &sigsaved);
- if (signal_pending(current) && !ret)
+
+ interrupted = signal_pending(current);
+ restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
+ if (interrupted && !ret)
ret = -ERESTARTNOHAND;
return ret;
@@ -2226,6 +2235,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64,
struct __compat_aio_sigset ksig = { NULL, };
sigset_t ksigmask, sigsaved;
struct timespec64 t;
+ bool interrupted;
int ret;
if (timeout && get_timespec64(&t, timeout))
@@ -2239,8 +2249,10 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64,
return ret;
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
- restore_user_sigmask(ksig.sigmask, &sigsaved);
- if (signal_pending(current) && !ret)
+
+ interrupted = signal_pending(current);
+ restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
+ if (interrupted && !ret)
ret = -ERESTARTNOHAND;
return ret;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 82a48e830018..8c6b50f34466 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -42,6 +42,11 @@
#include <asm/unaligned.h>
#include <asm/cacheflush.h>
#include <asm/page.h>
+#include <asm/flat.h>
+
+#ifndef flat_get_relocate_addr
+#define flat_get_relocate_addr(rel) (rel)
+#endif
/****************************************************************************/
@@ -63,6 +68,12 @@
#define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */
#define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */
+#ifdef CONFIG_BINFMT_SHARED_FLAT
+#define MAX_SHARED_LIBS (4)
+#else
+#define MAX_SHARED_LIBS (1)
+#endif
+
struct lib_info {
struct {
unsigned long start_code; /* Start of text segment */
@@ -120,14 +131,15 @@ static int create_flat_tables(struct linux_binprm *bprm, unsigned long arg_start
sp -= bprm->envc + 1;
sp -= bprm->argc + 1;
- sp -= flat_argvp_envp_on_stack() ? 2 : 0;
+ if (IS_ENABLED(CONFIG_BINFMT_FLAT_ARGVP_ENVP_ON_STACK))
+ sp -= 2; /* argvp + envp */
sp -= 1; /* &argc */
current->mm->start_stack = (unsigned long)sp & -FLAT_STACK_ALIGN;
sp = (unsigned long __user *)current->mm->start_stack;
__put_user(bprm->argc, sp++);
- if (flat_argvp_envp_on_stack()) {
+ if (IS_ENABLED(CONFIG_BINFMT_FLAT_ARGVP_ENVP_ON_STACK)) {
unsigned long argv, envp;
argv = (unsigned long)(sp + 2);
envp = (unsigned long)(sp + 2 + bprm->argc + 1);
@@ -345,7 +357,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp)
start_code = p->lib_list[id].start_code;
text_len = p->lib_list[id].text_len;
- if (!flat_reloc_valid(r, start_brk - start_data + text_len)) {
+ if (r > start_brk - start_data + text_len) {
pr_err("reloc outside program 0x%lx (0 - 0x%lx/0x%lx)",
r, start_brk-start_data+text_len, text_len);
goto failed;
@@ -368,6 +380,7 @@ failed:
/****************************************************************************/
+#ifdef CONFIG_BINFMT_FLAT_OLD
static void old_reloc(unsigned long rl)
{
static const char *segment[] = { "TEXT", "DATA", "BSS", "*UNKNOWN*" };
@@ -405,6 +418,7 @@ static void old_reloc(unsigned long rl)
pr_debug("Relocation became %lx\n", val);
}
+#endif /* CONFIG_BINFMT_FLAT_OLD */
/****************************************************************************/
@@ -415,7 +429,8 @@ static int load_flat_file(struct linux_binprm *bprm,
unsigned long textpos, datapos, realdatastart;
u32 text_len, data_len, bss_len, stack_len, full_data, flags;
unsigned long len, memp, memp_size, extra, rlim;
- u32 __user *reloc, *rp;
+ __be32 __user *reloc;
+ u32 __user *rp;
struct inode *inode;
int i, rev, relocs;
loff_t fpos;
@@ -454,6 +469,7 @@ static int load_flat_file(struct linux_binprm *bprm,
if (flags & FLAT_FLAG_KTRACE)
pr_info("Loading file: %s\n", bprm->filename);
+#ifdef CONFIG_BINFMT_FLAT_OLD
if (rev != FLAT_VERSION && rev != OLD_FLAT_VERSION) {
pr_err("bad flat file version 0x%x (supported 0x%lx and 0x%lx)\n",
rev, FLAT_VERSION, OLD_FLAT_VERSION);
@@ -470,6 +486,23 @@ static int load_flat_file(struct linux_binprm *bprm,
}
/*
+ * fix up the flags for the older format, there were all kinds
+ * of endian hacks, this only works for the simple cases
+ */
+ if (rev == OLD_FLAT_VERSION &&
+ (flags || IS_ENABLED(CONFIG_BINFMT_FLAT_OLD_ALWAYS_RAM)))
+ flags = FLAT_FLAG_RAM;
+
+#else /* CONFIG_BINFMT_FLAT_OLD */
+ if (rev != FLAT_VERSION) {
+ pr_err("bad flat file version 0x%x (supported 0x%lx)\n",
+ rev, FLAT_VERSION);
+ ret = -ENOEXEC;
+ goto err;
+ }
+#endif /* !CONFIG_BINFMT_FLAT_OLD */
+
+ /*
* Make sure the header params are sane.
* 28 bits (256 MB) is way more than reasonable in this case.
* If some top bits are set we have probable binary corruption.
@@ -480,13 +513,6 @@ static int load_flat_file(struct linux_binprm *bprm,
goto err;
}
- /*
- * fix up the flags for the older format, there were all kinds
- * of endian hacks, this only works for the simple cases
- */
- if (rev == OLD_FLAT_VERSION && flat_old_ram_flag(flags))
- flags = FLAT_FLAG_RAM;
-
#ifndef CONFIG_BINFMT_ZFLAT
if (flags & (FLAT_FLAG_GZIP|FLAT_FLAG_GZDATA)) {
pr_err("Support for ZFLAT executables is not enabled.\n");
@@ -547,7 +573,7 @@ static int load_flat_file(struct linux_binprm *bprm,
goto err;
}
- len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
+ len = data_len + extra;
len = PAGE_ALIGN(len);
realdatastart = vm_mmap(NULL, 0, len,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
@@ -561,9 +587,7 @@ static int load_flat_file(struct linux_binprm *bprm,
vm_munmap(textpos, text_len);
goto err;
}
- datapos = ALIGN(realdatastart +
- MAX_SHARED_LIBS * sizeof(unsigned long),
- FLAT_DATA_ALIGN);
+ datapos = ALIGN(realdatastart, FLAT_DATA_ALIGN);
pr_debug("Allocated data+bss+stack (%u bytes): %lx\n",
data_len + bss_len + stack_len, datapos);
@@ -587,13 +611,13 @@ static int load_flat_file(struct linux_binprm *bprm,
goto err;
}
- reloc = (u32 __user *)
+ reloc = (__be32 __user *)
(datapos + (ntohl(hdr->reloc_start) - text_len));
memp = realdatastart;
memp_size = len;
} else {
- len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(u32);
+ len = text_len + data_len + extra;
len = PAGE_ALIGN(len);
textpos = vm_mmap(NULL, 0, len,
PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
@@ -608,11 +632,9 @@ static int load_flat_file(struct linux_binprm *bprm,
}
realdatastart = textpos + ntohl(hdr->data_start);
- datapos = ALIGN(realdatastart +
- MAX_SHARED_LIBS * sizeof(u32),
- FLAT_DATA_ALIGN);
+ datapos = ALIGN(realdatastart, FLAT_DATA_ALIGN);
- reloc = (u32 __user *)
+ reloc = (__be32 __user *)
(datapos + (ntohl(hdr->reloc_start) - text_len));
memp = textpos;
memp_size = len;
@@ -627,8 +649,9 @@ static int load_flat_file(struct linux_binprm *bprm,
(text_len + full_data
- sizeof(struct flat_hdr)),
0);
- memmove((void *) datapos, (void *) realdatastart,
- full_data);
+ if (datapos != realdatastart)
+ memmove((void *)datapos, (void *)realdatastart,
+ full_data);
#else
/*
* This is used on MMU systems mainly for testing.
@@ -684,8 +707,7 @@ static int load_flat_file(struct linux_binprm *bprm,
if (IS_ERR_VALUE(result)) {
ret = result;
pr_err("Unable to read code+data+bss, errno %d\n", ret);
- vm_munmap(textpos, text_len + data_len + extra +
- MAX_SHARED_LIBS * sizeof(u32));
+ vm_munmap(textpos, text_len + data_len + extra);
goto err;
}
}
@@ -775,20 +797,18 @@ static int load_flat_file(struct linux_binprm *bprm,
* __start to address 4 so that is okay).
*/
if (rev > OLD_FLAT_VERSION) {
- u32 __maybe_unused persistent = 0;
for (i = 0; i < relocs; i++) {
u32 addr, relval;
+ __be32 tmp;
/*
* Get the address of the pointer to be
* relocated (of course, the address has to be
* relocated first).
*/
- if (get_user(relval, reloc + i))
+ if (get_user(tmp, reloc + i))
return -EFAULT;
- relval = ntohl(relval);
- if (flat_set_persistent(relval, &persistent))
- continue;
+ relval = ntohl(tmp);
addr = flat_get_relocate_addr(relval);
rp = (u32 __user *)calc_reloc(addr, libinfo, id, 1);
if (rp == (u32 __user *)RELOC_FAILED) {
@@ -797,8 +817,7 @@ static int load_flat_file(struct linux_binprm *bprm,
}
/* Get the pointer's value. */
- ret = flat_get_addr_from_rp(rp, relval, flags,
- &addr, &persistent);
+ ret = flat_get_addr_from_rp(rp, relval, flags, &addr);
if (unlikely(ret))
goto err;
@@ -807,8 +826,13 @@ static int load_flat_file(struct linux_binprm *bprm,
* Do the relocation. PIC relocs in the data section are
* already in target order
*/
- if ((flags & FLAT_FLAG_GOTPIC) == 0)
- addr = ntohl(addr);
+ if ((flags & FLAT_FLAG_GOTPIC) == 0) {
+ /*
+ * Meh, the same value can have a different
+ * byte order based on a flag..
+ */
+ addr = ntohl((__force __be32)addr);
+ }
addr = calc_reloc(addr, libinfo, id, 0);
if (addr == RELOC_FAILED) {
ret = -ENOEXEC;
@@ -821,14 +845,15 @@ static int load_flat_file(struct linux_binprm *bprm,
goto err;
}
}
+#ifdef CONFIG_BINFMT_FLAT_OLD
} else {
for (i = 0; i < relocs; i++) {
- u32 relval;
+ __be32 relval;
if (get_user(relval, reloc + i))
return -EFAULT;
- relval = ntohl(relval);
- old_reloc(relval);
+ old_reloc(ntohl(relval));
}
+#endif /* CONFIG_BINFMT_FLAT_OLD */
}
flush_icache_range(start_code, end_code);
@@ -856,9 +881,14 @@ err:
static int load_flat_shared_library(int id, struct lib_info *libs)
{
+ /*
+ * This is a fake bprm struct; only the members "buf", "file" and
+ * "filename" are actually used.
+ */
struct linux_binprm bprm;
int res;
char buf[16];
+ loff_t pos = 0;
memset(&bprm, 0, sizeof(bprm));
@@ -872,25 +902,11 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
if (IS_ERR(bprm.file))
return res;
- bprm.cred = prepare_exec_creds();
- res = -ENOMEM;
- if (!bprm.cred)
- goto out;
-
- /* We don't really care about recalculating credentials at this point
- * as we're past the point of no return and are dealing with shared
- * libraries.
- */
- bprm.called_set_creds = 1;
-
- res = prepare_binprm(&bprm);
+ res = kernel_read(bprm.file, bprm.buf, BINPRM_BUF_SIZE, &pos);
- if (!res)
+ if (res >= 0)
res = load_flat_file(&bprm, libs, id, NULL);
- abort_creds(bprm.cred);
-
-out:
allow_write_access(bprm.file);
fput(bprm.file);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 749f5984425d..f00b569a9f89 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -203,13 +203,12 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
{
struct file *file = iocb->ki_filp;
struct block_device *bdev = I_BDEV(bdev_file_inode(file));
- struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs, *bvec;
+ struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
loff_t pos = iocb->ki_pos;
bool should_dirty = false;
struct bio bio;
ssize_t ret;
blk_qc_t qc;
- struct bvec_iter_all iter_all;
if ((pos | iov_iter_alignment(iter)) &
(bdev_logical_block_size(bdev) - 1))
@@ -259,13 +258,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
}
__set_current_state(TASK_RUNNING);
- bio_for_each_segment_all(bvec, &bio, iter_all) {
- if (should_dirty && !PageCompound(bvec->bv_page))
- set_page_dirty_lock(bvec->bv_page);
- if (!bio_flagged(&bio, BIO_NO_PAGE_REF))
- put_page(bvec->bv_page);
- }
-
+ bio_release_pages(&bio, should_dirty);
if (unlikely(bio.bi_status))
ret = blk_status_to_errno(bio.bi_status);
@@ -335,13 +328,7 @@ static void blkdev_bio_end_io(struct bio *bio)
if (should_dirty) {
bio_check_pages_dirty(bio);
} else {
- if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
- struct bvec_iter_all iter_all;
- struct bio_vec *bvec;
-
- bio_for_each_segment_all(bvec, bio, iter_all)
- put_page(bvec->bv_page);
- }
+ bio_release_pages(bio, false);
bio_put(bio);
}
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 2a1be0d1a698..56ae2f659b6d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2928,8 +2928,10 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
inode_lock(inode);
err = btrfs_delete_subvolume(dir, dentry);
inode_unlock(inode);
- if (!err)
+ if (!err) {
+ fsnotify_rmdir(dir, dentry);
d_delete(dentry);
+ }
out_dput:
dput(dentry);
diff --git a/fs/buffer.c b/fs/buffer.c
index e450c55f6434..49a871570092 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2086,38 +2086,6 @@ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
}
EXPORT_SYMBOL(block_write_begin);
-void __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
- struct page *page)
-{
- loff_t old_size = inode->i_size;
- bool i_size_changed = false;
-
- /*
- * No need to use i_size_read() here, the i_size cannot change under us
- * because we hold i_rwsem.
- *
- * But it's important to update i_size while still holding page lock:
- * page writeout could otherwise come in and zero beyond i_size.
- */
- if (pos + copied > inode->i_size) {
- i_size_write(inode, pos + copied);
- i_size_changed = true;
- }
-
- unlock_page(page);
-
- if (old_size < pos)
- pagecache_isize_extended(inode, old_size, pos);
- /*
- * Don't mark the inode dirty under page lock. First, it unnecessarily
- * makes the holding time of page lock longer. Second, it forces lock
- * ordering of page lock and transaction start for journaling
- * filesystems.
- */
- if (i_size_changed)
- mark_inode_dirty(inode);
-}
-
int block_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
@@ -2158,9 +2126,37 @@ int generic_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
+ struct inode *inode = mapping->host;
+ loff_t old_size = inode->i_size;
+ bool i_size_changed = false;
+
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
- __generic_write_end(mapping->host, pos, copied, page);
+
+ /*
+ * No need to use i_size_read() here, the i_size cannot change under us
+ * because we hold i_rwsem.
+ *
+ * But it's important to update i_size while still holding page lock:
+ * page writeout could otherwise come in and zero beyond i_size.
+ */
+ if (pos + copied > inode->i_size) {
+ i_size_write(inode, pos + copied);
+ i_size_changed = true;
+ }
+
+ unlock_page(page);
put_page(page);
+
+ if (old_size < pos)
+ pagecache_isize_extended(inode, old_size, pos);
+ /*
+ * Don't mark the inode dirty under page lock. First, it unnecessarily
+ * makes the holding time of page lock longer. Second, it forces lock
+ * ordering of page lock and transaction start for journaling
+ * filesystems.
+ */
+ if (i_size_changed)
+ mark_inode_dirty(inode);
return copied;
}
EXPORT_SYMBOL(generic_write_end);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 183c37c0a8fc..c5517ffeb11c 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1889,9 +1889,9 @@ static int is_file_size_ok(struct inode *src_inode, struct inode *dst_inode,
return 0;
}
-static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off,
- struct file *dst_file, loff_t dst_off,
- size_t len, unsigned int flags)
+static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
+ struct file *dst_file, loff_t dst_off,
+ size_t len, unsigned int flags)
{
struct inode *src_inode = file_inode(src_file);
struct inode *dst_inode = file_inode(dst_file);
@@ -1909,6 +1909,8 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off,
if (src_inode == dst_inode)
return -EINVAL;
+ if (src_inode->i_sb != dst_inode->i_sb)
+ return -EXDEV;
if (ceph_snap(dst_inode) != CEPH_NOSNAP)
return -EROFS;
@@ -2100,6 +2102,21 @@ out:
return ret;
}
+static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off,
+ struct file *dst_file, loff_t dst_off,
+ size_t len, unsigned int flags)
+{
+ ssize_t ret;
+
+ ret = __ceph_copy_file_range(src_file, src_off, dst_file, dst_off,
+ len, flags);
+
+ if (ret == -EOPNOTSUPP || ret == -EXDEV)
+ ret = generic_copy_file_range(src_file, src_off, dst_file,
+ dst_off, len, flags);
+ return ret;
+}
+
const struct file_operations ceph_file_fops = {
.open = ceph_open,
.release = ceph_release,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 6af2d0d4a87a..c8a9b89b922d 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2121,9 +2121,10 @@ retry:
if (inode && ceph_snap(inode) == CEPH_SNAPDIR) {
dout("build_path path+%d: %p SNAPDIR\n",
pos, temp);
- } else if (stop_on_nosnap && inode &&
+ } else if (stop_on_nosnap && inode && dentry != temp &&
ceph_snap(inode) == CEPH_NOSNAP) {
spin_unlock(&temp->d_lock);
+ pos++; /* get rid of any prepended '/' */
break;
} else {
pos -= temp->d_name.len;
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index aae2b8b2adf5..523e9ea78a28 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -10,7 +10,7 @@ config CIFS
select CRYPTO_SHA512
select CRYPTO_CMAC
select CRYPTO_HMAC
- select CRYPTO_ARC4
+ select CRYPTO_LIB_ARC4
select CRYPTO_AEAD2
select CRYPTO_CCM
select CRYPTO_ECB
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index d2a05e46d6f5..97b7497c13ef 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -33,7 +33,8 @@
#include <linux/ctype.h>
#include <linux/random.h>
#include <linux/highmem.h>
-#include <crypto/skcipher.h>
+#include <linux/fips.h>
+#include <crypto/arc4.h>
#include <crypto/aead.h>
int __cifs_calc_signature(struct smb_rqst *rqst,
@@ -772,63 +773,32 @@ setup_ntlmv2_rsp_ret:
int
calc_seckey(struct cifs_ses *ses)
{
- int rc;
- struct crypto_skcipher *tfm_arc4;
- struct scatterlist sgin, sgout;
- struct skcipher_request *req;
- unsigned char *sec_key;
+ unsigned char sec_key[CIFS_SESS_KEY_SIZE]; /* a nonce */
+ struct arc4_ctx *ctx_arc4;
- sec_key = kmalloc(CIFS_SESS_KEY_SIZE, GFP_KERNEL);
- if (sec_key == NULL)
- return -ENOMEM;
+ if (fips_enabled)
+ return -ENODEV;
get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE);
- tfm_arc4 = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(tfm_arc4)) {
- rc = PTR_ERR(tfm_arc4);
- cifs_dbg(VFS, "could not allocate crypto API arc4\n");
- goto out;
- }
-
- rc = crypto_skcipher_setkey(tfm_arc4, ses->auth_key.response,
- CIFS_SESS_KEY_SIZE);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not set response as a key\n",
- __func__);
- goto out_free_cipher;
- }
-
- req = skcipher_request_alloc(tfm_arc4, GFP_KERNEL);
- if (!req) {
- rc = -ENOMEM;
- cifs_dbg(VFS, "could not allocate crypto API arc4 request\n");
- goto out_free_cipher;
+ ctx_arc4 = kmalloc(sizeof(*ctx_arc4), GFP_KERNEL);
+ if (!ctx_arc4) {
+ cifs_dbg(VFS, "could not allocate arc4 context\n");
+ return -ENOMEM;
}
- sg_init_one(&sgin, sec_key, CIFS_SESS_KEY_SIZE);
- sg_init_one(&sgout, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
-
- skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sgin, &sgout, CIFS_CPHTXT_SIZE, NULL);
-
- rc = crypto_skcipher_encrypt(req);
- skcipher_request_free(req);
- if (rc) {
- cifs_dbg(VFS, "could not encrypt session key rc: %d\n", rc);
- goto out_free_cipher;
- }
+ arc4_setkey(ctx_arc4, ses->auth_key.response, CIFS_SESS_KEY_SIZE);
+ arc4_crypt(ctx_arc4, ses->ntlmssp->ciphertext, sec_key,
+ CIFS_CPHTXT_SIZE);
/* make secondary_key/nonce as session key */
memcpy(ses->auth_key.response, sec_key, CIFS_SESS_KEY_SIZE);
/* and make len as that of session key only */
ses->auth_key.len = CIFS_SESS_KEY_SIZE;
-out_free_cipher:
- crypto_free_skcipher(tfm_arc4);
-out:
- kfree(sec_key);
- return rc;
+ memzero_explicit(sec_key, CIFS_SESS_KEY_SIZE);
+ kzfree(ctx_arc4);
+ return 0;
}
void
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 65d9771e49f9..24635b65effa 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1149,6 +1149,10 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off,
rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff,
len, flags);
free_xid(xid);
+
+ if (rc == -EOPNOTSUPP || rc == -EXDEV)
+ rc = generic_copy_file_range(src_file, off, dst_file,
+ destoff, len, flags);
return rc;
}
@@ -1591,7 +1595,6 @@ MODULE_DESCRIPTION
("VFS to access SMB3 servers e.g. Samba, Macs, Azure and Windows (and "
"also older servers complying with the SNIA CIFS Specification)");
MODULE_VERSION(CIFS_VERSION);
-MODULE_SOFTDEP("pre: arc4");
MODULE_SOFTDEP("pre: des");
MODULE_SOFTDEP("pre: ecb");
MODULE_SOFTDEP("pre: hmac");
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 8dd6637a3cbb..714a359c7c8d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2631,7 +2631,7 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
task = xchg(&server->tsk, NULL);
if (task)
- force_sig(SIGKILL, task);
+ send_sig(SIGKILL, task, 1);
}
static struct TCP_Server_Info *
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 1e21b2528cfb..534cbba72789 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -77,7 +77,8 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
goto name_is_IP_address;
/* Perform the upcall */
- rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL, false);
+ rc = dns_query(current->nsproxy->net_ns, NULL, hostname, len,
+ NULL, ip_addr, NULL, false);
if (rc < 0)
cifs_dbg(FYI, "%s: unable to resolve: %*.*s\n",
__func__, len, len, hostname);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 3fdc6a41b304..9fd56b0acd7e 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -2372,6 +2372,41 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
kfree(dfs_rsp);
return rc;
}
+
+static int
+parse_reparse_symlink(struct reparse_symlink_data_buffer *symlink_buf,
+ u32 plen, char **target_path,
+ struct cifs_sb_info *cifs_sb)
+{
+ unsigned int sub_len;
+ unsigned int sub_offset;
+
+ /* We only handle Symbolic Link : MS-FSCC 2.1.2.4 */
+ if (le32_to_cpu(symlink_buf->ReparseTag) != IO_REPARSE_TAG_SYMLINK) {
+ cifs_dbg(VFS, "srv returned invalid symlink buffer\n");
+ return -EIO;
+ }
+
+ sub_offset = le16_to_cpu(symlink_buf->SubstituteNameOffset);
+ sub_len = le16_to_cpu(symlink_buf->SubstituteNameLength);
+ if (sub_offset + 20 > plen ||
+ sub_offset + sub_len + 20 > plen) {
+ cifs_dbg(VFS, "srv returned malformed symlink buffer\n");
+ return -EIO;
+ }
+
+ *target_path = cifs_strndup_from_utf16(
+ symlink_buf->PathBuffer + sub_offset,
+ sub_len, true, cifs_sb->local_nls);
+ if (!(*target_path))
+ return -ENOMEM;
+
+ convert_delimiter(*target_path, '/');
+ cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path);
+
+ return 0;
+}
+
#define SMB2_SYMLINK_STRUCT_SIZE \
(sizeof(struct smb2_err_rsp) - 1 + sizeof(struct smb2_symlink_err_rsp))
@@ -2401,11 +2436,13 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
struct kvec close_iov[1];
struct smb2_create_rsp *create_rsp;
struct smb2_ioctl_rsp *ioctl_rsp;
- char *ioctl_buf;
+ struct reparse_data_buffer *reparse_buf;
u32 plen;
cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
+ *target_path = NULL;
+
if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
@@ -2483,17 +2520,36 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
if ((rc == 0) && (is_reparse_point)) {
/* See MS-FSCC 2.3.23 */
- ioctl_buf = (char *)ioctl_rsp + le32_to_cpu(ioctl_rsp->OutputOffset);
+ reparse_buf = (struct reparse_data_buffer *)
+ ((char *)ioctl_rsp +
+ le32_to_cpu(ioctl_rsp->OutputOffset));
plen = le32_to_cpu(ioctl_rsp->OutputCount);
if (plen + le32_to_cpu(ioctl_rsp->OutputOffset) >
rsp_iov[1].iov_len) {
- cifs_dbg(VFS, "srv returned invalid ioctl length: %d\n", plen);
+ cifs_dbg(VFS, "srv returned invalid ioctl len: %d\n",
+ plen);
+ rc = -EIO;
+ goto querty_exit;
+ }
+
+ if (plen < 8) {
+ cifs_dbg(VFS, "reparse buffer is too small. Must be "
+ "at least 8 bytes but was %d\n", plen);
+ rc = -EIO;
+ goto querty_exit;
+ }
+
+ if (plen < le16_to_cpu(reparse_buf->ReparseDataLength) + 8) {
+ cifs_dbg(VFS, "srv returned invalid reparse buf "
+ "length: %d\n", plen);
rc = -EIO;
goto querty_exit;
}
- /* Do stuff with ioctl_buf/plen */
+ rc = parse_reparse_symlink(
+ (struct reparse_symlink_data_buffer *)reparse_buf,
+ plen, target_path, cifs_sb);
goto querty_exit;
}
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index c7d5813bebd8..858353d20c39 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -914,7 +914,19 @@ struct reparse_mount_point_data_buffer {
__u8 PathBuffer[0]; /* Variable Length */
} __packed;
-/* See MS-FSCC 2.1.2.4 and cifspdu.h for struct reparse_symlink_data */
+#define SYMLINK_FLAG_RELATIVE 0x00000001
+
+struct reparse_symlink_data_buffer {
+ __le32 ReparseTag;
+ __le16 ReparseDataLength;
+ __u16 Reserved;
+ __le16 SubstituteNameOffset;
+ __le16 SubstituteNameLength;
+ __le16 PrintNameOffset;
+ __le16 PrintNameLength;
+ __le32 Flags;
+ __u8 PathBuffer[0]; /* Variable Length */
+} __packed;
/* See MS-FSCC 2.1.2.6 and cifspdu.h for struct reparse_posix_data */
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index d2ca5287762d..92112915de8e 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -13,6 +13,7 @@
#undef DEBUG
#include <linux/fs.h>
+#include <linux/fsnotify.h>
#include <linux/mount.h>
#include <linux/module.h>
#include <linux/slab.h>
@@ -1788,6 +1789,7 @@ void configfs_unregister_group(struct config_group *group)
configfs_detach_group(&group->cg_item);
d_inode(dentry)->i_flags |= S_DEAD;
dont_mount(dentry);
+ fsnotify_rmdir(d_inode(parent), dentry);
d_delete(dentry);
inode_unlock(d_inode(parent));
@@ -1916,6 +1918,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
configfs_detach_group(&group->cg_item);
d_inode(dentry)->i_flags |= S_DEAD;
dont_mount(dentry);
+ fsnotify_rmdir(d_inode(root), dentry);
inode_unlock(d_inode(dentry));
d_delete(dentry);
diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig
index 24ed99e2eca0..5fdf24877c17 100644
--- a/fs/crypto/Kconfig
+++ b/fs/crypto/Kconfig
@@ -7,7 +7,6 @@ config FS_ENCRYPTION
select CRYPTO_ECB
select CRYPTO_XTS
select CRYPTO_CTS
- select CRYPTO_SHA256
select KEYS
help
Enable encryption of files and directories. This
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index b46021ebde85..82da2510721f 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -33,9 +33,8 @@ static void __fscrypt_decrypt_bio(struct bio *bio, bool done)
bio_for_each_segment_all(bv, bio, iter_all) {
struct page *page = bv->bv_page;
- int ret = fscrypt_decrypt_page(page->mapping->host, page,
- PAGE_SIZE, 0, page->index);
-
+ int ret = fscrypt_decrypt_pagecache_blocks(page, bv->bv_len,
+ bv->bv_offset);
if (ret)
SetPageError(page);
else if (done)
@@ -53,9 +52,8 @@ EXPORT_SYMBOL(fscrypt_decrypt_bio);
static void completion_pages(struct work_struct *work)
{
- struct fscrypt_ctx *ctx =
- container_of(work, struct fscrypt_ctx, r.work);
- struct bio *bio = ctx->r.bio;
+ struct fscrypt_ctx *ctx = container_of(work, struct fscrypt_ctx, work);
+ struct bio *bio = ctx->bio;
__fscrypt_decrypt_bio(bio, true);
fscrypt_release_ctx(ctx);
@@ -64,57 +62,29 @@ static void completion_pages(struct work_struct *work)
void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, struct bio *bio)
{
- INIT_WORK(&ctx->r.work, completion_pages);
- ctx->r.bio = bio;
- fscrypt_enqueue_decrypt_work(&ctx->r.work);
+ INIT_WORK(&ctx->work, completion_pages);
+ ctx->bio = bio;
+ fscrypt_enqueue_decrypt_work(&ctx->work);
}
EXPORT_SYMBOL(fscrypt_enqueue_decrypt_bio);
-void fscrypt_pullback_bio_page(struct page **page, bool restore)
-{
- struct fscrypt_ctx *ctx;
- struct page *bounce_page;
-
- /* The bounce data pages are unmapped. */
- if ((*page)->mapping)
- return;
-
- /* The bounce data page is unmapped. */
- bounce_page = *page;
- ctx = (struct fscrypt_ctx *)page_private(bounce_page);
-
- /* restore control page */
- *page = ctx->w.control_page;
-
- if (restore)
- fscrypt_restore_control_page(bounce_page);
-}
-EXPORT_SYMBOL(fscrypt_pullback_bio_page);
-
int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
sector_t pblk, unsigned int len)
{
- struct fscrypt_ctx *ctx;
- struct page *ciphertext_page = NULL;
+ const unsigned int blockbits = inode->i_blkbits;
+ const unsigned int blocksize = 1 << blockbits;
+ struct page *ciphertext_page;
struct bio *bio;
int ret, err = 0;
- BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE);
-
- ctx = fscrypt_get_ctx(GFP_NOFS);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- ciphertext_page = fscrypt_alloc_bounce_page(ctx, GFP_NOWAIT);
- if (IS_ERR(ciphertext_page)) {
- err = PTR_ERR(ciphertext_page);
- goto errout;
- }
+ ciphertext_page = fscrypt_alloc_bounce_page(GFP_NOWAIT);
+ if (!ciphertext_page)
+ return -ENOMEM;
while (len--) {
- err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk,
- ZERO_PAGE(0), ciphertext_page,
- PAGE_SIZE, 0, GFP_NOFS);
+ err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk,
+ ZERO_PAGE(0), ciphertext_page,
+ blocksize, 0, GFP_NOFS);
if (err)
goto errout;
@@ -124,14 +94,11 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
goto errout;
}
bio_set_dev(bio, inode->i_sb->s_bdev);
- bio->bi_iter.bi_sector =
- pblk << (inode->i_sb->s_blocksize_bits - 9);
+ bio->bi_iter.bi_sector = pblk << (blockbits - 9);
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- ret = bio_add_page(bio, ciphertext_page,
- inode->i_sb->s_blocksize, 0);
- if (ret != inode->i_sb->s_blocksize) {
+ ret = bio_add_page(bio, ciphertext_page, blocksize, 0);
+ if (WARN_ON(ret != blocksize)) {
/* should never happen! */
- WARN_ON(1);
bio_put(bio);
err = -EIO;
goto errout;
@@ -147,7 +114,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
}
err = 0;
errout:
- fscrypt_release_ctx(ctx);
+ fscrypt_free_bounce_page(ciphertext_page);
return err;
}
EXPORT_SYMBOL(fscrypt_zeroout_range);
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 335a362ee446..45c3d0427fb2 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -59,23 +59,16 @@ void fscrypt_enqueue_decrypt_work(struct work_struct *work)
EXPORT_SYMBOL(fscrypt_enqueue_decrypt_work);
/**
- * fscrypt_release_ctx() - Releases an encryption context
- * @ctx: The encryption context to release.
+ * fscrypt_release_ctx() - Release a decryption context
+ * @ctx: The decryption context to release.
*
- * If the encryption context was allocated from the pre-allocated pool, returns
- * it to that pool. Else, frees it.
- *
- * If there's a bounce page in the context, this frees that.
+ * If the decryption context was allocated from the pre-allocated pool, return
+ * it to that pool. Else, free it.
*/
void fscrypt_release_ctx(struct fscrypt_ctx *ctx)
{
unsigned long flags;
- if (ctx->flags & FS_CTX_HAS_BOUNCE_BUFFER_FL && ctx->w.bounce_page) {
- mempool_free(ctx->w.bounce_page, fscrypt_bounce_page_pool);
- ctx->w.bounce_page = NULL;
- }
- ctx->w.control_page = NULL;
if (ctx->flags & FS_CTX_REQUIRES_FREE_ENCRYPT_FL) {
kmem_cache_free(fscrypt_ctx_cachep, ctx);
} else {
@@ -87,12 +80,12 @@ void fscrypt_release_ctx(struct fscrypt_ctx *ctx)
EXPORT_SYMBOL(fscrypt_release_ctx);
/**
- * fscrypt_get_ctx() - Gets an encryption context
+ * fscrypt_get_ctx() - Get a decryption context
* @gfp_flags: The gfp flag for memory allocation
*
- * Allocates and initializes an encryption context.
+ * Allocate and initialize a decryption context.
*
- * Return: A new encryption context on success; an ERR_PTR() otherwise.
+ * Return: A new decryption context on success; an ERR_PTR() otherwise.
*/
struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags)
{
@@ -100,14 +93,8 @@ struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags)
unsigned long flags;
/*
- * We first try getting the ctx from a free list because in
- * the common case the ctx will have an allocated and
- * initialized crypto tfm, so it's probably a worthwhile
- * optimization. For the bounce page, we first try getting it
- * from the kernel allocator because that's just about as fast
- * as getting it from a list and because a cache of free pages
- * should generally be a "last resort" option for a filesystem
- * to be able to do its job.
+ * First try getting a ctx from the free list so that we don't have to
+ * call into the slab allocator.
*/
spin_lock_irqsave(&fscrypt_ctx_lock, flags);
ctx = list_first_entry_or_null(&fscrypt_free_ctxs,
@@ -123,11 +110,31 @@ struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags)
} else {
ctx->flags &= ~FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
}
- ctx->flags &= ~FS_CTX_HAS_BOUNCE_BUFFER_FL;
return ctx;
}
EXPORT_SYMBOL(fscrypt_get_ctx);
+struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags)
+{
+ return mempool_alloc(fscrypt_bounce_page_pool, gfp_flags);
+}
+
+/**
+ * fscrypt_free_bounce_page() - free a ciphertext bounce page
+ *
+ * Free a bounce page that was allocated by fscrypt_encrypt_pagecache_blocks(),
+ * or by fscrypt_alloc_bounce_page() directly.
+ */
+void fscrypt_free_bounce_page(struct page *bounce_page)
+{
+ if (!bounce_page)
+ return;
+ set_page_private(bounce_page, (unsigned long)NULL);
+ ClearPagePrivate(bounce_page);
+ mempool_free(bounce_page, fscrypt_bounce_page_pool);
+}
+EXPORT_SYMBOL(fscrypt_free_bounce_page);
+
void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
const struct fscrypt_info *ci)
{
@@ -141,10 +148,11 @@ void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
crypto_cipher_encrypt_one(ci->ci_essiv_tfm, iv->raw, iv->raw);
}
-int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
- u64 lblk_num, struct page *src_page,
- struct page *dest_page, unsigned int len,
- unsigned int offs, gfp_t gfp_flags)
+/* Encrypt or decrypt a single filesystem block of file contents */
+int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw,
+ u64 lblk_num, struct page *src_page,
+ struct page *dest_page, unsigned int len,
+ unsigned int offs, gfp_t gfp_flags)
{
union fscrypt_iv iv;
struct skcipher_request *req = NULL;
@@ -154,7 +162,10 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
struct crypto_skcipher *tfm = ci->ci_ctfm;
int res = 0;
- BUG_ON(len == 0);
+ if (WARN_ON_ONCE(len <= 0))
+ return -EINVAL;
+ if (WARN_ON_ONCE(len % FS_CRYPTO_BLOCK_SIZE != 0))
+ return -EINVAL;
fscrypt_generate_iv(&iv, lblk_num, ci);
@@ -186,126 +197,158 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
return 0;
}
-struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
- gfp_t gfp_flags)
-{
- ctx->w.bounce_page = mempool_alloc(fscrypt_bounce_page_pool, gfp_flags);
- if (ctx->w.bounce_page == NULL)
- return ERR_PTR(-ENOMEM);
- ctx->flags |= FS_CTX_HAS_BOUNCE_BUFFER_FL;
- return ctx->w.bounce_page;
-}
-
/**
- * fscypt_encrypt_page() - Encrypts a page
- * @inode: The inode for which the encryption should take place
- * @page: The page to encrypt. Must be locked for bounce-page
- * encryption.
- * @len: Length of data to encrypt in @page and encrypted
- * data in returned page.
- * @offs: Offset of data within @page and returned
- * page holding encrypted data.
- * @lblk_num: Logical block number. This must be unique for multiple
- * calls with same inode, except when overwriting
- * previously written data.
- * @gfp_flags: The gfp flag for memory allocation
- *
- * Encrypts @page using the ctx encryption context. Performs encryption
- * either in-place or into a newly allocated bounce page.
- * Called on the page write path.
+ * fscrypt_encrypt_pagecache_blocks() - Encrypt filesystem blocks from a pagecache page
+ * @page: The locked pagecache page containing the block(s) to encrypt
+ * @len: Total size of the block(s) to encrypt. Must be a nonzero
+ * multiple of the filesystem's block size.
+ * @offs: Byte offset within @page of the first block to encrypt. Must be
+ * a multiple of the filesystem's block size.
+ * @gfp_flags: Memory allocation flags
*
- * Bounce page allocation is the default.
- * In this case, the contents of @page are encrypted and stored in an
- * allocated bounce page. @page has to be locked and the caller must call
- * fscrypt_restore_control_page() on the returned ciphertext page to
- * release the bounce buffer and the encryption context.
+ * A new bounce page is allocated, and the specified block(s) are encrypted into
+ * it. In the bounce page, the ciphertext block(s) will be located at the same
+ * offsets at which the plaintext block(s) were located in the source page; any
+ * other parts of the bounce page will be left uninitialized. However, normally
+ * blocksize == PAGE_SIZE and the whole page is encrypted at once.
*
- * In-place encryption is used by setting the FS_CFLG_OWN_PAGES flag in
- * fscrypt_operations. Here, the input-page is returned with its content
- * encrypted.
+ * This is for use by the filesystem's ->writepages() method.
*
- * Return: A page with the encrypted content on success. Else, an
- * error value or NULL.
+ * Return: the new encrypted bounce page on success; an ERR_PTR() on failure
*/
-struct page *fscrypt_encrypt_page(const struct inode *inode,
- struct page *page,
- unsigned int len,
- unsigned int offs,
- u64 lblk_num, gfp_t gfp_flags)
+struct page *fscrypt_encrypt_pagecache_blocks(struct page *page,
+ unsigned int len,
+ unsigned int offs,
+ gfp_t gfp_flags)
{
- struct fscrypt_ctx *ctx;
- struct page *ciphertext_page = page;
+ const struct inode *inode = page->mapping->host;
+ const unsigned int blockbits = inode->i_blkbits;
+ const unsigned int blocksize = 1 << blockbits;
+ struct page *ciphertext_page;
+ u64 lblk_num = ((u64)page->index << (PAGE_SHIFT - blockbits)) +
+ (offs >> blockbits);
+ unsigned int i;
int err;
- BUG_ON(len % FS_CRYPTO_BLOCK_SIZE != 0);
+ if (WARN_ON_ONCE(!PageLocked(page)))
+ return ERR_PTR(-EINVAL);
- if (inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES) {
- /* with inplace-encryption we just encrypt the page */
- err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk_num, page,
- ciphertext_page, len, offs,
- gfp_flags);
- if (err)
- return ERR_PTR(err);
+ if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offs, blocksize)))
+ return ERR_PTR(-EINVAL);
- return ciphertext_page;
- }
-
- BUG_ON(!PageLocked(page));
-
- ctx = fscrypt_get_ctx(gfp_flags);
- if (IS_ERR(ctx))
- return ERR_CAST(ctx);
-
- /* The encryption operation will require a bounce page. */
- ciphertext_page = fscrypt_alloc_bounce_page(ctx, gfp_flags);
- if (IS_ERR(ciphertext_page))
- goto errout;
+ ciphertext_page = fscrypt_alloc_bounce_page(gfp_flags);
+ if (!ciphertext_page)
+ return ERR_PTR(-ENOMEM);
- ctx->w.control_page = page;
- err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk_num,
- page, ciphertext_page, len, offs,
- gfp_flags);
- if (err) {
- ciphertext_page = ERR_PTR(err);
- goto errout;
+ for (i = offs; i < offs + len; i += blocksize, lblk_num++) {
+ err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk_num,
+ page, ciphertext_page,
+ blocksize, i, gfp_flags);
+ if (err) {
+ fscrypt_free_bounce_page(ciphertext_page);
+ return ERR_PTR(err);
+ }
}
SetPagePrivate(ciphertext_page);
- set_page_private(ciphertext_page, (unsigned long)ctx);
- lock_page(ciphertext_page);
+ set_page_private(ciphertext_page, (unsigned long)page);
return ciphertext_page;
+}
+EXPORT_SYMBOL(fscrypt_encrypt_pagecache_blocks);
-errout:
- fscrypt_release_ctx(ctx);
- return ciphertext_page;
+/**
+ * fscrypt_encrypt_block_inplace() - Encrypt a filesystem block in-place
+ * @inode: The inode to which this block belongs
+ * @page: The page containing the block to encrypt
+ * @len: Size of block to encrypt. Doesn't need to be a multiple of the
+ * fs block size, but must be a multiple of FS_CRYPTO_BLOCK_SIZE.
+ * @offs: Byte offset within @page at which the block to encrypt begins
+ * @lblk_num: Filesystem logical block number of the block, i.e. the 0-based
+ * number of the block within the file
+ * @gfp_flags: Memory allocation flags
+ *
+ * Encrypt a possibly-compressed filesystem block that is located in an
+ * arbitrary page, not necessarily in the original pagecache page. The @inode
+ * and @lblk_num must be specified, as they can't be determined from @page.
+ *
+ * Return: 0 on success; -errno on failure
+ */
+int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page,
+ unsigned int len, unsigned int offs,
+ u64 lblk_num, gfp_t gfp_flags)
+{
+ return fscrypt_crypt_block(inode, FS_ENCRYPT, lblk_num, page, page,
+ len, offs, gfp_flags);
}
-EXPORT_SYMBOL(fscrypt_encrypt_page);
+EXPORT_SYMBOL(fscrypt_encrypt_block_inplace);
/**
- * fscrypt_decrypt_page() - Decrypts a page in-place
- * @inode: The corresponding inode for the page to decrypt.
- * @page: The page to decrypt. Must be locked in case
- * it is a writeback page (FS_CFLG_OWN_PAGES unset).
- * @len: Number of bytes in @page to be decrypted.
- * @offs: Start of data in @page.
- * @lblk_num: Logical block number.
+ * fscrypt_decrypt_pagecache_blocks() - Decrypt filesystem blocks in a pagecache page
+ * @page: The locked pagecache page containing the block(s) to decrypt
+ * @len: Total size of the block(s) to decrypt. Must be a nonzero
+ * multiple of the filesystem's block size.
+ * @offs: Byte offset within @page of the first block to decrypt. Must be
+ * a multiple of the filesystem's block size.
*
- * Decrypts page in-place using the ctx encryption context.
+ * The specified block(s) are decrypted in-place within the pagecache page,
+ * which must still be locked and not uptodate. Normally, blocksize ==
+ * PAGE_SIZE and the whole page is decrypted at once.
*
- * Called from the read completion callback.
+ * This is for use by the filesystem's ->readpages() method.
*
- * Return: Zero on success, non-zero otherwise.
+ * Return: 0 on success; -errno on failure
*/
-int fscrypt_decrypt_page(const struct inode *inode, struct page *page,
- unsigned int len, unsigned int offs, u64 lblk_num)
+int fscrypt_decrypt_pagecache_blocks(struct page *page, unsigned int len,
+ unsigned int offs)
{
- if (!(inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES))
- BUG_ON(!PageLocked(page));
+ const struct inode *inode = page->mapping->host;
+ const unsigned int blockbits = inode->i_blkbits;
+ const unsigned int blocksize = 1 << blockbits;
+ u64 lblk_num = ((u64)page->index << (PAGE_SHIFT - blockbits)) +
+ (offs >> blockbits);
+ unsigned int i;
+ int err;
+
+ if (WARN_ON_ONCE(!PageLocked(page)))
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offs, blocksize)))
+ return -EINVAL;
+
+ for (i = offs; i < offs + len; i += blocksize, lblk_num++) {
+ err = fscrypt_crypt_block(inode, FS_DECRYPT, lblk_num, page,
+ page, blocksize, i, GFP_NOFS);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(fscrypt_decrypt_pagecache_blocks);
- return fscrypt_do_page_crypto(inode, FS_DECRYPT, lblk_num, page, page,
- len, offs, GFP_NOFS);
+/**
+ * fscrypt_decrypt_block_inplace() - Decrypt a filesystem block in-place
+ * @inode: The inode to which this block belongs
+ * @page: The page containing the block to decrypt
+ * @len: Size of block to decrypt. Doesn't need to be a multiple of the
+ * fs block size, but must be a multiple of FS_CRYPTO_BLOCK_SIZE.
+ * @offs: Byte offset within @page at which the block to decrypt begins
+ * @lblk_num: Filesystem logical block number of the block, i.e. the 0-based
+ * number of the block within the file
+ *
+ * Decrypt a possibly-compressed filesystem block that is located in an
+ * arbitrary page, not necessarily in the original pagecache page. The @inode
+ * and @lblk_num must be specified, as they can't be determined from @page.
+ *
+ * Return: 0 on success; -errno on failure
+ */
+int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page,
+ unsigned int len, unsigned int offs,
+ u64 lblk_num)
+{
+ return fscrypt_crypt_block(inode, FS_DECRYPT, lblk_num, page, page,
+ len, offs, GFP_NOFS);
}
-EXPORT_SYMBOL(fscrypt_decrypt_page);
+EXPORT_SYMBOL(fscrypt_decrypt_block_inplace);
/*
* Validate dentries in encrypted directories to make sure we aren't potentially
@@ -355,18 +398,6 @@ const struct dentry_operations fscrypt_d_ops = {
.d_revalidate = fscrypt_d_revalidate,
};
-void fscrypt_restore_control_page(struct page *page)
-{
- struct fscrypt_ctx *ctx;
-
- ctx = (struct fscrypt_ctx *)page_private(page);
- set_page_private(page, (unsigned long)NULL);
- ClearPagePrivate(page);
- unlock_page(page);
- fscrypt_release_ctx(ctx);
-}
-EXPORT_SYMBOL(fscrypt_restore_control_page);
-
static void fscrypt_destroy(void)
{
struct fscrypt_ctx *pos, *n;
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index eccea3d8f923..00d150ff3033 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -12,7 +12,6 @@
*/
#include <linux/scatterlist.h>
-#include <linux/ratelimit.h>
#include <crypto/skcipher.h>
#include "fscrypt_private.h"
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 7da276159593..8978eec9d766 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -94,7 +94,6 @@ typedef enum {
} fscrypt_direction_t;
#define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
-#define FS_CTX_HAS_BOUNCE_BUFFER_FL 0x00000002
static inline bool fscrypt_valid_enc_modes(u32 contents_mode,
u32 filenames_mode)
@@ -117,14 +116,12 @@ static inline bool fscrypt_valid_enc_modes(u32 contents_mode,
/* crypto.c */
extern struct kmem_cache *fscrypt_info_cachep;
extern int fscrypt_initialize(unsigned int cop_flags);
-extern int fscrypt_do_page_crypto(const struct inode *inode,
- fscrypt_direction_t rw, u64 lblk_num,
- struct page *src_page,
- struct page *dest_page,
- unsigned int len, unsigned int offs,
- gfp_t gfp_flags);
-extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
- gfp_t gfp_flags);
+extern int fscrypt_crypt_block(const struct inode *inode,
+ fscrypt_direction_t rw, u64 lblk_num,
+ struct page *src_page, struct page *dest_page,
+ unsigned int len, unsigned int offs,
+ gfp_t gfp_flags);
+extern struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags);
extern const struct dentry_operations fscrypt_d_ops;
extern void __printf(3, 4) __cold
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index bd525f7573a4..c1d6715d88e9 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -5,7 +5,6 @@
* Encryption hooks for higher-level filesystem operations.
*/
-#include <linux/ratelimit.h>
#include "fscrypt_private.h"
/**
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index dcd91a3fbe49..207ebed918c1 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -12,7 +12,6 @@
#include <keys/user-type.h>
#include <linux/hashtable.h>
#include <linux/scatterlist.h>
-#include <linux/ratelimit.h>
#include <crypto/aes.h>
#include <crypto/algapi.h>
#include <crypto/sha.h>
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index d536889ac31b..4941fe8471ce 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -81,6 +81,8 @@ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg)
if (ret == -ENODATA) {
if (!S_ISDIR(inode->i_mode))
ret = -ENOTDIR;
+ else if (IS_DEADDIR(inode))
+ ret = -ENOENT;
else if (!inode->i_sb->s_cop->empty_dir(inode))
ret = -ENOTEMPTY;
else
diff --git a/fs/dax.c b/fs/dax.c
index 2e48c7ebb973..fe5e33810cd4 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -720,12 +720,11 @@ static void *dax_insert_entry(struct xa_state *xas,
xas_reset(xas);
xas_lock_irq(xas);
- if (dax_entry_size(entry) != dax_entry_size(new_entry)) {
+ if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
+ void *old;
+
dax_disassociate_entry(entry, mapping, false);
dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address);
- }
-
- if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
/*
* Only swap our new entry into the page cache if the current
* entry is a zero page or an empty entry. If a normal PTE or
@@ -734,7 +733,7 @@ static void *dax_insert_entry(struct xa_state *xas,
* existing entry is a PMD, we will just leave the PMD in the
* tree and dirty it if necessary.
*/
- void *old = dax_lock_entry(xas, new_entry);
+ old = dax_lock_entry(xas, new_entry);
WARN_ON_ONCE(old != xa_mk_value(xa_to_value(entry) |
DAX_LOCKED));
entry = new_entry;
@@ -1188,7 +1187,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
unsigned flags = 0;
if (iov_iter_rw(iter) == WRITE) {
- lockdep_assert_held_exclusive(&inode->i_rwsem);
+ lockdep_assert_held_write(&inode->i_rwsem);
flags |= IOMAP_WRITE;
} else {
lockdep_assert_held(&inode->i_rwsem);
diff --git a/fs/dcache.c b/fs/dcache.c
index c435398f2c81..f41121e5d1ec 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2372,7 +2372,6 @@ EXPORT_SYMBOL(d_hash_and_lookup);
void d_delete(struct dentry * dentry)
{
struct inode *inode = dentry->d_inode;
- int isdir = d_is_dir(dentry);
spin_lock(&inode->i_lock);
spin_lock(&dentry->d_lock);
@@ -2387,7 +2386,6 @@ void d_delete(struct dentry * dentry)
spin_unlock(&dentry->d_lock);
spin_unlock(&inode->i_lock);
}
- fsnotify_nameremove(dentry, isdir);
}
EXPORT_SYMBOL(d_delete);
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index acef14ad53db..1e444fe1f778 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -617,13 +617,10 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
}
EXPORT_SYMBOL_GPL(debugfs_create_symlink);
-static void __debugfs_remove_file(struct dentry *dentry, struct dentry *parent)
+static void __debugfs_file_removed(struct dentry *dentry)
{
struct debugfs_fsdata *fsd;
- simple_unlink(d_inode(parent), dentry);
- d_delete(dentry);
-
/*
* Paired with the closing smp_mb() implied by a successful
* cmpxchg() in debugfs_file_get(): either
@@ -644,16 +641,18 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)
if (simple_positive(dentry)) {
dget(dentry);
- if (!d_is_reg(dentry)) {
- if (d_is_dir(dentry))
- ret = simple_rmdir(d_inode(parent), dentry);
- else
- simple_unlink(d_inode(parent), dentry);
+ if (d_is_dir(dentry)) {
+ ret = simple_rmdir(d_inode(parent), dentry);
if (!ret)
- d_delete(dentry);
+ fsnotify_rmdir(d_inode(parent), dentry);
} else {
- __debugfs_remove_file(dentry, parent);
+ simple_unlink(d_inode(parent), dentry);
+ fsnotify_unlink(d_inode(parent), dentry);
}
+ if (!ret)
+ d_delete(dentry);
+ if (d_is_reg(dentry))
+ __debugfs_file_removed(dentry);
dput(dentry);
}
return ret;
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 2c14ae044dce..beeadca23b05 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -621,6 +621,7 @@ void devpts_pty_kill(struct dentry *dentry)
dentry->d_fsdata = NULL;
drop_nlink(dentry->d_inode);
+ fsnotify_unlink(d_inode(dentry->d_parent), dentry);
d_delete(dentry);
dput(dentry); /* d_alloc_name() in devpts_pty_new() */
}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index ac7fb19b6ade..ae196784f487 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -538,8 +538,8 @@ static struct bio *dio_await_one(struct dio *dio)
*/
static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio)
{
- struct bio_vec *bvec;
blk_status_t err = bio->bi_status;
+ bool should_dirty = dio->op == REQ_OP_READ && dio->should_dirty;
if (err) {
if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT))
@@ -548,19 +548,10 @@ static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio)
dio->io_error = -EIO;
}
- if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) {
+ if (dio->is_async && should_dirty) {
bio_check_pages_dirty(bio); /* transfers ownership */
} else {
- struct bvec_iter_all iter_all;
-
- bio_for_each_segment_all(bvec, bio, iter_all) {
- struct page *page = bvec->bv_page;
-
- if (dio->op == REQ_OP_READ && !PageCompound(page) &&
- dio->should_dirty)
- set_page_dirty_lock(page);
- put_page(page);
- }
+ bio_release_pages(bio, should_dirty);
bio_put(bio);
}
return err;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index c6f513100cc9..4c74c768ae43 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -2325,7 +2325,7 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
error = do_epoll_wait(epfd, events, maxevents, timeout);
- restore_user_sigmask(sigmask, &sigsaved);
+ restore_user_sigmask(sigmask, &sigsaved, error == -EINTR);
return error;
}
@@ -2350,7 +2350,7 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
err = do_epoll_wait(epfd, events, maxevents, timeout);
- restore_user_sigmask(sigmask, &sigsaved);
+ restore_user_sigmask(sigmask, &sigsaved, err == -EINTR);
return err;
}
diff --git a/fs/exec.c b/fs/exec.c
index 89a500bb897a..c71cbfe6826a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1663,7 +1663,7 @@ int search_binary_handler(struct linux_binprm *bprm)
if (retval < 0 && !bprm->mm) {
/* we got to flush_old_exec() and failed after it */
read_unlock(&binfmt_lock);
- force_sigsegv(SIGSEGV, current);
+ force_sigsegv(SIGSEGV);
return retval;
}
if (retval != -ENOEXEC || !bprm->file) {
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 33db13365c5e..547c165299c0 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -1197,7 +1197,7 @@ static int ext2_has_free_blocks(struct ext2_sb_info *sbi)
/*
* Returns 1 if the passed-in block region is valid; 0 if some part overlaps
- * with filesystem metadata blocksi.
+ * with filesystem metadata blocks.
*/
int ext2_data_block_valid(struct ext2_sb_info *sbi, ext2_fsblk_t start_blk,
unsigned int count)
@@ -1212,7 +1212,6 @@ int ext2_data_block_valid(struct ext2_sb_info *sbi, ext2_fsblk_t start_blk,
(start_blk + count >= sbi->s_sb_block))
return 0;
-
return 1;
}
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index a0c5ea91fcd4..fda7d3f5b4be 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -172,9 +172,7 @@ static void ext2_preread_inode(struct inode *inode)
struct backing_dev_info *bdi;
bdi = inode_to_bdi(inode);
- if (bdi_read_congested(bdi))
- return;
- if (bdi_write_congested(bdi))
+ if (bdi_rw_congested(bdi))
return;
block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
@@ -511,6 +509,7 @@ repeat_in_this_group:
/*
* Scanned all blockgroups.
*/
+ brelse(bitmap_bh);
err = -ENOSPC;
goto fail;
got:
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index e474127dd255..7004ce581a32 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1400,7 +1400,7 @@ void ext2_set_file_ops(struct inode *inode)
struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
{
struct ext2_inode_info *ei;
- struct buffer_head * bh;
+ struct buffer_head * bh = NULL;
struct ext2_inode *raw_inode;
struct inode *inode;
long ret = -EIO;
@@ -1446,7 +1446,6 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
*/
if (inode->i_nlink == 0 && (inode->i_mode == 0 || ei->i_dtime)) {
/* this inode is deleted */
- brelse (bh);
ret = -ESTALE;
goto bad_inode;
}
@@ -1463,7 +1462,6 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
!ext2_data_block_valid(EXT2_SB(sb), ei->i_file_acl, 1)) {
ext2_error(sb, "ext2_iget", "bad extended attribute block %u",
ei->i_file_acl);
- brelse(bh);
ret = -EFSCORRUPTED;
goto bad_inode;
}
@@ -1526,6 +1524,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
return inode;
bad_inode:
+ brelse(bh);
iget_failed(inode);
return ERR_PTR(ret);
}
@@ -1640,7 +1639,7 @@ int ext2_write_inode(struct inode *inode, struct writeback_control *wbc)
}
int ext2_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int query_falgs)
+ u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
struct ext2_inode_info *ei = EXT2_I(inode);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 1d7ab73b1014..44eb6e7eb492 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -303,16 +303,16 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root)
if (test_opt(sb, NOBH))
seq_puts(seq, ",nobh");
- if (sbi->s_mount_opt & EXT2_MOUNT_USRQUOTA)
+ if (test_opt(sb, USRQUOTA))
seq_puts(seq, ",usrquota");
- if (sbi->s_mount_opt & EXT2_MOUNT_GRPQUOTA)
+ if (test_opt(sb, GRPQUOTA))
seq_puts(seq, ",grpquota");
- if (sbi->s_mount_opt & EXT2_MOUNT_XIP)
+ if (test_opt(sb, XIP))
seq_puts(seq, ",xip");
- if (sbi->s_mount_opt & EXT2_MOUNT_DAX)
+ if (test_opt(sb, DAX))
seq_puts(seq, ",dax");
if (!test_opt(sb, RESERVATION))
@@ -935,8 +935,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_resgid = opts.s_resgid;
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
- ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
- SB_POSIXACL : 0);
+ (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
sb->s_iflags |= SB_I_CGROUPWB;
if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV &&
@@ -967,11 +966,11 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
- if (sbi->s_mount_opt & EXT2_MOUNT_DAX) {
+ if (test_opt(sb, DAX)) {
if (!bdev_dax_supported(sb->s_bdev, blocksize)) {
ext2_msg(sb, KERN_ERR,
"DAX unsupported by block device. Turning off DAX.");
- sbi->s_mount_opt &= ~EXT2_MOUNT_DAX;
+ clear_opt(sbi->s_mount_opt, DAX);
}
}
@@ -1404,7 +1403,7 @@ out_set:
sbi->s_resuid = new_opts.s_resuid;
sbi->s_resgid = new_opts.s_resgid;
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
- ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? SB_POSIXACL : 0);
+ (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
spin_unlock(&sbi->s_lock);
return 0;
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 1e33e0ac8cf1..79369c13cc55 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -134,6 +134,53 @@ ext2_xattr_handler(int name_index)
return handler;
}
+static bool
+ext2_xattr_header_valid(struct ext2_xattr_header *header)
+{
+ if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
+ header->h_blocks != cpu_to_le32(1))
+ return false;
+
+ return true;
+}
+
+static bool
+ext2_xattr_entry_valid(struct ext2_xattr_entry *entry,
+ char *end, size_t end_offs)
+{
+ struct ext2_xattr_entry *next;
+ size_t size;
+
+ next = EXT2_XATTR_NEXT(entry);
+ if ((char *)next >= end)
+ return false;
+
+ if (entry->e_value_block != 0)
+ return false;
+
+ size = le32_to_cpu(entry->e_value_size);
+ if (size > end_offs ||
+ le16_to_cpu(entry->e_value_offs) + size > end_offs)
+ return false;
+
+ return true;
+}
+
+static int
+ext2_xattr_cmp_entry(int name_index, size_t name_len, const char *name,
+ struct ext2_xattr_entry *entry)
+{
+ int cmp;
+
+ cmp = name_index - entry->e_name_index;
+ if (!cmp)
+ cmp = name_len - entry->e_name_len;
+ if (!cmp)
+ cmp = memcmp(name, entry->e_name, name_len);
+
+ return cmp;
+}
+
/*
* ext2_xattr_get()
*
@@ -152,7 +199,7 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
struct ext2_xattr_entry *entry;
size_t name_len, size;
char *end;
- int error;
+ int error, not_found;
struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
@@ -176,9 +223,9 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
end = bh->b_data + bh->b_size;
- if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
- HDR(bh)->h_blocks != cpu_to_le32(1)) {
-bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
+ if (!ext2_xattr_header_valid(HDR(bh))) {
+bad_block:
+ ext2_error(inode->i_sb, "ext2_xattr_get",
"inode %ld: bad block %d", inode->i_ino,
EXT2_I(inode)->i_file_acl);
error = -EIO;
@@ -188,29 +235,25 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
/* find named attribute */
entry = FIRST_ENTRY(bh);
while (!IS_LAST_ENTRY(entry)) {
- struct ext2_xattr_entry *next =
- EXT2_XATTR_NEXT(entry);
- if ((char *)next >= end)
+ if (!ext2_xattr_entry_valid(entry, end,
+ inode->i_sb->s_blocksize))
goto bad_block;
- if (name_index == entry->e_name_index &&
- name_len == entry->e_name_len &&
- memcmp(name, entry->e_name, name_len) == 0)
+
+ not_found = ext2_xattr_cmp_entry(name_index, name_len, name,
+ entry);
+ if (!not_found)
goto found;
- entry = next;
+ if (not_found < 0)
+ break;
+
+ entry = EXT2_XATTR_NEXT(entry);
}
if (ext2_xattr_cache_insert(ea_block_cache, bh))
ea_idebug(inode, "cache insert failed");
error = -ENODATA;
goto cleanup;
found:
- /* check the buffer size */
- if (entry->e_value_block != 0)
- goto bad_block;
size = le32_to_cpu(entry->e_value_size);
- if (size > inode->i_sb->s_blocksize ||
- le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
- goto bad_block;
-
if (ext2_xattr_cache_insert(ea_block_cache, bh))
ea_idebug(inode, "cache insert failed");
if (buffer) {
@@ -266,9 +309,9 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
end = bh->b_data + bh->b_size;
- if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
- HDR(bh)->h_blocks != cpu_to_le32(1)) {
-bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
+ if (!ext2_xattr_header_valid(HDR(bh))) {
+bad_block:
+ ext2_error(inode->i_sb, "ext2_xattr_list",
"inode %ld: bad block %d", inode->i_ino,
EXT2_I(inode)->i_file_acl);
error = -EIO;
@@ -278,11 +321,10 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
/* check the on-disk data structure */
entry = FIRST_ENTRY(bh);
while (!IS_LAST_ENTRY(entry)) {
- struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(entry);
-
- if ((char *)next >= end)
+ if (!ext2_xattr_entry_valid(entry, end,
+ inode->i_sb->s_blocksize))
goto bad_block;
- entry = next;
+ entry = EXT2_XATTR_NEXT(entry);
}
if (ext2_xattr_cache_insert(ea_block_cache, bh))
ea_idebug(inode, "cache insert failed");
@@ -367,7 +409,7 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name,
struct super_block *sb = inode->i_sb;
struct buffer_head *bh = NULL;
struct ext2_xattr_header *header = NULL;
- struct ext2_xattr_entry *here, *last;
+ struct ext2_xattr_entry *here = NULL, *last = NULL;
size_t name_len, free, min_offs = sb->s_blocksize;
int not_found = 1, error;
char *end;
@@ -406,47 +448,39 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name,
le32_to_cpu(HDR(bh)->h_refcount));
header = HDR(bh);
end = bh->b_data + bh->b_size;
- if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
- header->h_blocks != cpu_to_le32(1)) {
-bad_block: ext2_error(sb, "ext2_xattr_set",
+ if (!ext2_xattr_header_valid(header)) {
+bad_block:
+ ext2_error(sb, "ext2_xattr_set",
"inode %ld: bad block %d", inode->i_ino,
EXT2_I(inode)->i_file_acl);
error = -EIO;
goto cleanup;
}
- /* Find the named attribute. */
- here = FIRST_ENTRY(bh);
- while (!IS_LAST_ENTRY(here)) {
- struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here);
- if ((char *)next >= end)
- goto bad_block;
- if (!here->e_value_block && here->e_value_size) {
- size_t offs = le16_to_cpu(here->e_value_offs);
- if (offs < min_offs)
- min_offs = offs;
- }
- not_found = name_index - here->e_name_index;
- if (!not_found)
- not_found = name_len - here->e_name_len;
- if (!not_found)
- not_found = memcmp(name, here->e_name,name_len);
- if (not_found <= 0)
- break;
- here = next;
- }
- last = here;
- /* We still need to compute min_offs and last. */
+ /*
+ * Find the named attribute. If not found, 'here' will point
+ * to entry where the new attribute should be inserted to
+ * maintain sorting.
+ */
+ last = FIRST_ENTRY(bh);
while (!IS_LAST_ENTRY(last)) {
- struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last);
- if ((char *)next >= end)
+ if (!ext2_xattr_entry_valid(last, end, sb->s_blocksize))
goto bad_block;
- if (!last->e_value_block && last->e_value_size) {
+ if (last->e_value_size) {
size_t offs = le16_to_cpu(last->e_value_offs);
if (offs < min_offs)
min_offs = offs;
}
- last = next;
+ if (not_found > 0) {
+ not_found = ext2_xattr_cmp_entry(name_index,
+ name_len,
+ name, last);
+ if (not_found <= 0)
+ here = last;
+ }
+ last = EXT2_XATTR_NEXT(last);
}
+ if (not_found > 0)
+ here = last;
/* Check whether we have enough space left. */
free = min_offs - ((char*)last - (char*)header) - sizeof(__u32);
@@ -454,7 +488,6 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
/* We will use a new extended attribute block. */
free = sb->s_blocksize -
sizeof(struct ext2_xattr_header) - sizeof(__u32);
- here = last = NULL; /* avoid gcc uninitialized warning. */
}
if (not_found) {
@@ -470,14 +503,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
error = -EEXIST;
if (flags & XATTR_CREATE)
goto cleanup;
- if (!here->e_value_block && here->e_value_size) {
- size_t size = le32_to_cpu(here->e_value_size);
-
- if (le16_to_cpu(here->e_value_offs) + size >
- sb->s_blocksize || size > sb->s_blocksize)
- goto bad_block;
- free += EXT2_XATTR_SIZE(size);
- }
+ free += EXT2_XATTR_SIZE(le32_to_cpu(here->e_value_size));
free += EXT2_XATTR_LEN(name_len);
}
error = -ENOSPC;
@@ -506,11 +532,10 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
unlock_buffer(bh);
ea_bdebug(bh, "cloning");
- header = kmalloc(bh->b_size, GFP_KERNEL);
+ header = kmemdup(HDR(bh), bh->b_size, GFP_KERNEL);
error = -ENOMEM;
if (header == NULL)
goto cleanup;
- memcpy(header, HDR(bh), bh->b_size);
header->h_refcount = cpu_to_le32(1);
offset = (char *)here - bh->b_data;
@@ -542,7 +567,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
here->e_name_len = name_len;
memcpy(here->e_name, name, name_len);
} else {
- if (!here->e_value_block && here->e_value_size) {
+ if (here->e_value_size) {
char *first_val = (char *)header + min_offs;
size_t offs = le16_to_cpu(here->e_value_offs);
char *val = (char *)header + offs;
@@ -569,7 +594,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
last = ENTRY(header+1);
while (!IS_LAST_ENTRY(last)) {
size_t o = le16_to_cpu(last->e_value_offs);
- if (!last->e_value_block && o < offs)
+ if (o < offs)
last->e_value_offs =
cpu_to_le16(o + size);
last = EXT2_XATTR_NEXT(last);
@@ -784,8 +809,7 @@ ext2_xattr_delete_inode(struct inode *inode)
goto cleanup;
}
ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count)));
- if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
- HDR(bh)->h_blocks != cpu_to_le32(1)) {
+ if (!ext2_xattr_header_valid(HDR(bh))) {
ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
"inode %ld: bad block %d", inode->i_ino,
EXT2_I(inode)->i_file_acl);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index e5d6ee61ff48..0b202e00d93f 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -603,9 +603,9 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
}
/**
- * ext4_should_retry_alloc()
+ * ext4_should_retry_alloc() - check if a block allocation should be retried
* @sb: super block
- * @retries number of attemps has been made
+ * @retries: number of attemps has been made
*
* ext4_should_retry_alloc() is called when ENOSPC is returned, and if
* it is profitable to retry the operation, this function will wait
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index c7843b149a1e..86054f31fe4d 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -33,6 +33,9 @@
static int ext4_dx_readdir(struct file *, struct dir_context *);
/**
+ * is_dx_dir() - check if a directory is using htree indexing
+ * @inode: directory inode
+ *
* Check if the given dir-inode refers to an htree-indexed directory
* (or a directory which could potentially get converted to use htree
* indexing).
@@ -109,7 +112,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
struct buffer_head *bh = NULL;
- int dir_has_error = 0;
struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
if (IS_ENCRYPTED(inode)) {
@@ -145,8 +147,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
return err;
}
- offset = ctx->pos & (sb->s_blocksize - 1);
-
while (ctx->pos < inode->i_size) {
struct ext4_map_blocks map;
@@ -155,9 +155,18 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
goto errout;
}
cond_resched();
+ offset = ctx->pos & (sb->s_blocksize - 1);
map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb);
map.m_len = 1;
err = ext4_map_blocks(NULL, inode, &map, 0);
+ if (err == 0) {
+ /* m_len should never be zero but let's avoid
+ * an infinite loop if it somehow is */
+ if (map.m_len == 0)
+ map.m_len = 1;
+ ctx->pos += map.m_len * sb->s_blocksize;
+ continue;
+ }
if (err > 0) {
pgoff_t index = map.m_pblk >>
(PAGE_SHIFT - inode->i_blkbits);
@@ -176,13 +185,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
}
if (!bh) {
- if (!dir_has_error) {
- EXT4_ERROR_FILE(file, 0,
- "directory contains a "
- "hole at offset %llu",
- (unsigned long long) ctx->pos);
- dir_has_error = 1;
- }
/* corrupt size? Maybe no more blocks to read */
if (ctx->pos > inode->i_blocks << 9)
break;
@@ -192,8 +194,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
/* Check the checksum */
if (!buffer_verified(bh) &&
- !ext4_dirent_csum_verify(inode,
- (struct ext4_dir_entry *)bh->b_data)) {
+ !ext4_dirblock_csum_verify(inode, bh)) {
EXT4_ERROR_FILE(file, 0, "directory fails checksum "
"at offset %llu",
(unsigned long long)ctx->pos);
@@ -674,7 +675,7 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
return memcmp(str, name->name, len);
}
- return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr);
+ return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr, false);
}
static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1cb67859e051..bf660aa7a9e0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -421,7 +421,8 @@ struct flex_groups {
EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */
-#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL))
+#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL |\
+ EXT4_PROJINHERIT_FL))
/* Flags that are appropriate for non-directories/regular files. */
#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
@@ -2077,6 +2078,9 @@ struct ext4_filename {
#ifdef CONFIG_FS_ENCRYPTION
struct fscrypt_str crypto_buf;
#endif
+#ifdef CONFIG_UNICODE
+ struct fscrypt_str cf_name;
+#endif
};
#define fname_name(p) ((p)->disk_name.name)
@@ -2302,6 +2306,12 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
struct ext4_group_desc *gdp);
ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
+#ifdef CONFIG_UNICODE
+extern void ext4_fname_setup_ci_filename(struct inode *dir,
+ const struct qstr *iname,
+ struct fscrypt_str *fname);
+#endif
+
#ifdef CONFIG_FS_ENCRYPTION
static inline void ext4_fname_from_fscrypt_name(struct ext4_filename *dst,
const struct fscrypt_name *src)
@@ -2328,6 +2338,10 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
return err;
ext4_fname_from_fscrypt_name(fname, &name);
+
+#ifdef CONFIG_UNICODE
+ ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name);
+#endif
return 0;
}
@@ -2343,6 +2357,10 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir,
return err;
ext4_fname_from_fscrypt_name(fname, &name);
+
+#ifdef CONFIG_UNICODE
+ ext4_fname_setup_ci_filename(dir, &dentry->d_name, &fname->cf_name);
+#endif
return 0;
}
@@ -2356,6 +2374,11 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname)
fname->crypto_buf.name = NULL;
fname->usr_fname = NULL;
fname->disk_name.name = NULL;
+
+#ifdef CONFIG_UNICODE
+ kfree(fname->cf_name.name);
+ fname->cf_name.name = NULL;
+#endif
}
#else /* !CONFIG_FS_ENCRYPTION */
static inline int ext4_fname_setup_filename(struct inode *dir,
@@ -2366,6 +2389,11 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
fname->usr_fname = iname;
fname->disk_name.name = (unsigned char *) iname->name;
fname->disk_name.len = iname->len;
+
+#ifdef CONFIG_UNICODE
+ ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name);
+#endif
+
return 0;
}
@@ -2376,7 +2404,13 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir,
return ext4_fname_setup_filename(dir, &dentry->d_name, 1, fname);
}
-static inline void ext4_fname_free_filename(struct ext4_filename *fname) { }
+static inline void ext4_fname_free_filename(struct ext4_filename *fname)
+{
+#ifdef CONFIG_UNICODE
+ kfree(fname->cf_name.name);
+ fname->cf_name.name = NULL;
+#endif
+}
#endif /* !CONFIG_FS_ENCRYPTION */
/* dir.c */
@@ -2568,8 +2602,8 @@ extern int ext4_ext_migrate(struct inode *);
extern int ext4_ind_migrate(struct inode *inode);
/* namei.c */
-extern int ext4_dirent_csum_verify(struct inode *inode,
- struct ext4_dir_entry *dirent);
+extern int ext4_dirblock_csum_verify(struct inode *inode,
+ struct buffer_head *bh);
extern int ext4_orphan_add(handle_t *, struct inode *);
extern int ext4_orphan_del(handle_t *, struct inode *);
extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
@@ -3070,11 +3104,11 @@ extern int ext4_try_create_inline_dir(handle_t *handle,
extern int ext4_read_inline_dir(struct file *filp,
struct dir_context *ctx,
int *has_inline_data);
-extern int htree_inlinedir_to_tree(struct file *dir_file,
- struct inode *dir, ext4_lblk_t block,
- struct dx_hash_info *hinfo,
- __u32 start_hash, __u32 start_minor_hash,
- int *has_inline_data);
+extern int ext4_inlinedir_to_tree(struct file *dir_file,
+ struct inode *dir, ext4_lblk_t block,
+ struct dx_hash_info *hinfo,
+ __u32 start_hash, __u32 start_minor_hash,
+ int *has_inline_data);
extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
struct ext4_filename *fname,
struct ext4_dir_entry_2 **res_dir,
@@ -3113,14 +3147,13 @@ extern struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
struct ext4_dir_entry_2 *de,
int blocksize, int csum_size,
unsigned int parent_ino, int dotdot_real_len);
-extern void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
- unsigned int blocksize);
-extern int ext4_handle_dirty_dirent_node(handle_t *handle,
- struct inode *inode,
- struct buffer_head *bh);
+extern void ext4_initialize_dirent_tail(struct buffer_head *bh,
+ unsigned int blocksize);
+extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode,
+ struct buffer_head *bh);
extern int ext4_ci_compare(const struct inode *parent,
- const struct qstr *name,
- const struct qstr *entry);
+ const struct qstr *fname,
+ const struct qstr *entry, bool quick);
#define S_SHIFT 12
static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 75a5309f2231..ef8fcf7d0d3b 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -361,20 +361,20 @@ static inline int ext4_journal_force_commit(journal_t *journal)
}
static inline int ext4_jbd2_inode_add_write(handle_t *handle,
- struct inode *inode)
+ struct inode *inode, loff_t start_byte, loff_t length)
{
if (ext4_handle_valid(handle))
- return jbd2_journal_inode_add_write(handle,
- EXT4_I(inode)->jinode);
+ return jbd2_journal_inode_ranged_write(handle,
+ EXT4_I(inode)->jinode, start_byte, length);
return 0;
}
static inline int ext4_jbd2_inode_add_wait(handle_t *handle,
- struct inode *inode)
+ struct inode *inode, loff_t start_byte, loff_t length)
{
if (ext4_handle_valid(handle))
- return jbd2_journal_inode_add_wait(handle,
- EXT4_I(inode)->jinode);
+ return jbd2_journal_inode_ranged_wait(handle,
+ EXT4_I(inode)->jinode, start_byte, length);
return 0;
}
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index d40ed940001e..92266a2da7d6 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5676,8 +5676,8 @@ out_mutex:
}
/**
- * ext4_swap_extents - Swap extents between two inodes
- *
+ * ext4_swap_extents() - Swap extents between two inodes
+ * @handle: handle for this transaction
* @inode1: First inode
* @inode2: Second inode
* @lblk1: Start block for first inode
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 023a3eb3afa3..7521de2dcf3a 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -1317,7 +1317,6 @@ static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk);
if (!es)
goto out_wrap;
- node = &es->rb_node;
while (*nr_to_scan > 0) {
if (es->es_lblk > end) {
ei->i_es_shrink_lblk = end + 1;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2c5baa5e8291..f4a24a46245e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -165,6 +165,10 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
ret = generic_write_checks(iocb, from);
if (ret <= 0)
return ret;
+
+ if (unlikely(IS_IMMUTABLE(inode)))
+ return -EPERM;
+
/*
* If we have encountered a bitmap-format file, the size limit
* is smaller than s_maxbytes, which is for extent-mapped files.
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 2024d3fa5504..36699a131168 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -294,14 +294,12 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
}
/**
- * ext4_alloc_branch - allocate and set up a chain of blocks.
- * @handle: handle for this transaction
- * @inode: owner
- * @indirect_blks: number of allocated indirect blocks
- * @blks: number of allocated direct blocks
- * @goal: preferred place for allocation
- * @offsets: offsets (in the blocks) to store the pointers to next.
- * @branch: place to store the chain in.
+ * ext4_alloc_branch() - allocate and set up a chain of blocks
+ * @handle: handle for this transaction
+ * @ar: structure describing the allocation request
+ * @indirect_blks: number of allocated indirect blocks
+ * @offsets: offsets (in the blocks) to store the pointers to next.
+ * @branch: place to store the chain in.
*
* This function allocates blocks, zeroes out all but the last one,
* links them into chain and (if we are synchronous) writes them to disk.
@@ -396,15 +394,11 @@ failed:
}
/**
- * ext4_splice_branch - splice the allocated branch onto inode.
+ * ext4_splice_branch() - splice the allocated branch onto inode.
* @handle: handle for this transaction
- * @inode: owner
- * @block: (logical) number of block we are adding
- * @chain: chain of indirect blocks (with a missing link - see
- * ext4_alloc_branch)
+ * @ar: structure describing the allocation request
* @where: location of missing link
* @num: number of indirect blocks we are adding
- * @blks: number of direct blocks we are adding
*
* This function fills the missing link and does all housekeeping needed in
* inode (->i_blocks, etc.). In case of success we end up with the full
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index f73bc3925282..88cdf3c90bd1 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1132,7 +1132,6 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
{
int err, csum_size = 0, header_size = 0;
struct ext4_dir_entry_2 *de;
- struct ext4_dir_entry_tail *t;
void *target = dir_block->b_data;
/*
@@ -1158,13 +1157,11 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
inline_size - EXT4_INLINE_DOTDOT_SIZE + header_size,
inode->i_sb->s_blocksize - csum_size);
- if (csum_size) {
- t = EXT4_DIRENT_TAIL(dir_block->b_data,
- inode->i_sb->s_blocksize);
- initialize_dirent_tail(t, inode->i_sb->s_blocksize);
- }
+ if (csum_size)
+ ext4_initialize_dirent_tail(dir_block,
+ inode->i_sb->s_blocksize);
set_buffer_uptodate(dir_block);
- err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
+ err = ext4_handle_dirty_dirblock(handle, inode, dir_block);
if (err)
return err;
set_buffer_verified(dir_block);
@@ -1327,11 +1324,11 @@ out:
* inlined dir. It returns the number directory entries loaded
* into the tree. If there is an error it is returned in err.
*/
-int htree_inlinedir_to_tree(struct file *dir_file,
- struct inode *dir, ext4_lblk_t block,
- struct dx_hash_info *hinfo,
- __u32 start_hash, __u32 start_minor_hash,
- int *has_inline_data)
+int ext4_inlinedir_to_tree(struct file *dir_file,
+ struct inode *dir, ext4_lblk_t block,
+ struct dx_hash_info *hinfo,
+ __u32 start_hash, __u32 start_minor_hash,
+ int *has_inline_data)
{
int err = 0, count = 0;
unsigned int parent_ino;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c7f77c643008..420fe3deed39 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -731,10 +731,16 @@ out_sem:
!(flags & EXT4_GET_BLOCKS_ZERO) &&
!ext4_is_quota_file(inode) &&
ext4_should_order_data(inode)) {
+ loff_t start_byte =
+ (loff_t)map->m_lblk << inode->i_blkbits;
+ loff_t length = (loff_t)map->m_len << inode->i_blkbits;
+
if (flags & EXT4_GET_BLOCKS_IO_SUBMIT)
- ret = ext4_jbd2_inode_add_wait(handle, inode);
+ ret = ext4_jbd2_inode_add_wait(handle, inode,
+ start_byte, length);
else
- ret = ext4_jbd2_inode_add_write(handle, inode);
+ ret = ext4_jbd2_inode_add_write(handle, inode,
+ start_byte, length);
if (ret)
return ret;
}
@@ -1164,8 +1170,9 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
int err = 0;
unsigned blocksize = inode->i_sb->s_blocksize;
unsigned bbits;
- struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
- bool decrypt = false;
+ struct buffer_head *bh, *head, *wait[2];
+ int nr_wait = 0;
+ int i;
BUG_ON(!PageLocked(page));
BUG_ON(from > PAGE_SIZE);
@@ -1217,23 +1224,32 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
!buffer_unwritten(bh) &&
(block_start < from || block_end > to)) {
ll_rw_block(REQ_OP_READ, 0, 1, &bh);
- *wait_bh++ = bh;
- decrypt = IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode);
+ wait[nr_wait++] = bh;
}
}
/*
* If we issued read requests, let them complete.
*/
- while (wait_bh > wait) {
- wait_on_buffer(*--wait_bh);
- if (!buffer_uptodate(*wait_bh))
+ for (i = 0; i < nr_wait; i++) {
+ wait_on_buffer(wait[i]);
+ if (!buffer_uptodate(wait[i]))
err = -EIO;
}
- if (unlikely(err))
+ if (unlikely(err)) {
page_zero_new_buffers(page, from, to);
- else if (decrypt)
- err = fscrypt_decrypt_page(page->mapping->host, page,
- PAGE_SIZE, 0, page->index);
+ } else if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) {
+ for (i = 0; i < nr_wait; i++) {
+ int err2;
+
+ err2 = fscrypt_decrypt_pagecache_blocks(page, blocksize,
+ bh_offset(wait[i]));
+ if (err2) {
+ clear_buffer_uptodate(wait[i]);
+ err = err2;
+ }
+ }
+ }
+
return err;
}
#endif
@@ -4065,9 +4081,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,
if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode)) {
/* We expect the key to be set. */
BUG_ON(!fscrypt_has_encryption_key(inode));
- BUG_ON(blocksize != PAGE_SIZE);
- WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host,
- page, PAGE_SIZE, 0, page->index));
+ WARN_ON_ONCE(fscrypt_decrypt_pagecache_blocks(
+ page, blocksize, bh_offset(bh)));
}
}
if (ext4_should_journal_data(inode)) {
@@ -4085,7 +4100,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,
err = 0;
mark_buffer_dirty(bh);
if (ext4_should_order_data(inode))
- err = ext4_jbd2_inode_add_write(handle, inode);
+ err = ext4_jbd2_inode_add_write(handle, inode, from,
+ length);
}
unlock:
@@ -4570,6 +4586,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
struct buffer_head *bh;
struct super_block *sb = inode->i_sb;
ext4_fsblk_t block;
+ struct blk_plug plug;
int inodes_per_block, inode_offset;
iloc->bh = NULL;
@@ -4658,6 +4675,7 @@ make_io:
* If we need to do any I/O, try to pre-readahead extra
* blocks from the inode table.
*/
+ blk_start_plug(&plug);
if (EXT4_SB(sb)->s_inode_readahead_blks) {
ext4_fsblk_t b, end, table;
unsigned num;
@@ -4688,6 +4706,7 @@ make_io:
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh);
+ blk_finish_plug(&plug);
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
EXT4_ERROR_INODE_BLOCK(inode, block,
@@ -5520,6 +5539,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
+ if (unlikely(IS_IMMUTABLE(inode)))
+ return -EPERM;
+
+ if (unlikely(IS_APPEND(inode) &&
+ (ia_valid & (ATTR_MODE | ATTR_UID |
+ ATTR_GID | ATTR_TIMES_SET))))
+ return -EPERM;
+
error = setattr_prepare(dentry, attr);
if (error)
return error;
@@ -5571,7 +5598,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (attr->ia_valid & ATTR_SIZE) {
handle_t *handle;
loff_t oldsize = inode->i_size;
- int shrink = (attr->ia_size <= inode->i_size);
+ int shrink = (attr->ia_size < inode->i_size);
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -5585,18 +5612,33 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size)
inode_inc_iversion(inode);
- if (ext4_should_order_data(inode) &&
- (attr->ia_size < inode->i_size)) {
- error = ext4_begin_ordered_truncate(inode,
+ if (shrink) {
+ if (ext4_should_order_data(inode)) {
+ error = ext4_begin_ordered_truncate(inode,
attr->ia_size);
- if (error)
- goto err_out;
+ if (error)
+ goto err_out;
+ }
+ /*
+ * Blocks are going to be removed from the inode. Wait
+ * for dio in flight.
+ */
+ inode_dio_wait(inode);
+ }
+
+ down_write(&EXT4_I(inode)->i_mmap_sem);
+
+ rc = ext4_break_layouts(inode);
+ if (rc) {
+ up_write(&EXT4_I(inode)->i_mmap_sem);
+ return rc;
}
+
if (attr->ia_size != inode->i_size) {
handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
- goto err_out;
+ goto out_mmap_sem;
}
if (ext4_handle_valid(handle) && shrink) {
error = ext4_orphan_add(handle, inode);
@@ -5624,42 +5666,31 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
i_size_write(inode, attr->ia_size);
up_write(&EXT4_I(inode)->i_data_sem);
ext4_journal_stop(handle);
- if (error) {
- if (orphan && inode->i_nlink)
- ext4_orphan_del(NULL, inode);
- goto err_out;
+ if (error)
+ goto out_mmap_sem;
+ if (!shrink) {
+ pagecache_isize_extended(inode, oldsize,
+ inode->i_size);
+ } else if (ext4_should_journal_data(inode)) {
+ ext4_wait_for_tail_page_commit(inode);
}
}
- if (!shrink) {
- pagecache_isize_extended(inode, oldsize, inode->i_size);
- } else {
- /*
- * Blocks are going to be removed from the inode. Wait
- * for dio in flight.
- */
- inode_dio_wait(inode);
- }
- if (orphan && ext4_should_journal_data(inode))
- ext4_wait_for_tail_page_commit(inode);
- down_write(&EXT4_I(inode)->i_mmap_sem);
-
- rc = ext4_break_layouts(inode);
- if (rc) {
- up_write(&EXT4_I(inode)->i_mmap_sem);
- error = rc;
- goto err_out;
- }
/*
* Truncate pagecache after we've waited for commit
* in data=journal mode to make pages freeable.
*/
truncate_pagecache(inode, inode->i_size);
- if (shrink) {
+ /*
+ * Call ext4_truncate() even if i_size didn't change to
+ * truncate possible preallocated blocks.
+ */
+ if (attr->ia_size <= oldsize) {
rc = ext4_truncate(inode);
if (rc)
error = rc;
}
+out_mmap_sem:
up_write(&EXT4_I(inode)->i_mmap_sem);
}
@@ -6190,6 +6221,9 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
get_block_t *get_block;
int retries = 0;
+ if (unlikely(IS_IMMUTABLE(inode)))
+ return VM_FAULT_SIGBUS;
+
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index e486e49b31ed..74648d42c69b 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -269,6 +269,29 @@ static int uuid_is_zero(__u8 u[16])
}
#endif
+/*
+ * If immutable is set and we are not clearing it, we're not allowed to change
+ * anything else in the inode. Don't error out if we're only trying to set
+ * immutable on an immutable file.
+ */
+static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid,
+ unsigned int flags)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ unsigned int oldflags = ei->i_flags;
+
+ if (!(oldflags & EXT4_IMMUTABLE_FL) || !(flags & EXT4_IMMUTABLE_FL))
+ return 0;
+
+ if ((oldflags & ~EXT4_IMMUTABLE_FL) != (flags & ~EXT4_IMMUTABLE_FL))
+ return -EPERM;
+ if (ext4_has_feature_project(inode->i_sb) &&
+ __kprojid_val(ei->i_projid) != new_projid)
+ return -EPERM;
+
+ return 0;
+}
+
static int ext4_ioctl_setflags(struct inode *inode,
unsigned int flags)
{
@@ -340,6 +363,20 @@ static int ext4_ioctl_setflags(struct inode *inode,
}
}
+ /*
+ * Wait for all pending directio and then flush all the dirty pages
+ * for this file. The flush marks all the pages readonly, so any
+ * subsequent attempt to write to the file (particularly mmap pages)
+ * will come through the filesystem and fail.
+ */
+ if (S_ISREG(inode->i_mode) && !IS_IMMUTABLE(inode) &&
+ (flags & EXT4_IMMUTABLE_FL)) {
+ inode_dio_wait(inode);
+ err = filemap_write_and_wait(inode->i_mapping);
+ if (err)
+ goto flags_out;
+ }
+
handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
@@ -742,6 +779,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return ext4_ioc_getfsmap(sb, (void __user *)arg);
case EXT4_IOC_GETFLAGS:
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
+ if (S_ISREG(inode->i_mode))
+ flags &= ~EXT4_PROJINHERIT_FL;
return put_user(flags, (int __user *) arg);
case EXT4_IOC_SETFLAGS: {
int err;
@@ -769,7 +808,11 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return err;
inode_lock(inode);
- err = ext4_ioctl_setflags(inode, flags);
+ err = ext4_ioctl_check_immutable(inode,
+ from_kprojid(&init_user_ns, ei->i_projid),
+ flags);
+ if (!err)
+ err = ext4_ioctl_setflags(inode, flags);
inode_unlock(inode);
mnt_drop_write_file(filp);
return err;
@@ -1139,6 +1182,9 @@ resizefs_out:
goto out;
flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) |
(flags & EXT4_FL_XFLAG_VISIBLE);
+ err = ext4_ioctl_check_immutable(inode, fa.fsx_projid, flags);
+ if (err)
+ goto out;
err = ext4_ioctl_setflags(inode, flags);
if (err)
goto out;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 99ba720dbb7a..a3e2767bdf2f 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4696,8 +4696,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
* ext4_free_blocks() -- Free given blocks and update quota
* @handle: handle for this transaction
* @inode: inode
- * @block: start physical block to free
- * @count: number of blocks to count
+ * @bh: optional buffer of the block to be freed
+ * @block: starting physical block to be freed
+ * @count: number of blocks to be freed
* @flags: flags used by ext4_free_blocks
*/
void ext4_free_blocks(handle_t *handle, struct inode *inode,
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 1083a9f3f16a..30ce3dc69378 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -13,11 +13,10 @@
#include "ext4_extents.h"
/**
- * get_ext_path - Find an extent path for designated logical block number.
- *
- * @inode: an inode which is searched
+ * get_ext_path() - Find an extent path for designated logical block number.
+ * @inode: inode to be searched
* @lblock: logical block number to find an extent path
- * @path: pointer to an extent path pointer (for output)
+ * @ppath: pointer to an extent path pointer (for output)
*
* ext4_find_extent wrapper. Return 0 on success, or a negative error value
* on failure.
@@ -42,8 +41,9 @@ get_ext_path(struct inode *inode, ext4_lblk_t lblock,
}
/**
- * ext4_double_down_write_data_sem - Acquire two inodes' write lock
- * of i_data_sem
+ * ext4_double_down_write_data_sem() - write lock two inodes's i_data_sem
+ * @first: inode to be locked
+ * @second: inode to be locked
*
* Acquire write lock of i_data_sem of the two inodes
*/
@@ -390,7 +390,8 @@ data_copy:
/* Even in case of data=writeback it is reasonable to pin
* inode to transaction, to prevent unexpected data loss */
- *err = ext4_jbd2_inode_add_write(handle, orig_inode);
+ *err = ext4_jbd2_inode_add_write(handle, orig_inode,
+ (loff_t)orig_page_offset << PAGE_SHIFT, replaced_size);
unlock_pages:
unlock_page(pagep[0]);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index cd01c4a67ffb..129029534075 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -82,8 +82,18 @@ static struct buffer_head *ext4_append(handle_t *handle,
static int ext4_dx_csum_verify(struct inode *inode,
struct ext4_dir_entry *dirent);
+/*
+ * Hints to ext4_read_dirblock regarding whether we expect a directory
+ * block being read to be an index block, or a block containing
+ * directory entries (and if the latter, whether it was found via a
+ * logical block in an htree index block). This is used to control
+ * what sort of sanity checkinig ext4_read_dirblock() will do on the
+ * directory block read from the storage device. EITHER will means
+ * the caller doesn't know what kind of directory block will be read,
+ * so no specific verification will be done.
+ */
typedef enum {
- EITHER, INDEX, DIRENT
+ EITHER, INDEX, DIRENT, DIRENT_HTREE
} dirblock_type_t;
#define ext4_read_dirblock(inode, block, type) \
@@ -109,11 +119,14 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
return bh;
}
- if (!bh) {
+ if (!bh && (type == INDEX || type == DIRENT_HTREE)) {
ext4_error_inode(inode, func, line, block,
- "Directory hole found");
+ "Directory hole found for htree %s block",
+ (type == INDEX) ? "index" : "leaf");
return ERR_PTR(-EFSCORRUPTED);
}
+ if (!bh)
+ return NULL;
dirent = (struct ext4_dir_entry *) bh->b_data;
/* Determine whether or not we have an index block */
if (is_dx(inode)) {
@@ -150,7 +163,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
}
}
if (!is_dx_block) {
- if (ext4_dirent_csum_verify(inode, dirent))
+ if (ext4_dirblock_csum_verify(inode, bh))
set_buffer_verified(bh);
else {
ext4_error_inode(inode, func, line, block,
@@ -280,9 +293,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
struct inode *dir, struct inode *inode);
/* checksumming functions */
-void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
- unsigned int blocksize)
+void ext4_initialize_dirent_tail(struct buffer_head *bh,
+ unsigned int blocksize)
{
+ struct ext4_dir_entry_tail *t = EXT4_DIRENT_TAIL(bh->b_data, blocksize);
+
memset(t, 0, sizeof(struct ext4_dir_entry_tail));
t->det_rec_len = ext4_rec_len_to_disk(
sizeof(struct ext4_dir_entry_tail), blocksize);
@@ -291,17 +306,17 @@ void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
/* Walk through a dirent block to find a checksum "dirent" at the tail */
static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
- struct ext4_dir_entry *de)
+ struct buffer_head *bh)
{
struct ext4_dir_entry_tail *t;
#ifdef PARANOID
struct ext4_dir_entry *d, *top;
- d = de;
- top = (struct ext4_dir_entry *)(((void *)de) +
+ d = (struct ext4_dir_entry *)bh->b_data;
+ top = (struct ext4_dir_entry *)(bh->b_data +
(EXT4_BLOCK_SIZE(inode->i_sb) -
- sizeof(struct ext4_dir_entry_tail)));
+ sizeof(struct ext4_dir_entry_tail)));
while (d < top && d->rec_len)
d = (struct ext4_dir_entry *)(((void *)d) +
le16_to_cpu(d->rec_len));
@@ -311,7 +326,7 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
t = (struct ext4_dir_entry_tail *)d;
#else
- t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb));
+ t = EXT4_DIRENT_TAIL(bh->b_data, EXT4_BLOCK_SIZE(inode->i_sb));
#endif
if (t->det_reserved_zero1 ||
@@ -323,8 +338,7 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
return t;
}
-static __le32 ext4_dirent_csum(struct inode *inode,
- struct ext4_dir_entry *dirent, int size)
+static __le32 ext4_dirblock_csum(struct inode *inode, void *dirent, int size)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
@@ -344,49 +358,49 @@ static void __warn_no_space_for_csum(struct inode *inode, const char *func,
"No space for directory leaf checksum. Please run e2fsck -D.");
}
-int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
+int ext4_dirblock_csum_verify(struct inode *inode, struct buffer_head *bh)
{
struct ext4_dir_entry_tail *t;
if (!ext4_has_metadata_csum(inode->i_sb))
return 1;
- t = get_dirent_tail(inode, dirent);
+ t = get_dirent_tail(inode, bh);
if (!t) {
warn_no_space_for_csum(inode);
return 0;
}
- if (t->det_checksum != ext4_dirent_csum(inode, dirent,
- (void *)t - (void *)dirent))
+ if (t->det_checksum != ext4_dirblock_csum(inode, bh->b_data,
+ (char *)t - bh->b_data))
return 0;
return 1;
}
-static void ext4_dirent_csum_set(struct inode *inode,
- struct ext4_dir_entry *dirent)
+static void ext4_dirblock_csum_set(struct inode *inode,
+ struct buffer_head *bh)
{
struct ext4_dir_entry_tail *t;
if (!ext4_has_metadata_csum(inode->i_sb))
return;
- t = get_dirent_tail(inode, dirent);
+ t = get_dirent_tail(inode, bh);
if (!t) {
warn_no_space_for_csum(inode);
return;
}
- t->det_checksum = ext4_dirent_csum(inode, dirent,
- (void *)t - (void *)dirent);
+ t->det_checksum = ext4_dirblock_csum(inode, bh->b_data,
+ (char *)t - bh->b_data);
}
-int ext4_handle_dirty_dirent_node(handle_t *handle,
- struct inode *inode,
- struct buffer_head *bh)
+int ext4_handle_dirty_dirblock(handle_t *handle,
+ struct inode *inode,
+ struct buffer_head *bh)
{
- ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
+ ext4_dirblock_csum_set(inode, bh);
return ext4_handle_dirty_metadata(handle, inode, bh);
}
@@ -980,7 +994,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
(unsigned long)block));
- bh = ext4_read_dirblock(dir, block, DIRENT);
+ bh = ext4_read_dirblock(dir, block, DIRENT_HTREE);
if (IS_ERR(bh))
return PTR_ERR(bh);
@@ -1090,10 +1104,10 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
if (ext4_has_inline_data(dir)) {
int has_inline_data = 1;
- count = htree_inlinedir_to_tree(dir_file, dir, 0,
- &hinfo, start_hash,
- start_minor_hash,
- &has_inline_data);
+ count = ext4_inlinedir_to_tree(dir_file, dir, 0,
+ &hinfo, start_hash,
+ start_minor_hash,
+ &has_inline_data);
if (has_inline_data) {
*next_hash = ~0;
return count;
@@ -1259,19 +1273,24 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
#ifdef CONFIG_UNICODE
/*
* Test whether a case-insensitive directory entry matches the filename
- * being searched for.
+ * being searched for. If quick is set, assume the name being looked up
+ * is already in the casefolded form.
*
* Returns: 0 if the directory entry matches, more than 0 if it
* doesn't match or less than zero on error.
*/
int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
- const struct qstr *entry)
+ const struct qstr *entry, bool quick)
{
const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb);
const struct unicode_map *um = sbi->s_encoding;
int ret;
- ret = utf8_strncasecmp(um, name, entry);
+ if (quick)
+ ret = utf8_strncasecmp_folded(um, name, entry);
+ else
+ ret = utf8_strncasecmp(um, name, entry);
+
if (ret < 0) {
/* Handle invalid character sequence as either an error
* or as an opaque byte sequence.
@@ -1287,6 +1306,32 @@ int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
return ret;
}
+
+void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
+ struct fscrypt_str *cf_name)
+{
+ int len;
+
+ if (!IS_CASEFOLDED(dir)) {
+ cf_name->name = NULL;
+ return;
+ }
+
+ cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS);
+ if (!cf_name->name)
+ return;
+
+ len = utf8_casefold(EXT4_SB(dir->i_sb)->s_encoding,
+ iname, cf_name->name,
+ EXT4_NAME_LEN);
+ if (len <= 0) {
+ kfree(cf_name->name);
+ cf_name->name = NULL;
+ return;
+ }
+ cf_name->len = (unsigned) len;
+
+}
#endif
/*
@@ -1313,8 +1358,15 @@ static inline bool ext4_match(const struct inode *parent,
#endif
#ifdef CONFIG_UNICODE
- if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent))
- return (ext4_ci_compare(parent, fname->usr_fname, &entry) == 0);
+ if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) {
+ if (fname->cf_name.name) {
+ struct qstr cf = {.name = fname->cf_name.name,
+ .len = fname->cf_name.len};
+ return !ext4_ci_compare(parent, &cf, &entry, true);
+ }
+ return !ext4_ci_compare(parent, fname->usr_fname, &entry,
+ false);
+ }
#endif
return fscrypt_match_name(&f, de->name, de->name_len);
@@ -1484,8 +1536,7 @@ restart:
if (!buffer_verified(bh) &&
!is_dx_internal_node(dir, block,
(struct ext4_dir_entry *)bh->b_data) &&
- !ext4_dirent_csum_verify(dir,
- (struct ext4_dir_entry *)bh->b_data)) {
+ !ext4_dirblock_csum_verify(dir, bh)) {
EXT4_ERROR_INODE(dir, "checksumming directory "
"block %lu", (unsigned long)block);
brelse(bh);
@@ -1586,7 +1637,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
return (struct buffer_head *) frame;
do {
block = dx_get_block(frame->at);
- bh = ext4_read_dirblock(dir, block, DIRENT);
+ bh = ext4_read_dirblock(dir, block, DIRENT_HTREE);
if (IS_ERR(bh))
goto errout;
@@ -1769,7 +1820,6 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
char *data1 = (*bh)->b_data, *data2;
unsigned split, move, size;
struct ext4_dir_entry_2 *de = NULL, *de2;
- struct ext4_dir_entry_tail *t;
int csum_size = 0;
int err = 0, i;
@@ -1830,11 +1880,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
(char *) de2,
blocksize);
if (csum_size) {
- t = EXT4_DIRENT_TAIL(data2, blocksize);
- initialize_dirent_tail(t, blocksize);
-
- t = EXT4_DIRENT_TAIL(data1, blocksize);
- initialize_dirent_tail(t, blocksize);
+ ext4_initialize_dirent_tail(*bh, blocksize);
+ ext4_initialize_dirent_tail(bh2, blocksize);
}
dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data1,
@@ -1848,7 +1895,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
de = de2;
}
dx_insert_block(frame, hash2 + continued, newblock);
- err = ext4_handle_dirty_dirent_node(handle, dir, bh2);
+ err = ext4_handle_dirty_dirblock(handle, dir, bh2);
if (err)
goto journal_error;
err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
@@ -1976,7 +2023,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
inode_inc_iversion(dir);
ext4_mark_inode_dirty(handle, dir);
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_dirent_node(handle, dir, bh);
+ err = ext4_handle_dirty_dirblock(handle, dir, bh);
if (err)
ext4_std_error(dir->i_sb, err);
return 0;
@@ -1995,8 +2042,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
struct dx_entry *entries;
struct ext4_dir_entry_2 *de, *de2;
- struct ext4_dir_entry_tail *t;
- char *data1, *top;
+ char *data2, *top;
unsigned len;
int retval;
unsigned blocksize;
@@ -2036,21 +2082,18 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
return PTR_ERR(bh2);
}
ext4_set_inode_flag(dir, EXT4_INODE_INDEX);
- data1 = bh2->b_data;
+ data2 = bh2->b_data;
- memcpy (data1, de, len);
- de = (struct ext4_dir_entry_2 *) data1;
- top = data1 + len;
+ memcpy(data2, de, len);
+ de = (struct ext4_dir_entry_2 *) data2;
+ top = data2 + len;
while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
de = de2;
- de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
- (char *) de,
- blocksize);
+ de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
+ (char *) de, blocksize);
- if (csum_size) {
- t = EXT4_DIRENT_TAIL(data1, blocksize);
- initialize_dirent_tail(t, blocksize);
- }
+ if (csum_size)
+ ext4_initialize_dirent_tail(bh2, blocksize);
/* Initialize the root; the dot dirents already exist */
de = (struct ext4_dir_entry_2 *) (&root->dotdot);
@@ -2080,7 +2123,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
if (retval)
goto out_frames;
- retval = ext4_handle_dirty_dirent_node(handle, dir, bh2);
+ retval = ext4_handle_dirty_dirblock(handle, dir, bh2);
if (retval)
goto out_frames;
@@ -2120,7 +2163,6 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
struct inode *dir = d_inode(dentry->d_parent);
struct buffer_head *bh = NULL;
struct ext4_dir_entry_2 *de;
- struct ext4_dir_entry_tail *t;
struct super_block *sb;
struct ext4_sb_info *sbi;
struct ext4_filename fname;
@@ -2170,6 +2212,11 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
blocks = dir->i_size >> sb->s_blocksize_bits;
for (block = 0; block < blocks; block++) {
bh = ext4_read_dirblock(dir, block, DIRENT);
+ if (bh == NULL) {
+ bh = ext4_bread(handle, dir, block,
+ EXT4_GET_BLOCKS_CREATE);
+ goto add_to_new_block;
+ }
if (IS_ERR(bh)) {
retval = PTR_ERR(bh);
bh = NULL;
@@ -2190,6 +2237,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
brelse(bh);
}
bh = ext4_append(handle, dir, &block);
+add_to_new_block:
if (IS_ERR(bh)) {
retval = PTR_ERR(bh);
bh = NULL;
@@ -2199,10 +2247,8 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
de->inode = 0;
de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize);
- if (csum_size) {
- t = EXT4_DIRENT_TAIL(bh->b_data, blocksize);
- initialize_dirent_tail(t, blocksize);
- }
+ if (csum_size)
+ ext4_initialize_dirent_tail(bh, blocksize);
retval = add_dirent_to_buf(handle, &fname, dir, inode, de, bh);
out:
@@ -2234,7 +2280,7 @@ again:
return PTR_ERR(frame);
entries = frame->entries;
at = frame->at;
- bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT);
+ bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT_HTREE);
if (IS_ERR(bh)) {
err = PTR_ERR(bh);
bh = NULL;
@@ -2460,7 +2506,7 @@ static int ext4_delete_entry(handle_t *handle,
goto out;
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_dirent_node(handle, dir, bh);
+ err = ext4_handle_dirty_dirblock(handle, dir, bh);
if (unlikely(err))
goto out;
@@ -2662,7 +2708,6 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
{
struct buffer_head *dir_block = NULL;
struct ext4_dir_entry_2 *de;
- struct ext4_dir_entry_tail *t;
ext4_lblk_t block = 0;
unsigned int blocksize = dir->i_sb->s_blocksize;
int csum_size = 0;
@@ -2686,13 +2731,11 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
de = (struct ext4_dir_entry_2 *)dir_block->b_data;
ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0);
set_nlink(inode, 2);
- if (csum_size) {
- t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize);
- initialize_dirent_tail(t, blocksize);
- }
+ if (csum_size)
+ ext4_initialize_dirent_tail(dir_block, blocksize);
BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
+ err = ext4_handle_dirty_dirblock(handle, inode, dir_block);
if (err)
goto out;
set_buffer_verified(dir_block);
@@ -2782,7 +2825,10 @@ bool ext4_empty_dir(struct inode *inode)
EXT4_ERROR_INODE(inode, "invalid size");
return true;
}
- bh = ext4_read_dirblock(inode, 0, EITHER);
+ /* The first directory block must not be a hole,
+ * so treat it as DIRENT_HTREE
+ */
+ bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
if (IS_ERR(bh))
return true;
@@ -2804,6 +2850,10 @@ bool ext4_empty_dir(struct inode *inode)
brelse(bh);
lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb);
bh = ext4_read_dirblock(inode, lblock, EITHER);
+ if (bh == NULL) {
+ offset += sb->s_blocksize;
+ continue;
+ }
if (IS_ERR(bh))
return true;
de = (struct ext4_dir_entry_2 *) bh->b_data;
@@ -3369,7 +3419,10 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
struct buffer_head *bh;
if (!ext4_has_inline_data(inode)) {
- bh = ext4_read_dirblock(inode, 0, EITHER);
+ /* The first directory block must not be a hole, so
+ * treat it as DIRENT_HTREE
+ */
+ bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
if (IS_ERR(bh)) {
*retval = PTR_ERR(bh);
return NULL;
@@ -3430,9 +3483,8 @@ static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent,
ent->inode,
ent->dir_bh);
} else {
- retval = ext4_handle_dirty_dirent_node(handle,
- ent->inode,
- ent->dir_bh);
+ retval = ext4_handle_dirty_dirblock(handle, ent->inode,
+ ent->dir_bh);
}
} else {
retval = ext4_mark_inode_dirty(handle, ent->inode);
@@ -3462,8 +3514,7 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
ext4_mark_inode_dirty(handle, ent->dir);
BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata");
if (!ent->inlined) {
- retval = ext4_handle_dirty_dirent_node(handle,
- ent->dir, ent->bh);
+ retval = ext4_handle_dirty_dirblock(handle, ent->dir, ent->bh);
if (unlikely(retval)) {
ext4_std_error(ent->dir->i_sb, retval);
return retval;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 4690618a92e9..a18a47a2a1d1 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -66,9 +66,7 @@ static void ext4_finish_bio(struct bio *bio)
bio_for_each_segment_all(bvec, bio, iter_all) {
struct page *page = bvec->bv_page;
-#ifdef CONFIG_FS_ENCRYPTION
- struct page *data_page = NULL;
-#endif
+ struct page *bounce_page = NULL;
struct buffer_head *bh, *head;
unsigned bio_start = bvec->bv_offset;
unsigned bio_end = bio_start + bvec->bv_len;
@@ -78,13 +76,10 @@ static void ext4_finish_bio(struct bio *bio)
if (!page)
continue;
-#ifdef CONFIG_FS_ENCRYPTION
- if (!page->mapping) {
- /* The bounce data pages are unmapped. */
- data_page = page;
- fscrypt_pullback_bio_page(&page, false);
+ if (fscrypt_is_bounce_page(page)) {
+ bounce_page = page;
+ page = fscrypt_pagecache_page(bounce_page);
}
-#endif
if (bio->bi_status) {
SetPageError(page);
@@ -111,10 +106,7 @@ static void ext4_finish_bio(struct bio *bio)
bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
local_irq_restore(flags);
if (!under_io) {
-#ifdef CONFIG_FS_ENCRYPTION
- if (data_page)
- fscrypt_restore_control_page(data_page);
-#endif
+ fscrypt_free_bounce_page(bounce_page);
end_page_writeback(page);
}
}
@@ -415,7 +407,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
struct writeback_control *wbc,
bool keep_towrite)
{
- struct page *data_page = NULL;
+ struct page *bounce_page = NULL;
struct inode *inode = page->mapping->host;
unsigned block_start;
struct buffer_head *bh, *head;
@@ -475,14 +467,22 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
bh = head = page_buffers(page);
+ /*
+ * If any blocks are being written to an encrypted file, encrypt them
+ * into a bounce page. For simplicity, just encrypt until the last
+ * block which might be needed. This may cause some unneeded blocks
+ * (e.g. holes) to be unnecessarily encrypted, but this is rare and
+ * can't happen in the common case of blocksize == PAGE_SIZE.
+ */
if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode) && nr_to_submit) {
gfp_t gfp_flags = GFP_NOFS;
+ unsigned int enc_bytes = round_up(len, i_blocksize(inode));
retry_encrypt:
- data_page = fscrypt_encrypt_page(inode, page, PAGE_SIZE, 0,
- page->index, gfp_flags);
- if (IS_ERR(data_page)) {
- ret = PTR_ERR(data_page);
+ bounce_page = fscrypt_encrypt_pagecache_blocks(page, enc_bytes,
+ 0, gfp_flags);
+ if (IS_ERR(bounce_page)) {
+ ret = PTR_ERR(bounce_page);
if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) {
if (io->io_bio) {
ext4_io_submit(io);
@@ -491,7 +491,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
gfp_flags |= __GFP_NOFAIL;
goto retry_encrypt;
}
- data_page = NULL;
+ bounce_page = NULL;
goto out;
}
}
@@ -500,8 +500,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
do {
if (!buffer_async_write(bh))
continue;
- ret = io_submit_add_bh(io, inode,
- data_page ? data_page : page, bh);
+ ret = io_submit_add_bh(io, inode, bounce_page ?: page, bh);
if (ret) {
/*
* We only get here on ENOMEM. Not much else
@@ -517,8 +516,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
/* Error stopped previous loop? Clean up buffers... */
if (ret) {
out:
- if (data_page)
- fscrypt_restore_control_page(data_page);
+ fscrypt_free_bounce_page(bounce_page);
printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
redirty_page_for_writepage(wbc, page);
do {
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 04b4f53f0659..b3cd7655a6ff 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -230,6 +230,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(journal_task),
NULL,
};
+ATTRIBUTE_GROUPS(ext4);
/* Features this copy of ext4 supports */
EXT4_ATTR_FEATURE(lazy_itable_init);
@@ -256,6 +257,7 @@ static struct attribute *ext4_feat_attrs[] = {
ATTR_LIST(metadata_csum_seed),
NULL,
};
+ATTRIBUTE_GROUPS(ext4_feat);
static void *calc_ptr(struct ext4_attr *a, struct ext4_sb_info *sbi)
{
@@ -374,13 +376,13 @@ static const struct sysfs_ops ext4_attr_ops = {
};
static struct kobj_type ext4_sb_ktype = {
- .default_attrs = ext4_attrs,
+ .default_groups = ext4_groups,
.sysfs_ops = &ext4_attr_ops,
.release = ext4_sb_release,
};
static struct kobj_type ext4_feat_ktype = {
- .default_attrs = ext4_feat_attrs,
+ .default_groups = ext4_feat_groups,
.sysfs_ops = &ext4_attr_ops,
.release = (void (*)(struct kobject *))kfree,
};
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index eda4181d2092..a546ac8685ea 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -185,7 +185,7 @@ static void f2fs_write_end_io(struct bio *bio)
continue;
}
- fscrypt_pullback_bio_page(&page, true);
+ fscrypt_finalize_bounce_page(&page);
if (unlikely(bio->bi_status)) {
mapping_set_error(page->mapping, -EIO);
@@ -362,10 +362,9 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
bio_for_each_segment_all(bvec, io->bio, iter_all) {
- if (bvec->bv_page->mapping)
- target = bvec->bv_page;
- else
- target = fscrypt_control_page(bvec->bv_page);
+ target = bvec->bv_page;
+ if (fscrypt_is_bounce_page(target))
+ target = fscrypt_pagecache_page(target);
if (inode && inode == target->mapping->host)
return true;
@@ -1727,8 +1726,9 @@ static int encrypt_one_page(struct f2fs_io_info *fio)
f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
retry_encrypt:
- fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
- PAGE_SIZE, 0, fio->page->index, gfp_flags);
+ fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(fio->page,
+ PAGE_SIZE, 0,
+ gfp_flags);
if (IS_ERR(fio->encrypted_page)) {
/* flush pending IOs and wait for a while in the ENOMEM case */
if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
@@ -1900,8 +1900,7 @@ got_it:
err = f2fs_inplace_write_data(fio);
if (err) {
if (f2fs_encrypted_file(inode))
- fscrypt_pullback_bio_page(&fio->encrypted_page,
- true);
+ fscrypt_finalize_bounce_page(&fio->encrypted_page);
if (PageWriteback(page))
end_page_writeback(page);
} else {
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e41cbe8e81b9..9ebfb1b28430 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -715,6 +715,7 @@ void wbc_detach_inode(struct writeback_control *wbc)
void wbc_account_io(struct writeback_control *wbc, struct page *page,
size_t bytes)
{
+ struct cgroup_subsys_state *css;
int id;
/*
@@ -726,7 +727,12 @@ void wbc_account_io(struct writeback_control *wbc, struct page *page,
if (!wbc->wb)
return;
- id = mem_cgroup_css_from_page(page)->id;
+ css = mem_cgroup_css_from_page(page);
+ /* dead cgroups shouldn't contribute to inode ownership arbitration */
+ if (!(css->flags & CSS_ONLINE))
+ return;
+
+ id = css->id;
if (id == wbc->wb_id) {
wbc->wb_bytes += bytes;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index b8f9c83835d5..5ae2828beb00 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -3112,9 +3112,9 @@ out:
return err;
}
-static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- size_t len, unsigned int flags)
+static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t len, unsigned int flags)
{
struct fuse_file *ff_in = file_in->private_data;
struct fuse_file *ff_out = file_out->private_data;
@@ -3142,6 +3142,9 @@ static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in,
if (fc->no_copy_file_range)
return -EOPNOTSUPP;
+ if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
+ return -EXDEV;
+
if (fc->writeback_cache) {
inode_lock(inode_in);
err = fuse_writeback_range(inode_in, pos_in, pos_in + len);
@@ -3152,6 +3155,10 @@ static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in,
inode_lock(inode_out);
+ err = file_modified(file_out);
+ if (err)
+ goto out;
+
if (fc->writeback_cache) {
err = fuse_writeback_range(inode_out, pos_out, pos_out + len);
if (err)
@@ -3190,10 +3197,26 @@ out:
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
inode_unlock(inode_out);
+ file_accessed(file_in);
return err;
}
+static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off,
+ struct file *dst_file, loff_t dst_off,
+ size_t len, unsigned int flags)
+{
+ ssize_t ret;
+
+ ret = __fuse_copy_file_range(src_file, src_off, dst_file, dst_off,
+ len, flags);
+
+ if (ret == -EOPNOTSUPP || ret == -EXDEV)
+ ret = generic_copy_file_range(src_file, src_off, dst_file,
+ dst_off, len, flags);
+ return ret;
+}
+
static const struct file_operations fuse_file_operations = {
.llseek = fuse_file_llseek,
.read_iter = fuse_file_read_iter,
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index abeac61cfed3..f42048cc5454 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -82,15 +82,11 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
}
/**
- * gfs2_writepage_common - Common bits of writepage
- * @page: The page to be written
+ * gfs2_writepage - Write page for writeback mappings
+ * @page: The page
* @wbc: The writeback control
- *
- * Returns: 1 if writepage is ok, otherwise an error code or zero if no error.
*/
-
-static int gfs2_writepage_common(struct page *page,
- struct writeback_control *wbc)
+static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
@@ -109,7 +105,9 @@ static int gfs2_writepage_common(struct page *page,
page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
goto out;
}
- return 1;
+
+ return nobh_writepage(page, gfs2_get_block_noalloc, wbc);
+
redirty:
redirty_page_for_writepage(wbc, page);
out:
@@ -117,24 +115,6 @@ out:
return 0;
}
-/**
- * gfs2_writepage - Write page for writeback mappings
- * @page: The page
- * @wbc: The writeback control
- *
- */
-
-static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
-{
- int ret;
-
- ret = gfs2_writepage_common(page, wbc);
- if (ret <= 0)
- return ret;
-
- return nobh_writepage(page, gfs2_get_block_noalloc, wbc);
-}
-
/* This is the same as calling block_write_full_page, but it also
* writes pages outside of i_size
*/
@@ -454,8 +434,7 @@ static int gfs2_jdata_writepages(struct address_space *mapping,
*
* Returns: errno
*/
-
-int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
+static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
{
struct buffer_head *dibh;
u64 dsize = i_size_read(&ip->i_inode);
@@ -518,7 +497,7 @@ static int __gfs2_readpage(void *file, struct page *page)
error = mpage_readpage(page, gfs2_block_map);
}
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)))
return -EIO;
return error;
@@ -635,7 +614,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
gfs2_glock_dq(&gh);
out_uninit:
gfs2_holder_uninit(&gh);
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)))
ret = -EIO;
return ret;
}
@@ -686,47 +665,6 @@ out:
}
/**
- * gfs2_stuffed_write_end - Write end for stuffed files
- * @inode: The inode
- * @dibh: The buffer_head containing the on-disk inode
- * @pos: The file position
- * @copied: How much was actually copied by the VFS
- * @page: The page
- *
- * This copies the data from the page into the inode block after
- * the inode data structure itself.
- *
- * Returns: copied bytes or errno
- */
-int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
- loff_t pos, unsigned copied,
- struct page *page)
-{
- struct gfs2_inode *ip = GFS2_I(inode);
- u64 to = pos + copied;
- void *kaddr;
- unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
-
- BUG_ON(pos + copied > gfs2_max_stuffed_size(ip));
-
- kaddr = kmap_atomic(page);
- memcpy(buf + pos, kaddr + pos, copied);
- flush_dcache_page(page);
- kunmap_atomic(kaddr);
-
- WARN_ON(!PageUptodate(page));
- unlock_page(page);
- put_page(page);
-
- if (copied) {
- if (inode->i_size < to)
- i_size_write(inode, to);
- mark_inode_dirty(inode);
- }
- return copied;
-}
-
-/**
* jdata_set_page_dirty - Page dirtying function
* @page: The page to dirty
*
@@ -759,7 +697,7 @@ static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
return 0;
if (!gfs2_is_stuffed(ip))
- dblock = generic_block_bmap(mapping, lblock, gfs2_block_map);
+ dblock = iomap_bmap(mapping, lblock, &gfs2_iomap_ops);
gfs2_glock_dq_uninit(&i_gh);
@@ -888,7 +826,7 @@ cannot_release:
return 0;
}
-static const struct address_space_operations gfs2_writeback_aops = {
+static const struct address_space_operations gfs2_aops = {
.writepage = gfs2_writepage,
.writepages = gfs2_writepages,
.readpage = gfs2_readpage,
@@ -902,21 +840,6 @@ static const struct address_space_operations gfs2_writeback_aops = {
.error_remove_page = generic_error_remove_page,
};
-static const struct address_space_operations gfs2_ordered_aops = {
- .writepage = gfs2_writepage,
- .writepages = gfs2_writepages,
- .readpage = gfs2_readpage,
- .readpages = gfs2_readpages,
- .set_page_dirty = __set_page_dirty_buffers,
- .bmap = gfs2_bmap,
- .invalidatepage = gfs2_invalidatepage,
- .releasepage = gfs2_releasepage,
- .direct_IO = noop_direct_IO,
- .migratepage = buffer_migrate_page,
- .is_partially_uptodate = block_is_partially_uptodate,
- .error_remove_page = generic_error_remove_page,
-};
-
static const struct address_space_operations gfs2_jdata_aops = {
.writepage = gfs2_jdata_writepage,
.writepages = gfs2_jdata_writepages,
@@ -932,15 +855,8 @@ static const struct address_space_operations gfs2_jdata_aops = {
void gfs2_set_aops(struct inode *inode)
{
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_sbd *sdp = GFS2_SB(inode);
-
- if (gfs2_is_jdata(ip))
+ if (gfs2_is_jdata(GFS2_I(inode)))
inode->i_mapping->a_ops = &gfs2_jdata_aops;
- else if (gfs2_is_writeback(sdp))
- inode->i_mapping->a_ops = &gfs2_writeback_aops;
- else if (gfs2_is_ordered(sdp))
- inode->i_mapping->a_ops = &gfs2_ordered_aops;
else
- BUG();
+ inode->i_mapping->a_ops = &gfs2_aops;
}
diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h
index fa8e5d0144dd..ff9877a68780 100644
--- a/fs/gfs2/aops.h
+++ b/fs/gfs2/aops.h
@@ -8,10 +8,6 @@
#include "incore.h"
-extern int stuffed_readpage(struct gfs2_inode *ip, struct page *page);
-extern int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
- loff_t pos, unsigned copied,
- struct page *page);
extern void adjust_fs_space(struct inode *inode);
extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
unsigned int from, unsigned int len);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 93ea1d529aa3..79581b9bdebb 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -595,7 +595,6 @@ enum alloc_state {
* gfs2_iomap_alloc - Build a metadata tree of the requested height
* @inode: The GFS2 inode
* @iomap: The iomap structure
- * @flags: iomap flags
* @mp: The metapath, with proper height information calculated
*
* In this routine we may have to alloc:
@@ -622,7 +621,7 @@ enum alloc_state {
*/
static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
- unsigned flags, struct metapath *mp)
+ struct metapath *mp)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -1088,7 +1087,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
}
if (iomap->type == IOMAP_HOLE) {
- ret = gfs2_iomap_alloc(inode, iomap, flags, mp);
+ ret = gfs2_iomap_alloc(inode, iomap, mp);
if (ret) {
gfs2_trans_end(sdp);
gfs2_inplace_release(ip);
@@ -1182,6 +1181,8 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
if (ip->i_qadata && ip->i_qadata->qa_qd_num)
gfs2_quota_unlock(ip);
+ if (iomap->flags & IOMAP_F_SIZE_CHANGED)
+ mark_inode_dirty(inode);
gfs2_write_unlock(inode);
out:
@@ -1232,7 +1233,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
if (create) {
ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp);
if (!ret && iomap.type == IOMAP_HOLE)
- ret = gfs2_iomap_alloc(inode, &iomap, IOMAP_WRITE, &mp);
+ ret = gfs2_iomap_alloc(inode, &iomap, &mp);
release_metapath(&mp);
} else {
ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp);
@@ -1462,7 +1463,7 @@ int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
if (!ret && iomap->type == IOMAP_HOLE)
- ret = gfs2_iomap_alloc(inode, iomap, IOMAP_WRITE, &mp);
+ ret = gfs2_iomap_alloc(inode, iomap, &mp);
release_metapath(&mp);
return ret;
}
@@ -1862,9 +1863,8 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
gfs2_assert_withdraw(sdp, bh);
if (gfs2_assert_withdraw(sdp,
prev_bnr != bh->b_blocknr)) {
- printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, "
- "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n",
- sdp->sd_fsname,
+ fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u,"
+ "s_h:%u, mp_h:%u\n",
(unsigned long long)ip->i_no_addr,
prev_bnr, ip->i_height, strip_h, mp_h);
}
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 88e4f955c518..6f35d19eec25 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -750,7 +750,7 @@ static struct gfs2_dirent *gfs2_dirent_split_alloc(struct inode *inode,
struct gfs2_dirent *dent;
dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
gfs2_dirent_find_offset, name, ptr);
- if (!dent || IS_ERR(dent))
+ if (IS_ERR_OR_NULL(dent))
return dent;
return do_init_dirent(inode, dent, name, bh,
(unsigned)(ptr - (void *)dent));
@@ -854,7 +854,7 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode,
return ERR_PTR(error);
dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, scan, name, NULL);
got_dent:
- if (unlikely(dent == NULL || IS_ERR(dent))) {
+ if (IS_ERR_OR_NULL(dent)) {
brelse(bh);
bh = NULL;
}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index d174b1f8fd08..8b0c2bfa90c1 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -363,31 +363,30 @@ static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size)
}
/**
- * gfs2_allocate_page_backing - Use bmap to allocate blocks
+ * gfs2_allocate_page_backing - Allocate blocks for a write fault
* @page: The (locked) page to allocate backing for
*
- * We try to allocate all the blocks required for the page in
- * one go. This might fail for various reasons, so we keep
- * trying until all the blocks to back this page are allocated.
- * If some of the blocks are already allocated, thats ok too.
+ * We try to allocate all the blocks required for the page in one go. This
+ * might fail for various reasons, so we keep trying until all the blocks to
+ * back this page are allocated. If some of the blocks are already allocated,
+ * that is ok too.
*/
-
static int gfs2_allocate_page_backing(struct page *page)
{
- struct inode *inode = page->mapping->host;
- struct buffer_head bh;
- unsigned long size = PAGE_SIZE;
- u64 lblock = page->index << (PAGE_SHIFT - inode->i_blkbits);
+ u64 pos = page_offset(page);
+ u64 size = PAGE_SIZE;
do {
- bh.b_state = 0;
- bh.b_size = size;
- gfs2_block_map(inode, lblock, &bh, 1);
- if (!buffer_mapped(&bh))
+ struct iomap iomap = { };
+
+ if (gfs2_iomap_get_alloc(page->mapping->host, pos, 1, &iomap))
return -EIO;
- size -= bh.b_size;
- lblock += (bh.b_size >> inode->i_blkbits);
- } while(size > 0);
+
+ iomap.length = min(iomap.length, size);
+ size -= iomap.length;
+ pos += iomap.length;
+ } while (size > 0);
+
return 0;
}
@@ -408,7 +407,7 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_alloc_parms ap = { .aflags = 0, };
unsigned long last_index;
- u64 pos = page->index << PAGE_SHIFT;
+ u64 pos = page_offset(page);
unsigned int data_blocks, ind_blocks, rblocks;
struct gfs2_holder gh;
loff_t size;
@@ -1166,7 +1165,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
cmd = F_SETLK;
fl->fl_type = F_UNLCK;
}
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) {
if (fl->fl_type == F_UNLCK)
locks_lock_file_wait(file, fl);
return -EIO;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f1ebcb42cbf5..e23fb8b7b020 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -544,7 +544,7 @@ __acquires(&gl->gl_lockref.lock)
unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0);
int ret;
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) &&
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) &&
target != LM_ST_UNLOCKED)
return;
lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
@@ -581,7 +581,7 @@ __acquires(&gl->gl_lockref.lock)
}
else if (ret) {
fs_err(sdp, "lm_lock ret %d\n", ret);
- GLOCK_BUG_ON(gl, !test_bit(SDF_SHUTDOWN,
+ GLOCK_BUG_ON(gl, !test_bit(SDF_WITHDRAWN,
&sdp->sd_flags));
}
} else { /* lock_nolock */
@@ -681,7 +681,7 @@ static void delete_work_func(struct work_struct *work)
goto out;
inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
- if (inode && !IS_ERR(inode)) {
+ if (!IS_ERR_OR_NULL(inode)) {
d_prune_aliases(inode);
iput(inode);
}
@@ -1075,7 +1075,7 @@ trap_recursive:
fs_err(sdp, "pid: %d\n", pid_nr(gh->gh_owner_pid));
fs_err(sdp, "lock type: %d req lock state : %d\n",
gh->gh_gl->gl_name.ln_type, gh->gh_state);
- gfs2_dump_glock(NULL, gl);
+ gfs2_dump_glock(NULL, gl, true);
BUG();
}
@@ -1094,7 +1094,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
int error = 0;
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)))
return -EIO;
if (test_bit(GLF_LRU, &gl->gl_flags))
@@ -1610,16 +1610,16 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp)
glock_hash_walk(thaw_glock, sdp);
}
-static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
+static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
{
spin_lock(&gl->gl_lockref.lock);
- gfs2_dump_glock(seq, gl);
+ gfs2_dump_glock(seq, gl, fsid);
spin_unlock(&gl->gl_lockref.lock);
}
static void dump_glock_func(struct gfs2_glock *gl)
{
- dump_glock(NULL, gl);
+ dump_glock(NULL, gl, true);
}
/**
@@ -1704,10 +1704,12 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
* dump_holder - print information about a glock holder
* @seq: the seq_file struct
* @gh: the glock holder
+ * @fs_id_buf: pointer to file system id (if requested)
*
*/
-static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
+static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh,
+ const char *fs_id_buf)
{
struct task_struct *gh_owner = NULL;
char flags_buf[32];
@@ -1715,8 +1717,8 @@ static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
rcu_read_lock();
if (gh->gh_owner_pid)
gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
- gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
- state2str(gh->gh_state),
+ gfs2_print_dbg(seq, "%s H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
+ fs_id_buf, state2str(gh->gh_state),
hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
gh->gh_error,
gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
@@ -1766,6 +1768,7 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
* gfs2_dump_glock - print information about a glock
* @seq: The seq_file struct
* @gl: the glock
+ * @fsid: If true, also dump the file system id
*
* The file format is as follows:
* One line per object, capital letters are used to indicate objects
@@ -1779,19 +1782,24 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
*
*/
-void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
+void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
{
const struct gfs2_glock_operations *glops = gl->gl_ops;
unsigned long long dtime;
const struct gfs2_holder *gh;
char gflags_buf[32];
+ char fs_id_buf[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2];
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+ memset(fs_id_buf, 0, sizeof(fs_id_buf));
+ if (fsid && sdp) /* safety precaution */
+ sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
dtime = jiffies - gl->gl_demote_time;
dtime *= 1000000/HZ; /* demote time in uSec */
if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
dtime = 0;
- gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d m:%ld\n",
- state2str(gl->gl_state),
+ gfs2_print_dbg(seq, "%sG: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d "
+ "v:%d r:%d m:%ld\n", fs_id_buf, state2str(gl->gl_state),
gl->gl_name.ln_type,
(unsigned long long)gl->gl_name.ln_number,
gflags2str(gflags_buf, gl),
@@ -1802,10 +1810,10 @@ void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
(int)gl->gl_lockref.count, gl->gl_hold_time);
list_for_each_entry(gh, &gl->gl_holders, gh_list)
- dump_holder(seq, gh);
+ dump_holder(seq, gh, fs_id_buf);
if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump)
- glops->go_dump(seq, gl);
+ glops->go_dump(seq, gl, fs_id_buf);
}
static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr)
@@ -2006,7 +2014,7 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
{
- dump_glock(seq, iter_ptr);
+ dump_glock(seq, iter_ptr, false);
return 0;
}
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 149d7f6af085..e4e0bed5257c 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -199,8 +199,11 @@ extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
struct gfs2_holder *gh);
extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
-extern void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl);
-#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { gfs2_dump_glock(NULL, gl); BUG(); } } while(0)
+extern void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl,
+ bool fsid);
+#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { \
+ gfs2_dump_glock(NULL, gl, true); \
+ BUG(); } } while(0)
extern __printf(2, 3)
void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
@@ -266,7 +269,7 @@ static inline void glock_set_object(struct gfs2_glock *gl, void *object)
{
spin_lock(&gl->gl_lockref.lock);
if (gfs2_assert_warn(gl->gl_name.ln_sbd, gl->gl_object == NULL))
- gfs2_dump_glock(NULL, gl);
+ gfs2_dump_glock(NULL, gl, true);
gl->gl_object = object;
spin_unlock(&gl->gl_lockref.lock);
}
@@ -278,7 +281,7 @@ static inline void glock_set_object(struct gfs2_glock *gl, void *object)
*
* I'd love to similarly add this:
* else if (gfs2_assert_warn(gl->gl_sbd, gl->gl_object == object))
- * gfs2_dump_glock(NULL, gl);
+ * gfs2_dump_glock(NULL, gl, true);
* Unfortunately, that's not possible because as soon as gfs2_delete_inode
* frees the block in the rgrp, another process can reassign it for an I_NEW
* inode in gfs2_create_inode because that calls new_inode, not gfs2_iget.
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index cf4c767005b1..ff213690e364 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -461,10 +461,12 @@ static int inode_go_lock(struct gfs2_holder *gh)
* inode_go_dump - print information about an inode
* @seq: The iterator
* @ip: the inode
+ * @fs_id_buf: file system id (may be empty)
*
*/
-static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl)
+static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl,
+ const char *fs_id_buf)
{
struct gfs2_inode *ip = gl->gl_object;
struct inode *inode = &ip->i_inode;
@@ -477,7 +479,8 @@ static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl)
nrpages = inode->i_data.nrpages;
xa_unlock_irq(&inode->i_data.i_pages);
- gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu p:%lu\n",
+ gfs2_print_dbg(seq, "%s I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu "
+ "p:%lu\n", fs_id_buf,
(unsigned long long)ip->i_no_formal_ino,
(unsigned long long)ip->i_no_addr,
IF2DT(ip->i_inode.i_mode), ip->i_flags,
@@ -503,7 +506,8 @@ static void freeze_go_sync(struct gfs2_glock *gl)
atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE);
error = freeze_super(sdp->sd_vfs);
if (error) {
- printk(KERN_INFO "GFS2: couldn't freeze filesystem: %d\n", error);
+ fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n",
+ error);
gfs2_assert_withdraw(sdp, 0);
}
queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work);
@@ -536,7 +540,7 @@ static int freeze_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh)
gfs2_consist(sdp);
/* Initialize some head of the log stuff */
- if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
+ if (!test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) {
sdp->sd_log_sequence = head.lh_sequence + 1;
gfs2_log_pointers_init(sdp, head.lh_blkno);
}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index c9af93ac6c73..7a993d7c022e 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -240,7 +240,8 @@ struct gfs2_glock_operations {
int (*go_demote_ok) (const struct gfs2_glock *gl);
int (*go_lock) (struct gfs2_holder *gh);
void (*go_unlock) (struct gfs2_holder *gh);
- void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl);
+ void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl,
+ const char *fs_id_buf);
void (*go_callback)(struct gfs2_glock *gl, bool remote);
const int go_type;
const unsigned long go_flags;
@@ -504,7 +505,6 @@ struct gfs2_trans {
unsigned int tr_num_buf_rm;
unsigned int tr_num_databuf_rm;
unsigned int tr_num_revoke;
- unsigned int tr_num_revoke_rm;
struct list_head tr_list;
struct list_head tr_databuf;
@@ -609,7 +609,7 @@ struct gfs2_tune {
enum {
SDF_JOURNAL_CHECKED = 0,
SDF_JOURNAL_LIVE = 1,
- SDF_SHUTDOWN = 2,
+ SDF_WITHDRAWN = 2,
SDF_NOBARRIERS = 3,
SDF_NORECOVERY = 4,
SDF_DEMOTE = 5,
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index b296c59832a7..2e2a8a2fb51d 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -793,7 +793,7 @@ fail_free_acls:
fail_gunlock:
gfs2_dir_no_add(&da);
gfs2_glock_dq_uninit(ghs);
- if (inode && !IS_ERR(inode)) {
+ if (!IS_ERR_OR_NULL(inode)) {
clear_nlink(inode);
if (!free_vfs_inode)
mark_inode_dirty(inode);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index c4c9700c366e..58e237fba565 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -882,7 +882,6 @@ static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new)
old->tr_num_buf_rm += new->tr_num_buf_rm;
old->tr_num_databuf_rm += new->tr_num_databuf_rm;
old->tr_num_revoke += new->tr_num_revoke;
- old->tr_num_revoke_rm += new->tr_num_revoke_rm;
list_splice_tail_init(&new->tr_databuf, &old->tr_databuf);
list_splice_tail_init(&new->tr_buf, &old->tr_buf);
@@ -904,7 +903,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
set_bit(TR_ATTACHED, &tr->tr_flags);
}
- sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
+ sdp->sd_log_commited_revoke += tr->tr_num_revoke;
reserved = calc_reserved(sdp);
maxres = sdp->sd_log_blks_reserved + tr->tr_reserved;
gfs2_assert_withdraw(sdp, maxres >= reserved);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 1921cda034fd..5b17979af539 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -759,9 +759,27 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, u32 start,
if (gfs2_meta_check(sdp, bh_ip))
error = -EIO;
- else
+ else {
+ struct gfs2_meta_header *mh =
+ (struct gfs2_meta_header *)bh_ip->b_data;
+
+ if (mh->mh_type == cpu_to_be32(GFS2_METATYPE_RG)) {
+ struct gfs2_rgrpd *rgd;
+
+ rgd = gfs2_blk2rgrpd(sdp, blkno, false);
+ if (rgd && rgd->rd_addr == blkno &&
+ rgd->rd_bits && rgd->rd_bits->bi_bh) {
+ fs_info(sdp, "Replaying 0x%llx but we "
+ "already have a bh!\n",
+ (unsigned long long)blkno);
+ fs_info(sdp, "busy:%d, pinned:%d\n",
+ buffer_busy(rgd->rd_bits->bi_bh) ? 1 : 0,
+ buffer_pinned(rgd->rd_bits->bi_bh));
+ gfs2_dump_glock(NULL, rgd->rd_gl, true);
+ }
+ }
mark_buffer_dirty(bh_ip);
-
+ }
brelse(bh_log);
brelse(bh_ip);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 456763e18def..662ef36c1874 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -251,7 +251,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
struct buffer_head *bh, *bhs[2];
int num = 0;
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) {
*bhp = NULL;
return -EIO;
}
@@ -309,7 +309,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)))
return -EIO;
wait_on_buffer(bh);
@@ -320,7 +320,7 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
gfs2_io_error_bh_wd(sdp, bh);
return -EIO;
}
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)))
return -EIO;
return 0;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 08823bb3b2d0..4a8e5a7310f0 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -61,6 +61,13 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
gt->gt_complain_secs = 10;
}
+void free_sbd(struct gfs2_sbd *sdp)
+{
+ if (sdp->sd_lkstats)
+ free_percpu(sdp->sd_lkstats);
+ kfree(sdp);
+}
+
static struct gfs2_sbd *init_sbd(struct super_block *sb)
{
struct gfs2_sbd *sdp;
@@ -72,10 +79,8 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
sdp->sd_vfs = sb;
sdp->sd_lkstats = alloc_percpu(struct gfs2_pcpu_lkstats);
- if (!sdp->sd_lkstats) {
- kfree(sdp);
- return NULL;
- }
+ if (!sdp->sd_lkstats)
+ goto fail;
sb->s_fs_info = sdp;
set_bit(SDF_NOJOURNALID, &sdp->sd_flags);
@@ -134,8 +139,11 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
mutex_init(&sdp->sd_freeze_mutex);
return sdp;
-}
+fail:
+ free_sbd(sdp);
+ return NULL;
+}
/**
* gfs2_check_sb - Check superblock
@@ -568,7 +576,7 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
INIT_WORK(&jd->jd_work, gfs2_recover_func);
jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1);
- if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
+ if (IS_ERR_OR_NULL(jd->jd_inode)) {
if (!jd->jd_inode)
error = -ENOENT;
else
@@ -996,7 +1004,7 @@ hostdata_error:
void gfs2_lm_unmount(struct gfs2_sbd *sdp)
{
const struct lm_lockops *lm = sdp->sd_lockstruct.ls_ops;
- if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) &&
+ if (likely(!test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) &&
lm->lm_unmount)
lm->lm_unmount(sdp);
}
@@ -1086,8 +1094,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
if (error) {
/* In this case, we haven't initialized sysfs, so we have to
manually free the sdp. */
- free_percpu(sdp->sd_lkstats);
- kfree(sdp);
+ free_sbd(sdp);
sb->s_fs_info = NULL;
return error;
}
@@ -1190,7 +1197,6 @@ fail_lm:
gfs2_lm_unmount(sdp);
fail_debug:
gfs2_delete_debugfs_file(sdp);
- free_percpu(sdp->sd_lkstats);
/* gfs2_sys_fs_del must be the last thing we do, since it causes
* sysfs to call function gfs2_sbd_release, which frees sdp. */
gfs2_sys_fs_del(sdp);
@@ -1370,7 +1376,6 @@ static void gfs2_kill_sb(struct super_block *sb)
sdp->sd_root_dir = NULL;
sdp->sd_master_dir = NULL;
shrink_dcache_sb(sb);
- free_percpu(sdp->sd_lkstats);
kill_block_super(sb);
}
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 8189b581236d..69c4b77f127b 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1475,7 +1475,7 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error)
{
if (error == 0 || error == -EROFS)
return;
- if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
+ if (!test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) {
fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error);
sdp->sd_log_error = error;
wake_up(&sdp->sd_logd_waitq);
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 2299a3fa1911..c529f8749a89 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -388,7 +388,8 @@ void gfs2_recover_func(struct work_struct *work)
}
t_tlck = ktime_get();
- fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);
+ fs_info(sdp, "jid=%u: Replaying journal...0x%x to 0x%x\n",
+ jd->jd_jid, head.lh_tail, head.lh_blkno);
for (pass = 0; pass < 2; pass++) {
lops_before_scan(jd, &head, pass);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 36f20a89d0c2..49ac0a5e74ea 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -610,11 +610,12 @@ int gfs2_rsqa_alloc(struct gfs2_inode *ip)
return gfs2_qa_alloc(ip);
}
-static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
+static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs,
+ const char *fs_id_buf)
{
struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, i_res);
- gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n",
+ gfs2_print_dbg(seq, "%s B: n:%llu s:%llu b:%u f:%u\n", fs_id_buf,
(unsigned long long)ip->i_no_addr,
(unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
rs->rs_rbm.offset, rs->rs_free);
@@ -1111,32 +1112,33 @@ static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
{
struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
+ struct gfs2_sbd *sdp = rgd->rd_sbd;
int valid = 1;
if (rgl->rl_flags != str->rg_flags) {
- printk(KERN_WARNING "GFS2: rgd: %llu lvb flag mismatch %u/%u",
- (unsigned long long)rgd->rd_addr,
+ fs_warn(sdp, "GFS2: rgd: %llu lvb flag mismatch %u/%u",
+ (unsigned long long)rgd->rd_addr,
be32_to_cpu(rgl->rl_flags), be32_to_cpu(str->rg_flags));
valid = 0;
}
if (rgl->rl_free != str->rg_free) {
- printk(KERN_WARNING "GFS2: rgd: %llu lvb free mismatch %u/%u",
- (unsigned long long)rgd->rd_addr,
- be32_to_cpu(rgl->rl_free), be32_to_cpu(str->rg_free));
+ fs_warn(sdp, "GFS2: rgd: %llu lvb free mismatch %u/%u",
+ (unsigned long long)rgd->rd_addr,
+ be32_to_cpu(rgl->rl_free), be32_to_cpu(str->rg_free));
valid = 0;
}
if (rgl->rl_dinodes != str->rg_dinodes) {
- printk(KERN_WARNING "GFS2: rgd: %llu lvb dinode mismatch %u/%u",
- (unsigned long long)rgd->rd_addr,
- be32_to_cpu(rgl->rl_dinodes),
- be32_to_cpu(str->rg_dinodes));
+ fs_warn(sdp, "GFS2: rgd: %llu lvb dinode mismatch %u/%u",
+ (unsigned long long)rgd->rd_addr,
+ be32_to_cpu(rgl->rl_dinodes),
+ be32_to_cpu(str->rg_dinodes));
valid = 0;
}
if (rgl->rl_igeneration != str->rg_igeneration) {
- printk(KERN_WARNING "GFS2: rgd: %llu lvb igen mismatch "
- "%llu/%llu", (unsigned long long)rgd->rd_addr,
- (unsigned long long)be64_to_cpu(rgl->rl_igeneration),
- (unsigned long long)be64_to_cpu(str->rg_igeneration));
+ fs_warn(sdp, "GFS2: rgd: %llu lvb igen mismatch %llu/%llu",
+ (unsigned long long)rgd->rd_addr,
+ (unsigned long long)be64_to_cpu(rgl->rl_igeneration),
+ (unsigned long long)be64_to_cpu(str->rg_igeneration));
valid = 0;
}
return valid;
@@ -2246,10 +2248,12 @@ static void rgblk_free(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd,
* gfs2_rgrp_dump - print out an rgrp
* @seq: The iterator
* @gl: The glock in question
+ * @fs_id_buf: pointer to file system id (if requested)
*
*/
-void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl)
+void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl,
+ const char *fs_id_buf)
{
struct gfs2_rgrpd *rgd = gl->gl_object;
struct gfs2_blkreserv *trs;
@@ -2257,14 +2261,15 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl)
if (rgd == NULL)
return;
- gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
+ gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
+ fs_id_buf,
(unsigned long long)rgd->rd_addr, rgd->rd_flags,
rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
rgd->rd_reserved, rgd->rd_extfail_pt);
if (rgd->rd_sbd->sd_args.ar_rgrplvb) {
struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
- gfs2_print_dbg(seq, " L: f:%02x b:%u i:%u\n",
+ gfs2_print_dbg(seq, "%s L: f:%02x b:%u i:%u\n", fs_id_buf,
be32_to_cpu(rgl->rl_flags),
be32_to_cpu(rgl->rl_free),
be32_to_cpu(rgl->rl_dinodes));
@@ -2272,7 +2277,7 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl)
spin_lock(&rgd->rd_rsspin);
for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
- dump_rs(seq, trs);
+ dump_rs(seq, trs, fs_id_buf);
}
spin_unlock(&rgd->rd_rsspin);
}
@@ -2280,10 +2285,13 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl)
static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
+ char fs_id_buf[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2];
+
fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
(unsigned long long)rgd->rd_addr);
fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
- gfs2_rgrp_dump(NULL, rgd->rd_gl);
+ sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
+ gfs2_rgrp_dump(NULL, rgd->rd_gl, fs_id_buf);
rgd->rd_flags |= GFS2_RDF_ERROR;
}
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 6a3adf0ee0b7..c14a673ae36f 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -69,7 +69,8 @@ extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist);
extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
extern u64 gfs2_ri_total(struct gfs2_sbd *sdp);
-extern void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl);
+extern void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl,
+ const char *fs_id_buf);
extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
struct buffer_head *bh,
const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index b70cea5c8c59..0acc5834f653 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -394,6 +394,7 @@ static int init_threads(struct gfs2_sbd *sdp)
fail:
kthread_stop(sdp->sd_logd_process);
+ sdp->sd_logd_process = NULL;
return error;
}
@@ -451,8 +452,12 @@ fail:
freeze_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq_uninit(&freeze_gh);
fail_threads:
- kthread_stop(sdp->sd_quotad_process);
- kthread_stop(sdp->sd_logd_process);
+ if (sdp->sd_quotad_process)
+ kthread_stop(sdp->sd_quotad_process);
+ sdp->sd_quotad_process = NULL;
+ if (sdp->sd_logd_process)
+ kthread_stop(sdp->sd_logd_process);
+ sdp->sd_logd_process = NULL;
return error;
}
@@ -800,7 +805,7 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
if (!(flags & I_DIRTY_INODE))
return;
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)))
return;
if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
@@ -849,12 +854,16 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, GL_NOCACHE,
&freeze_gh);
- if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+ if (error && !test_bit(SDF_WITHDRAWN, &sdp->sd_flags))
return error;
flush_workqueue(gfs2_delete_workqueue);
- kthread_stop(sdp->sd_quotad_process);
- kthread_stop(sdp->sd_logd_process);
+ if (sdp->sd_quotad_process)
+ kthread_stop(sdp->sd_quotad_process);
+ sdp->sd_quotad_process = NULL;
+ if (sdp->sd_logd_process)
+ kthread_stop(sdp->sd_logd_process);
+ sdp->sd_logd_process = NULL;
gfs2_quota_sync(sdp->sd_vfs, 0);
gfs2_statfs_sync(sdp->sd_vfs, 0);
@@ -969,14 +978,14 @@ void gfs2_freeze_func(struct work_struct *work)
error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0,
&freeze_gh);
if (error) {
- printk(KERN_INFO "GFS2: couldn't get freeze lock : %d\n", error);
+ fs_info(sdp, "GFS2: couldn't get freeze lock : %d\n", error);
gfs2_assert_withdraw(sdp, 0);
} else {
atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
error = thaw_super(sb);
if (error) {
- printk(KERN_INFO "GFS2: couldn't thaw filesystem: %d\n",
- error);
+ fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n",
+ error);
gfs2_assert_withdraw(sdp, 0);
}
if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
@@ -1004,7 +1013,7 @@ static int gfs2_freeze(struct super_block *sb)
if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN)
goto out;
- if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
+ if (test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) {
error = -EINVAL;
goto out;
}
@@ -1014,20 +1023,14 @@ static int gfs2_freeze(struct super_block *sb)
if (!error)
break;
- switch (error) {
- case -EBUSY:
+ if (error == -EBUSY)
fs_err(sdp, "waiting for recovery before freeze\n");
- break;
-
- default:
+ else
fs_err(sdp, "error freezing FS: %d\n", error);
- break;
- }
fs_err(sdp, "retrying...\n");
msleep(1000);
}
- error = 0;
set_bit(SDF_FS_FROZEN, &sdp->sd_flags);
out:
mutex_unlock(&sdp->sd_freeze_mutex);
@@ -1273,8 +1276,6 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
error = gfs2_make_fs_ro(sdp);
else
error = gfs2_make_fs_rw(sdp);
- if (error)
- return error;
}
sdp->sd_args = args;
@@ -1300,7 +1301,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
spin_unlock(&gt->gt_spin);
gfs2_online_uevent(sdp);
- return 0;
+ return error;
}
/**
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index c5f42f0c503b..9d49eaadb9d9 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -44,6 +44,8 @@ extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
extern int gfs2_statfs_sync(struct super_block *sb, int type);
extern void gfs2_freeze_func(struct work_struct *work);
+extern void free_sbd(struct gfs2_sbd *sdp);
+
extern struct file_system_type gfs2_fs_type;
extern struct file_system_type gfs2meta_fs_type;
extern const struct export_operations gfs2_export_ops;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 159aedf63c2a..289328831e24 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -118,7 +118,7 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf)
{
- unsigned int b = test_bit(SDF_SHUTDOWN, &sdp->sd_flags);
+ unsigned int b = test_bit(SDF_WITHDRAWN, &sdp->sd_flags);
return snprintf(buf, PAGE_SIZE, "%u\n", b);
}
@@ -301,7 +301,7 @@ static void gfs2_sbd_release(struct kobject *kobj)
{
struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
- kfree(sdp);
+ free_sbd(sdp);
}
static struct kobj_type gfs2_ktype = {
@@ -679,7 +679,6 @@ fail_lock_module:
fail_tune:
sysfs_remove_group(&sdp->sd_kobj, &tune_group);
fail_reg:
- free_percpu(sdp->sd_lkstats);
fs_err(sdp, "error %d adding sysfs files\n", error);
kobject_put(&sdp->sd_kobj);
sb->s_fs_info = NULL;
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 6f67ef7aa412..35e3059255fe 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -77,10 +77,10 @@ static void gfs2_print_trans(struct gfs2_sbd *sdp, const struct gfs2_trans *tr)
fs_warn(sdp, "blocks=%u revokes=%u reserved=%u touched=%u\n",
tr->tr_blocks, tr->tr_revokes, tr->tr_reserved,
test_bit(TR_TOUCHED, &tr->tr_flags));
- fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u/%u\n",
+ fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u\n",
tr->tr_num_buf_new, tr->tr_num_buf_rm,
tr->tr_num_databuf_new, tr->tr_num_databuf_rm,
- tr->tr_num_revoke, tr->tr_num_revoke_rm);
+ tr->tr_num_revoke);
}
void gfs2_trans_end(struct gfs2_sbd *sdp)
@@ -263,7 +263,7 @@ void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke);
sdp->sd_log_num_revoke--;
kmem_cache_free(gfs2_bufdata_cachep, bd);
- tr->tr_num_revoke_rm++;
+ tr->tr_num_revoke--;
if (--n == 0)
break;
}
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index a7e55234211f..83f6c582773a 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -41,7 +41,7 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...)
struct va_format vaf;
if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
- test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+ test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags))
return 0;
if (fmt) {
@@ -178,9 +178,11 @@ int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
const char *function, char *file, unsigned int line)
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
+ char fs_id_buf[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2];
int rv;
- gfs2_rgrp_dump(NULL, rgd->rd_gl);
+ sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
+ gfs2_rgrp_dump(NULL, rgd->rd_gl, fs_id_buf);
rv = gfs2_lm_withdraw(sdp,
"fatal: filesystem consistency error\n"
" RG = %llu\n"
@@ -256,7 +258,7 @@ void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
const char *function, char *file, unsigned int line,
bool withdraw)
{
- if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+ if (!test_bit(SDF_WITHDRAWN, &sdp->sd_flags))
fs_err(sdp,
"fatal: I/O error\n"
" block = %llu\n"
diff --git a/fs/inode.c b/fs/inode.c
index df6542ec3b88..5f5431ec3d62 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -362,7 +362,7 @@ EXPORT_SYMBOL(inc_nlink);
static void __address_space_init_once(struct address_space *mapping)
{
- xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ);
+ xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
init_rwsem(&mapping->i_mmap_rwsem);
INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock);
@@ -1899,6 +1899,26 @@ int file_update_time(struct file *file)
}
EXPORT_SYMBOL(file_update_time);
+/* Caller must hold the file's inode lock */
+int file_modified(struct file *file)
+{
+ int err;
+
+ /*
+ * Clear the security bits if the process is not being run by root.
+ * This keeps people from modifying setuid and setgid binaries.
+ */
+ err = file_remove_privs(file);
+ if (err)
+ return err;
+
+ if (unlikely(file->f_mode & FMODE_NOCMTIME))
+ return 0;
+
+ return file_update_time(file);
+}
+EXPORT_SYMBOL(file_modified);
+
int inode_needs_sync(struct inode *inode)
{
if (IS_SYNC(inode))
diff --git a/fs/internal.h b/fs/internal.h
index a48ef81be37d..2f3c3de51fad 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -40,8 +40,6 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait)
extern void guard_bio_eod(int rw, struct bio *bio);
extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block, struct iomap *iomap);
-void __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
- struct page *page);
/*
* char_dev.c
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 86a2bd721900..4ed4b110a154 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -579,6 +579,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
state->cur_req++;
}
+ req->file = NULL;
req->ctx = ctx;
req->flags = 0;
/* one is dropped after submission, the other at completion */
@@ -997,9 +998,6 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
if (offset)
iov_iter_advance(iter, offset);
-
- /* don't drop a reference to these pages */
- iter->type |= ITER_BVEC_FLAG_NO_REF;
return 0;
}
@@ -1801,10 +1799,8 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
req->sequence = ctx->cached_sq_head - 1;
}
- if (!io_op_needs_file(s->sqe)) {
- req->file = NULL;
+ if (!io_op_needs_file(s->sqe))
return 0;
- }
if (flags & IOSQE_FIXED_FILE) {
if (unlikely(!ctx->user_files ||
@@ -2201,11 +2197,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
}
ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events);
- if (ret == -ERESTARTSYS)
- ret = -EINTR;
if (sig)
- restore_user_sigmask(sig, &sigsaved);
+ restore_user_sigmask(sig, &sigsaved, ret == -ERESTARTSYS);
+
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
return READ_ONCE(ring->r.head) == READ_ONCE(ring->r.tail) ? ret : 0;
}
diff --git a/fs/iomap.c b/fs/iomap.c
index 12654c2e78f8..217c3e5a13d6 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -333,7 +333,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
if (iop)
atomic_inc(&iop->read_count);
- if (!ctx->bio || !is_contig || bio_full(ctx->bio)) {
+ if (!ctx->bio || !is_contig || bio_full(ctx->bio, plen)) {
gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -777,6 +777,7 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
unsigned copied, struct page *page, struct iomap *iomap)
{
const struct iomap_page_ops *page_ops = iomap->page_ops;
+ loff_t old_size = inode->i_size;
int ret;
if (iomap->type == IOMAP_INLINE) {
@@ -788,9 +789,21 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
ret = __iomap_write_end(inode, pos, len, copied, page, iomap);
}
- __generic_write_end(inode, pos, ret, page);
+ /*
+ * Update the in-memory inode size after copying the data into the page
+ * cache. It's up to the file system to write the updated size to disk,
+ * preferably after I/O completion so that no stale data is exposed.
+ */
+ if (pos + ret > old_size) {
+ i_size_write(inode, pos + ret);
+ iomap->flags |= IOMAP_F_SIZE_CHANGED;
+ }
+ unlock_page(page);
+
+ if (old_size < pos)
+ pagecache_isize_extended(inode, old_size, pos);
if (page_ops && page_ops->page_done)
- page_ops->page_done(inode, pos, copied, page, iomap);
+ page_ops->page_done(inode, pos, ret, page, iomap);
put_page(page);
if (ret < len)
@@ -1599,13 +1612,7 @@ static void iomap_dio_bio_end_io(struct bio *bio)
if (should_dirty) {
bio_check_pages_dirty(bio);
} else {
- if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
- struct bvec_iter_all iter_all;
- struct bio_vec *bvec;
-
- bio_for_each_segment_all(bvec, bio, iter_all)
- put_page(bvec->bv_page);
- }
+ bio_release_pages(bio, false);
bio_put(bio);
}
}
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index efd0ce9489ae..132fb92098c7 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -184,17 +184,18 @@ static int journal_wait_on_commit_record(journal_t *journal,
/*
* write the filemap data using writepage() address_space_operations.
* We don't do block allocation here even for delalloc. We don't
- * use writepages() because with dealyed allocation we may be doing
+ * use writepages() because with delayed allocation we may be doing
* block allocation in writepages().
*/
-static int journal_submit_inode_data_buffers(struct address_space *mapping)
+static int journal_submit_inode_data_buffers(struct address_space *mapping,
+ loff_t dirty_start, loff_t dirty_end)
{
int ret;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = mapping->nrpages * 2,
- .range_start = 0,
- .range_end = i_size_read(mapping->host),
+ .range_start = dirty_start,
+ .range_end = dirty_end,
};
ret = generic_writepages(mapping, &wbc);
@@ -218,6 +219,9 @@ static int journal_submit_data_buffers(journal_t *journal,
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+ loff_t dirty_start = jinode->i_dirty_start;
+ loff_t dirty_end = jinode->i_dirty_end;
+
if (!(jinode->i_flags & JI_WRITE_DATA))
continue;
mapping = jinode->i_vfs_inode->i_mapping;
@@ -230,7 +234,8 @@ static int journal_submit_data_buffers(journal_t *journal,
* only allocated blocks here.
*/
trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
- err = journal_submit_inode_data_buffers(mapping);
+ err = journal_submit_inode_data_buffers(mapping, dirty_start,
+ dirty_end);
if (!ret)
ret = err;
spin_lock(&journal->j_list_lock);
@@ -257,12 +262,16 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
/* For locking, see the comment in journal_submit_data_buffers() */
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+ loff_t dirty_start = jinode->i_dirty_start;
+ loff_t dirty_end = jinode->i_dirty_end;
+
if (!(jinode->i_flags & JI_WAIT_DATA))
continue;
jinode->i_flags |= JI_COMMIT_RUNNING;
spin_unlock(&journal->j_list_lock);
- err = filemap_fdatawait_keep_errors(
- jinode->i_vfs_inode->i_mapping);
+ err = filemap_fdatawait_range_keep_errors(
+ jinode->i_vfs_inode->i_mapping, dirty_start,
+ dirty_end);
if (!ret)
ret = err;
spin_lock(&journal->j_list_lock);
@@ -282,6 +291,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
&jinode->i_transaction->t_inode_list);
} else {
jinode->i_transaction = NULL;
+ jinode->i_dirty_start = 0;
+ jinode->i_dirty_end = 0;
}
}
spin_unlock(&journal->j_list_lock);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 43df0c943229..953990eb70a9 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -66,9 +66,6 @@ EXPORT_SYMBOL(jbd2_journal_get_undo_access);
EXPORT_SYMBOL(jbd2_journal_set_triggers);
EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
EXPORT_SYMBOL(jbd2_journal_forget);
-#if 0
-EXPORT_SYMBOL(journal_sync_buffer);
-#endif
EXPORT_SYMBOL(jbd2_journal_flush);
EXPORT_SYMBOL(jbd2_journal_revoke);
@@ -94,6 +91,8 @@ EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
EXPORT_SYMBOL(jbd2_journal_force_commit);
EXPORT_SYMBOL(jbd2_journal_inode_add_write);
EXPORT_SYMBOL(jbd2_journal_inode_add_wait);
+EXPORT_SYMBOL(jbd2_journal_inode_ranged_write);
+EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait);
EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
@@ -203,7 +202,7 @@ loop:
if (journal->j_flags & JBD2_UNMOUNT)
goto end_loop;
- jbd_debug(1, "commit_sequence=%d, commit_request=%d\n",
+ jbd_debug(1, "commit_sequence=%u, commit_request=%u\n",
journal->j_commit_sequence, journal->j_commit_request);
if (journal->j_commit_sequence != journal->j_commit_request) {
@@ -324,7 +323,7 @@ static void journal_kill_thread(journal_t *journal)
* IO is in progress. do_get_write_access() handles this.
*
* The function returns a pointer to the buffer_head to be used for IO.
- *
+ *
*
* Return value:
* <0: Error
@@ -500,7 +499,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
*/
journal->j_commit_request = target;
- jbd_debug(1, "JBD2: requesting commit %d/%d\n",
+ jbd_debug(1, "JBD2: requesting commit %u/%u\n",
journal->j_commit_request,
journal->j_commit_sequence);
journal->j_running_transaction->t_requested = jiffies;
@@ -513,7 +512,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n",
journal->j_commit_request,
journal->j_commit_sequence,
- target, journal->j_running_transaction ?
+ target, journal->j_running_transaction ?
journal->j_running_transaction->t_tid : 0);
return 0;
}
@@ -698,12 +697,12 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
#ifdef CONFIG_JBD2_DEBUG
if (!tid_geq(journal->j_commit_request, tid)) {
printk(KERN_ERR
- "%s: error: j_commit_request=%d, tid=%d\n",
+ "%s: error: j_commit_request=%u, tid=%u\n",
__func__, journal->j_commit_request, tid);
}
#endif
while (tid_gt(tid, journal->j_commit_sequence)) {
- jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n",
+ jbd_debug(1, "JBD2: want %u, j_commit_sequence=%u\n",
tid, journal->j_commit_sequence);
read_unlock(&journal->j_state_lock);
wake_up(&journal->j_wait_commit);
@@ -944,7 +943,7 @@ int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
trace_jbd2_update_log_tail(journal, tid, block, freed);
jbd_debug(1,
- "Cleaning journal tail from %d to %d (offset %lu), "
+ "Cleaning journal tail from %u to %u (offset %lu), "
"freeing %lu\n",
journal->j_tail_sequence, tid, block, freed);
@@ -1318,7 +1317,7 @@ static int journal_reset(journal_t *journal)
*/
if (sb->s_start == 0) {
jbd_debug(1, "JBD2: Skipping superblock update on recovered sb "
- "(start %ld, seq %d, errno %d)\n",
+ "(start %ld, seq %u, errno %d)\n",
journal->j_tail, journal->j_tail_sequence,
journal->j_errno);
journal->j_flags |= JBD2_FLUSHED;
@@ -1453,7 +1452,7 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
return;
}
- jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n",
+ jbd_debug(1, "JBD2: Marking journal as empty (seq %u)\n",
journal->j_tail_sequence);
sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
@@ -2574,6 +2573,8 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
jinode->i_next_transaction = NULL;
jinode->i_vfs_inode = inode;
jinode->i_flags = 0;
+ jinode->i_dirty_start = 0;
+ jinode->i_dirty_end = 0;
INIT_LIST_HEAD(&jinode->i_list);
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 8ca4fddc705f..990e7b5062e7 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -2565,7 +2565,7 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
* File inode in the inode list of the handle's transaction
*/
static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
- unsigned long flags)
+ unsigned long flags, loff_t start_byte, loff_t end_byte)
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal;
@@ -2577,26 +2577,17 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
transaction->t_tid);
- /*
- * First check whether inode isn't already on the transaction's
- * lists without taking the lock. Note that this check is safe
- * without the lock as we cannot race with somebody removing inode
- * from the transaction. The reason is that we remove inode from the
- * transaction only in journal_release_jbd_inode() and when we commit
- * the transaction. We are guarded from the first case by holding
- * a reference to the inode. We are safe against the second case
- * because if jinode->i_transaction == transaction, commit code
- * cannot touch the transaction because we hold reference to it,
- * and if jinode->i_next_transaction == transaction, commit code
- * will only file the inode where we want it.
- */
- if ((jinode->i_transaction == transaction ||
- jinode->i_next_transaction == transaction) &&
- (jinode->i_flags & flags) == flags)
- return 0;
-
spin_lock(&journal->j_list_lock);
jinode->i_flags |= flags;
+
+ if (jinode->i_dirty_end) {
+ jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte);
+ jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte);
+ } else {
+ jinode->i_dirty_start = start_byte;
+ jinode->i_dirty_end = end_byte;
+ }
+
/* Is inode already attached where we need it? */
if (jinode->i_transaction == transaction ||
jinode->i_next_transaction == transaction)
@@ -2631,12 +2622,28 @@ done:
int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode)
{
return jbd2_journal_file_inode(handle, jinode,
- JI_WRITE_DATA | JI_WAIT_DATA);
+ JI_WRITE_DATA | JI_WAIT_DATA, 0, LLONG_MAX);
}
int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode)
{
- return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA);
+ return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 0,
+ LLONG_MAX);
+}
+
+int jbd2_journal_inode_ranged_write(handle_t *handle,
+ struct jbd2_inode *jinode, loff_t start_byte, loff_t length)
+{
+ return jbd2_journal_file_inode(handle, jinode,
+ JI_WRITE_DATA | JI_WAIT_DATA, start_byte,
+ start_byte + length - 1);
+}
+
+int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *jinode,
+ loff_t start_byte, loff_t length)
+{
+ return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA,
+ start_byte, start_byte + length - 1);
}
/*
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 62f98225abb3..b11f2afa84f1 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -47,13 +47,14 @@ void nlmclnt_next_cookie(struct nlm_cookie *c)
c->len=4;
}
-static struct nlm_lockowner *nlm_get_lockowner(struct nlm_lockowner *lockowner)
+static struct nlm_lockowner *
+nlmclnt_get_lockowner(struct nlm_lockowner *lockowner)
{
refcount_inc(&lockowner->count);
return lockowner;
}
-static void nlm_put_lockowner(struct nlm_lockowner *lockowner)
+static void nlmclnt_put_lockowner(struct nlm_lockowner *lockowner)
{
if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock))
return;
@@ -82,28 +83,28 @@ static inline uint32_t __nlm_alloc_pid(struct nlm_host *host)
return res;
}
-static struct nlm_lockowner *__nlm_find_lockowner(struct nlm_host *host, fl_owner_t owner)
+static struct nlm_lockowner *__nlmclnt_find_lockowner(struct nlm_host *host, fl_owner_t owner)
{
struct nlm_lockowner *lockowner;
list_for_each_entry(lockowner, &host->h_lockowners, list) {
if (lockowner->owner != owner)
continue;
- return nlm_get_lockowner(lockowner);
+ return nlmclnt_get_lockowner(lockowner);
}
return NULL;
}
-static struct nlm_lockowner *nlm_find_lockowner(struct nlm_host *host, fl_owner_t owner)
+static struct nlm_lockowner *nlmclnt_find_lockowner(struct nlm_host *host, fl_owner_t owner)
{
struct nlm_lockowner *res, *new = NULL;
spin_lock(&host->h_lock);
- res = __nlm_find_lockowner(host, owner);
+ res = __nlmclnt_find_lockowner(host, owner);
if (res == NULL) {
spin_unlock(&host->h_lock);
new = kmalloc(sizeof(*new), GFP_KERNEL);
spin_lock(&host->h_lock);
- res = __nlm_find_lockowner(host, owner);
+ res = __nlmclnt_find_lockowner(host, owner);
if (res == NULL && new != NULL) {
res = new;
refcount_set(&new->count, 1);
@@ -457,7 +458,7 @@ static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
{
spin_lock(&fl->fl_u.nfs_fl.owner->host->h_lock);
new->fl_u.nfs_fl.state = fl->fl_u.nfs_fl.state;
- new->fl_u.nfs_fl.owner = nlm_get_lockowner(fl->fl_u.nfs_fl.owner);
+ new->fl_u.nfs_fl.owner = nlmclnt_get_lockowner(fl->fl_u.nfs_fl.owner);
list_add_tail(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.owner->host->h_granted);
spin_unlock(&fl->fl_u.nfs_fl.owner->host->h_lock);
}
@@ -467,7 +468,7 @@ static void nlmclnt_locks_release_private(struct file_lock *fl)
spin_lock(&fl->fl_u.nfs_fl.owner->host->h_lock);
list_del(&fl->fl_u.nfs_fl.list);
spin_unlock(&fl->fl_u.nfs_fl.owner->host->h_lock);
- nlm_put_lockowner(fl->fl_u.nfs_fl.owner);
+ nlmclnt_put_lockowner(fl->fl_u.nfs_fl.owner);
}
static const struct file_lock_operations nlmclnt_lock_ops = {
@@ -478,7 +479,7 @@ static const struct file_lock_operations nlmclnt_lock_ops = {
static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *host)
{
fl->fl_u.nfs_fl.state = 0;
- fl->fl_u.nfs_fl.owner = nlm_find_lockowner(host, fl->fl_owner);
+ fl->fl_u.nfs_fl.owner = nlmclnt_find_lockowner(host, fl->fl_owner);
INIT_LIST_HEAD(&fl->fl_u.nfs_fl.list);
fl->fl_ops = &nlmclnt_lock_ops;
}
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 1bddf70d9656..e4d3f783e06a 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -46,8 +46,14 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Set up the missing parts of the file_lock structure */
lock->fl.fl_file = file->f_file;
- lock->fl.fl_owner = (fl_owner_t) host;
+ lock->fl.fl_pid = current->tgid;
lock->fl.fl_lmops = &nlmsvc_lock_operations;
+ nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
+ if (!lock->fl.fl_owner) {
+ /* lockowner allocation has failed */
+ nlmsvc_release_host(host);
+ return nlm_lck_denied_nolocks;
+ }
}
return 0;
@@ -94,6 +100,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
else
dprintk("lockd: TEST4 status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
@@ -142,6 +149,7 @@ __nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
else
dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
@@ -178,6 +186,7 @@ __nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp)
resp->status = nlmsvc_cancel_blocked(SVC_NET(rqstp), file, &argp->lock);
dprintk("lockd: CANCEL status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
@@ -217,6 +226,7 @@ __nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp)
resp->status = nlmsvc_unlock(SVC_NET(rqstp), file, &argp->lock);
dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
@@ -365,6 +375,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp)
resp->status = nlmsvc_share_file(host, file, argp);
dprintk("lockd: SHARE status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
@@ -399,6 +410,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp)
resp->status = nlmsvc_unshare_file(host, file, argp);
dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index ea719cdd6a36..61d3cc2283dc 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -332,6 +332,93 @@ restart:
mutex_unlock(&file->f_mutex);
}
+static struct nlm_lockowner *
+nlmsvc_get_lockowner(struct nlm_lockowner *lockowner)
+{
+ refcount_inc(&lockowner->count);
+ return lockowner;
+}
+
+static void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner)
+{
+ if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock))
+ return;
+ list_del(&lockowner->list);
+ spin_unlock(&lockowner->host->h_lock);
+ nlmsvc_release_host(lockowner->host);
+ kfree(lockowner);
+}
+
+static struct nlm_lockowner *__nlmsvc_find_lockowner(struct nlm_host *host, pid_t pid)
+{
+ struct nlm_lockowner *lockowner;
+ list_for_each_entry(lockowner, &host->h_lockowners, list) {
+ if (lockowner->pid != pid)
+ continue;
+ return nlmsvc_get_lockowner(lockowner);
+ }
+ return NULL;
+}
+
+static struct nlm_lockowner *nlmsvc_find_lockowner(struct nlm_host *host, pid_t pid)
+{
+ struct nlm_lockowner *res, *new = NULL;
+
+ spin_lock(&host->h_lock);
+ res = __nlmsvc_find_lockowner(host, pid);
+
+ if (res == NULL) {
+ spin_unlock(&host->h_lock);
+ new = kmalloc(sizeof(*res), GFP_KERNEL);
+ spin_lock(&host->h_lock);
+ res = __nlmsvc_find_lockowner(host, pid);
+ if (res == NULL && new != NULL) {
+ res = new;
+ /* fs/locks.c will manage the refcount through lock_ops */
+ refcount_set(&new->count, 1);
+ new->pid = pid;
+ new->host = nlm_get_host(host);
+ list_add(&new->list, &host->h_lockowners);
+ new = NULL;
+ }
+ }
+
+ spin_unlock(&host->h_lock);
+ kfree(new);
+ return res;
+}
+
+void
+nlmsvc_release_lockowner(struct nlm_lock *lock)
+{
+ if (lock->fl.fl_owner)
+ nlmsvc_put_lockowner(lock->fl.fl_owner);
+}
+
+static void nlmsvc_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
+{
+ struct nlm_lockowner *nlm_lo = (struct nlm_lockowner *)fl->fl_owner;
+ new->fl_owner = nlmsvc_get_lockowner(nlm_lo);
+}
+
+static void nlmsvc_locks_release_private(struct file_lock *fl)
+{
+ nlmsvc_put_lockowner((struct nlm_lockowner *)fl->fl_owner);
+}
+
+static const struct file_lock_operations nlmsvc_lock_ops = {
+ .fl_copy_lock = nlmsvc_locks_copy_lock,
+ .fl_release_private = nlmsvc_locks_release_private,
+};
+
+void nlmsvc_locks_init_private(struct file_lock *fl, struct nlm_host *host,
+ pid_t pid)
+{
+ fl->fl_owner = nlmsvc_find_lockowner(host, pid);
+ if (fl->fl_owner != NULL)
+ fl->fl_ops = &nlmsvc_lock_ops;
+}
+
/*
* Initialize arguments for GRANTED call. The nlm_rqst structure
* has been cleared already.
@@ -345,7 +432,7 @@ static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
/* set default data area */
call->a_args.lock.oh.data = call->a_owner;
- call->a_args.lock.svid = lock->fl.fl_pid;
+ call->a_args.lock.svid = ((struct nlm_lockowner *)lock->fl.fl_owner)->pid;
if (lock->oh.len > NLMCLNT_OHSIZE) {
void *data = kmalloc(lock->oh.len, GFP_KERNEL);
@@ -509,6 +596,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
{
int error;
__be32 ret;
+ struct nlm_lockowner *test_owner;
dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
locks_inode(file->f_file)->i_sb->s_id,
@@ -522,6 +610,9 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
goto out;
}
+ /* If there's a conflicting lock, remember to clean up the test lock */
+ test_owner = (struct nlm_lockowner *)lock->fl.fl_owner;
+
error = vfs_test_lock(file->f_file, &lock->fl);
if (error) {
/* We can't currently deal with deferred test requests */
@@ -543,11 +634,16 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
conflock->caller = "somehost"; /* FIXME */
conflock->len = strlen(conflock->caller);
conflock->oh.len = 0; /* don't return OH info */
- conflock->svid = lock->fl.fl_pid;
+ conflock->svid = ((struct nlm_lockowner *)lock->fl.fl_owner)->pid;
conflock->fl.fl_type = lock->fl.fl_type;
conflock->fl.fl_start = lock->fl.fl_start;
conflock->fl.fl_end = lock->fl.fl_end;
locks_release_private(&lock->fl);
+
+ /* Clean up the test lock */
+ lock->fl.fl_owner = NULL;
+ nlmsvc_put_lockowner(test_owner);
+
ret = nlm_lck_denied;
out:
return ret;
@@ -692,25 +788,7 @@ nlmsvc_notify_blocked(struct file_lock *fl)
printk(KERN_WARNING "lockd: notification for unknown block!\n");
}
-static int nlmsvc_same_owner(struct file_lock *fl1, struct file_lock *fl2)
-{
- return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid;
-}
-
-/*
- * Since NLM uses two "keys" for tracking locks, we need to hash them down
- * to one for the blocked_hash. Here, we're just xor'ing the host address
- * with the pid in order to create a key value for picking a hash bucket.
- */
-static unsigned long
-nlmsvc_owner_key(struct file_lock *fl)
-{
- return (unsigned long)fl->fl_owner ^ (unsigned long)fl->fl_pid;
-}
-
const struct lock_manager_operations nlmsvc_lock_operations = {
- .lm_compare_owner = nlmsvc_same_owner,
- .lm_owner_key = nlmsvc_owner_key,
.lm_notify = nlmsvc_notify_blocked,
.lm_grant = nlmsvc_grant_deferred,
};
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index ea77c66d3cc3..d0bb7a6bf005 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -76,8 +76,14 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Set up the missing parts of the file_lock structure */
lock->fl.fl_file = file->f_file;
- lock->fl.fl_owner = (fl_owner_t) host;
+ lock->fl.fl_pid = current->tgid;
lock->fl.fl_lmops = &nlmsvc_lock_operations;
+ nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
+ if (!lock->fl.fl_owner) {
+ /* lockowner allocation has failed */
+ nlmsvc_release_host(host);
+ return nlm_lck_denied_nolocks;
+ }
}
return 0;
@@ -125,6 +131,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
dprintk("lockd: TEST status %d vers %d\n",
ntohl(resp->status), rqstp->rq_vers);
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
@@ -173,6 +180,7 @@ __nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
else
dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
@@ -210,6 +218,7 @@ __nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp)
resp->status = cast_status(nlmsvc_cancel_blocked(net, file, &argp->lock));
dprintk("lockd: CANCEL status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
@@ -250,6 +259,7 @@ __nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp)
resp->status = cast_status(nlmsvc_unlock(net, file, &argp->lock));
dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
@@ -408,6 +418,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp)
resp->status = cast_status(nlmsvc_share_file(host, file, argp));
dprintk("lockd: SHARE status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
@@ -442,6 +453,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp)
resp->status = cast_status(nlmsvc_unshare_file(host, file, argp));
dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 0e610f422406..028fc152da22 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -180,7 +180,7 @@ again:
/* update current lock count */
file->f_locks++;
- lockhost = (struct nlm_host *) fl->fl_owner;
+ lockhost = ((struct nlm_lockowner *)fl->fl_owner)->host;
if (match(lockhost, host)) {
struct file_lock lock = *fl;
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 7147e4aebecc..982629f7b120 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -126,8 +126,6 @@ nlm_decode_lock(__be32 *p, struct nlm_lock *lock)
lock->svid = ntohl(*p++);
locks_init_lock(fl);
- fl->fl_owner = current->files;
- fl->fl_pid = (pid_t)lock->svid;
fl->fl_flags = FL_POSIX;
fl->fl_type = F_RDLCK; /* as good as anything else */
start = ntohl(*p++);
@@ -269,7 +267,6 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
memset(lock, 0, sizeof(*lock));
locks_init_lock(&lock->fl);
lock->svid = ~(u32) 0;
- lock->fl.fl_pid = (pid_t)lock->svid;
if (!(p = nlm_decode_cookie(p, &argp->cookie))
|| !(p = xdr_decode_string_inplace(p, &lock->caller,
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 7ed9edf9aed4..5fa9f48a9dba 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -118,8 +118,6 @@ nlm4_decode_lock(__be32 *p, struct nlm_lock *lock)
lock->svid = ntohl(*p++);
locks_init_lock(fl);
- fl->fl_owner = current->files;
- fl->fl_pid = (pid_t)lock->svid;
fl->fl_flags = FL_POSIX;
fl->fl_type = F_RDLCK; /* as good as anything else */
p = xdr_decode_hyper(p, &start);
@@ -266,7 +264,6 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
memset(lock, 0, sizeof(*lock));
locks_init_lock(&lock->fl);
lock->svid = ~(u32) 0;
- lock->fl.fl_pid = (pid_t)lock->svid;
if (!(p = nlm4_decode_cookie(p, &argp->cookie))
|| !(p = xdr_decode_string_inplace(p, &lock->caller,
diff --git a/fs/locks.c b/fs/locks.c
index ec1e4a5df629..686eae21daf6 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -658,9 +658,6 @@ static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
*/
static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
{
- if (fl1->fl_lmops && fl1->fl_lmops->lm_compare_owner)
- return fl2->fl_lmops == fl1->fl_lmops &&
- fl1->fl_lmops->lm_compare_owner(fl1, fl2);
return fl1->fl_owner == fl2->fl_owner;
}
@@ -701,8 +698,6 @@ static void locks_delete_global_locks(struct file_lock *fl)
static unsigned long
posix_owner_key(struct file_lock *fl)
{
- if (fl->fl_lmops && fl->fl_lmops->lm_owner_key)
- return fl->fl_lmops->lm_owner_key(fl);
return (unsigned long)fl->fl_owner;
}
@@ -1534,11 +1529,21 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose)
static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
{
- if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT))
- return false;
- if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE))
- return false;
- return locks_conflict(breaker, lease);
+ bool rc;
+
+ if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) {
+ rc = false;
+ goto trace;
+ }
+ if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) {
+ rc = false;
+ goto trace;
+ }
+
+ rc = locks_conflict(breaker, lease);
+trace:
+ trace_leases_conflict(rc, lease, breaker);
+ return rc;
}
static bool
@@ -1753,10 +1758,10 @@ int fcntl_getlease(struct file *filp)
}
/**
- * check_conflicting_open - see if the given dentry points to a file that has
+ * check_conflicting_open - see if the given file points to an inode that has
* an existing open that would conflict with the
* desired lease.
- * @dentry: dentry to check
+ * @filp: file to check
* @arg: type of lease that we're trying to acquire
* @flags: current lock flags
*
@@ -1764,30 +1769,42 @@ int fcntl_getlease(struct file *filp)
* conflict with the lease we're trying to set.
*/
static int
-check_conflicting_open(const struct dentry *dentry, const long arg, int flags)
+check_conflicting_open(struct file *filp, const long arg, int flags)
{
- int ret = 0;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = locks_inode(filp);
+ int self_wcount = 0, self_rcount = 0;
if (flags & FL_LAYOUT)
return 0;
- if ((arg == F_RDLCK) && inode_is_open_for_write(inode))
- return -EAGAIN;
+ if (arg == F_RDLCK)
+ return inode_is_open_for_write(inode) ? -EAGAIN : 0;
+ else if (arg != F_WRLCK)
+ return 0;
- if ((arg == F_WRLCK) && ((d_count(dentry) > 1) ||
- (atomic_read(&inode->i_count) > 1)))
- ret = -EAGAIN;
+ /*
+ * Make sure that only read/write count is from lease requestor.
+ * Note that this will result in denying write leases when i_writecount
+ * is negative, which is what we want. (We shouldn't grant write leases
+ * on files open for execution.)
+ */
+ if (filp->f_mode & FMODE_WRITE)
+ self_wcount = 1;
+ else if (filp->f_mode & FMODE_READ)
+ self_rcount = 1;
- return ret;
+ if (atomic_read(&inode->i_writecount) != self_wcount ||
+ atomic_read(&inode->i_readcount) != self_rcount)
+ return -EAGAIN;
+
+ return 0;
}
static int
generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv)
{
struct file_lock *fl, *my_fl = NULL, *lease;
- struct dentry *dentry = filp->f_path.dentry;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = locks_inode(filp);
struct file_lock_context *ctx;
bool is_deleg = (*flp)->fl_flags & FL_DELEG;
int error;
@@ -1822,7 +1839,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
time_out_leases(inode, &dispose);
- error = check_conflicting_open(dentry, arg, lease->fl_flags);
+ error = check_conflicting_open(filp, arg, lease->fl_flags);
if (error)
goto out;
@@ -1879,7 +1896,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
* precedes these checks.
*/
smp_mb();
- error = check_conflicting_open(dentry, arg, lease->fl_flags);
+ error = check_conflicting_open(filp, arg, lease->fl_flags);
if (error) {
locks_unlink_lock_ctx(lease);
goto out;
diff --git a/fs/namei.c b/fs/namei.c
index 20831c2fbb34..209c51a5226c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3883,6 +3883,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
dentry->d_inode->i_flags |= S_DEAD;
dont_mount(dentry);
detach_mounts(dentry);
+ fsnotify_rmdir(dir, dentry);
out:
inode_unlock(dentry->d_inode);
@@ -3999,6 +4000,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
if (!error) {
dont_mount(dentry);
detach_mounts(dentry);
+ fsnotify_unlink(dir, dentry);
}
}
}
diff --git a/fs/namespace.c b/fs/namespace.c
index 7660c2749c96..6fbc9126367a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2596,11 +2596,12 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
if (!check_mnt(p))
goto out;
- /* The thing moved should be either ours or completely unattached. */
- if (attached && !check_mnt(old))
+ /* The thing moved must be mounted... */
+ if (!is_mounted(&old->mnt))
goto out;
- if (!attached && !(ns && is_anon_ns(ns)))
+ /* ... and either ours or the root of anon namespace */
+ if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
goto out;
if (old->mnt.mnt_flags & MNT_LOCKED)
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index e6a700f01452..aec769a500a1 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -22,7 +22,8 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
char *ip_addr = NULL;
int ip_len;
- ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL, false);
+ ip_len = dns_query(net, NULL, name, namelen, NULL, &ip_addr, NULL,
+ false);
if (ip_len > 0)
ret = rpc_pton(net, ip_addr, ip_len, sa, salen);
else
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index a809989807d6..19f856f45689 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -18,7 +18,7 @@
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
-static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS;
+static unsigned int dataserver_timeo = NFS_DEF_TCP_TIMEO;
static unsigned int dataserver_retrans;
static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index cf42a8b939e3..f4157eb1f69d 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -129,10 +129,13 @@ nfs4_file_flush(struct file *file, fl_owner_t id)
}
#ifdef CONFIG_NFS_V4_2
-static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- size_t count, unsigned int flags)
+static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t count, unsigned int flags)
{
+ /* Only offload copy if superblock is the same */
+ if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
+ return -EXDEV;
if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY))
return -EOPNOTSUPP;
if (file_inode(file_in) == file_inode(file_out))
@@ -140,6 +143,20 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
}
+static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t count, unsigned int flags)
+{
+ ssize_t ret;
+
+ ret = __nfs4_copy_file_range(file_in, pos_in, file_out, pos_out, count,
+ flags);
+ if (ret == -EOPNOTSUPP || ret == -EXDEV)
+ ret = generic_copy_file_range(file_in, pos_in, file_out,
+ pos_out, count, flags);
+ return ret;
+}
+
static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
{
loff_t ret;
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index 4884fdae28fb..1e7296395d71 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -291,7 +291,7 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
if (IS_ERR(rkey)) {
mutex_lock(&idmap->idmap_mutex);
rkey = request_key_with_auxdata(&key_type_id_resolver_legacy,
- desc, "", 0, idmap);
+ desc, NULL, "", 0, idmap);
mutex_unlock(&idmap->idmap_mutex);
}
if (!IS_ERR(rkey))
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 52d533967485..0effeee28352 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -396,12 +396,6 @@ nfs_complete_sillyrename(struct rpc_task *task, struct nfs_renamedata *data)
nfs_cancel_async_unlink(dentry);
return;
}
-
- /*
- * vfs_unlink and the like do not issue this when a file is
- * sillyrenamed, so do it here.
- */
- fsnotify_nameremove(dentry, 0);
}
#define SILLYNAME_PREFIX ".nfs"
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 4fb1f72a25fb..66d4c55eb48e 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -121,15 +121,13 @@ nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp,
{
loff_t new_size = lcp->lc_last_wr + 1;
struct iattr iattr = { .ia_valid = 0 };
- struct timespec ts;
int error;
- ts = timespec64_to_timespec(inode->i_mtime);
if (lcp->lc_mtime.tv_nsec == UTIME_NOW ||
- timespec_compare(&lcp->lc_mtime, &ts) < 0)
- lcp->lc_mtime = timespec64_to_timespec(current_time(inode));
+ timespec64_compare(&lcp->lc_mtime, &inode->i_mtime) < 0)
+ lcp->lc_mtime = current_time(inode);
iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME;
- iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = timespec_to_timespec64(lcp->lc_mtime);
+ iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime;
if (new_size > i_size_read(inode)) {
iattr.ia_valid |= ATTR_SIZE;
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index 4a98537efb0f..10ec5ecdf117 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -10,6 +10,7 @@
#define NFSCACHE_H
#include <linux/sunrpc/svc.h>
+#include "netns.h"
/*
* Representation of a reply cache entry.
@@ -77,8 +78,8 @@ enum {
/* Checksum this amount of the request */
#define RC_CSUMLEN (256U)
-int nfsd_reply_cache_init(void);
-void nfsd_reply_cache_shutdown(void);
+int nfsd_reply_cache_init(struct nfsd_net *);
+void nfsd_reply_cache_shutdown(struct nfsd_net *);
int nfsd_cache_lookup(struct svc_rqst *);
void nfsd_cache_update(struct svc_rqst *, int, __be32 *);
int nfsd_reply_cache_stats_open(struct inode *, struct file *);
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 7c686a270d60..bdfe5bcb3dcd 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -42,6 +42,11 @@ struct nfsd_net {
bool grace_ended;
time_t boot_time;
+ /* internal mount of the "nfsd" pseudofilesystem: */
+ struct vfsmount *nfsd_mnt;
+
+ struct dentry *nfsd_client_dir;
+
/*
* reclaim_str_hashtbl[] holds known client info from previous reset/reboot
* used in reboot/reset lease grace period processing
@@ -106,6 +111,7 @@ struct nfsd_net {
*/
unsigned int max_connections;
+ u32 clientid_base;
u32 clientid_counter;
u32 clverifier_counter;
@@ -127,6 +133,44 @@ struct nfsd_net {
*/
bool *nfsd_versions;
bool *nfsd4_minorversions;
+
+ /*
+ * Duplicate reply cache
+ */
+ struct nfsd_drc_bucket *drc_hashtbl;
+ struct kmem_cache *drc_slab;
+
+ /* max number of entries allowed in the cache */
+ unsigned int max_drc_entries;
+
+ /* number of significant bits in the hash value */
+ unsigned int maskbits;
+ unsigned int drc_hashsize;
+
+ /*
+ * Stats and other tracking of on the duplicate reply cache.
+ * These fields and the "rc" fields in nfsdstats are modified
+ * with only the per-bucket cache lock, which isn't really safe
+ * and should be fixed if we want the statistics to be
+ * completely accurate.
+ */
+
+ /* total number of entries */
+ atomic_t num_drc_entries;
+
+ /* cache misses due only to checksum comparison failures */
+ unsigned int payload_misses;
+
+ /* amount of memory (in bytes) currently consumed by the DRC */
+ unsigned int drc_mem_usage;
+
+ /* longest hash chain seen */
+ unsigned int longest_chain;
+
+ /* size of cache when we saw the longest hash chain */
+ unsigned int longest_chain_cachesize;
+
+ struct shrinker nfsd_reply_cache_shrinker;
};
/* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 2961016097ac..d1f285245af8 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -83,7 +83,7 @@ ent_init(struct cache_head *cnew, struct cache_head *citm)
new->type = itm->type;
strlcpy(new->name, itm->name, sizeof(new->name));
- strlcpy(new->authname, itm->authname, sizeof(new->name));
+ strlcpy(new->authname, itm->authname, sizeof(new->authname));
}
static void
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 618e66078ee5..7857942c5ca6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -42,6 +42,7 @@
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/sunrpc/addr.h>
#include <linux/jhash.h>
+#include <linux/string_helpers.h>
#include "xdr4.h"
#include "xdr4cb.h"
#include "vfs.h"
@@ -99,6 +100,13 @@ enum nfsd4_st_mutex_lock_subclass {
*/
static DECLARE_WAIT_QUEUE_HEAD(close_wq);
+/*
+ * A waitqueue where a writer to clients/#/ctl destroying a client can
+ * wait for cl_rpc_users to drop to 0 and then for the client to be
+ * unhashed.
+ */
+static DECLARE_WAIT_QUEUE_HEAD(expiry_wq);
+
static struct kmem_cache *client_slab;
static struct kmem_cache *openowner_slab;
static struct kmem_cache *lockowner_slab;
@@ -138,7 +146,7 @@ static __be32 get_client_locked(struct nfs4_client *clp)
if (is_client_expired(clp))
return nfserr_expired;
- atomic_inc(&clp->cl_refcount);
+ atomic_inc(&clp->cl_rpc_users);
return nfs_ok;
}
@@ -170,20 +178,24 @@ static void put_client_renew_locked(struct nfs4_client *clp)
lockdep_assert_held(&nn->client_lock);
- if (!atomic_dec_and_test(&clp->cl_refcount))
+ if (!atomic_dec_and_test(&clp->cl_rpc_users))
return;
if (!is_client_expired(clp))
renew_client_locked(clp);
+ else
+ wake_up_all(&expiry_wq);
}
static void put_client_renew(struct nfs4_client *clp)
{
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
- if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock))
+ if (!atomic_dec_and_lock(&clp->cl_rpc_users, &nn->client_lock))
return;
if (!is_client_expired(clp))
renew_client_locked(clp);
+ else
+ wake_up_all(&expiry_wq);
spin_unlock(&nn->client_lock);
}
@@ -694,7 +706,8 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
idr_preload(GFP_KERNEL);
spin_lock(&cl->cl_lock);
- new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 0, 0, GFP_NOWAIT);
+ /* Reserving 0 for start of file in nfsdfs "states" file: */
+ new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 1, 0, GFP_NOWAIT);
spin_unlock(&cl->cl_lock);
idr_preload_end();
if (new_id < 0)
@@ -1563,7 +1576,7 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca)
* Never use more than a third of the remaining memory,
* unless it's the only way to give this client a slot:
*/
- avail = clamp_t(int, avail, slotsize, total_avail/3);
+ avail = clamp_t(unsigned long, avail, slotsize, total_avail/3);
num = min_t(int, num, avail / slotsize);
nfsd_drc_mem_used += num * slotsize;
spin_unlock(&nfsd_drc_lock);
@@ -1844,7 +1857,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
clp = kmem_cache_zalloc(client_slab, GFP_KERNEL);
if (clp == NULL)
return NULL;
- clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL);
+ xdr_netobj_dup(&clp->cl_name, &name, GFP_KERNEL);
if (clp->cl_name.data == NULL)
goto err_no_name;
clp->cl_ownerstr_hashtbl = kmalloc_array(OWNER_HASH_SIZE,
@@ -1854,10 +1867,9 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
goto err_no_hashtbl;
for (i = 0; i < OWNER_HASH_SIZE; i++)
INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]);
- clp->cl_name.len = name.len;
INIT_LIST_HEAD(&clp->cl_sessions);
idr_init(&clp->cl_stateids);
- atomic_set(&clp->cl_refcount, 0);
+ atomic_set(&clp->cl_rpc_users, 0);
clp->cl_cb_state = NFSD4_CB_UNKNOWN;
INIT_LIST_HEAD(&clp->cl_idhash);
INIT_LIST_HEAD(&clp->cl_openowners);
@@ -1879,6 +1891,25 @@ err_no_name:
return NULL;
}
+static void __free_client(struct kref *k)
+{
+ struct nfsdfs_client *c = container_of(k, struct nfsdfs_client, cl_ref);
+ struct nfs4_client *clp = container_of(c, struct nfs4_client, cl_nfsdfs);
+
+ free_svc_cred(&clp->cl_cred);
+ kfree(clp->cl_ownerstr_hashtbl);
+ kfree(clp->cl_name.data);
+ kfree(clp->cl_nii_domain.data);
+ kfree(clp->cl_nii_name.data);
+ idr_destroy(&clp->cl_stateids);
+ kmem_cache_free(client_slab, clp);
+}
+
+static void drop_client(struct nfs4_client *clp)
+{
+ kref_put(&clp->cl_nfsdfs.cl_ref, __free_client);
+}
+
static void
free_client(struct nfs4_client *clp)
{
@@ -1891,11 +1922,12 @@ free_client(struct nfs4_client *clp)
free_session(ses);
}
rpc_destroy_wait_queue(&clp->cl_cb_waitq);
- free_svc_cred(&clp->cl_cred);
- kfree(clp->cl_ownerstr_hashtbl);
- kfree(clp->cl_name.data);
- idr_destroy(&clp->cl_stateids);
- kmem_cache_free(client_slab, clp);
+ if (clp->cl_nfsd_dentry) {
+ nfsd_client_rmdir(clp->cl_nfsd_dentry);
+ clp->cl_nfsd_dentry = NULL;
+ wake_up_all(&expiry_wq);
+ }
+ drop_client(clp);
}
/* must be called under the client_lock */
@@ -1936,7 +1968,7 @@ unhash_client(struct nfs4_client *clp)
static __be32 mark_client_expired_locked(struct nfs4_client *clp)
{
- if (atomic_read(&clp->cl_refcount))
+ if (atomic_read(&clp->cl_rpc_users))
return nfserr_jukebox;
unhash_client_locked(clp);
return nfs_ok;
@@ -1989,6 +2021,7 @@ __destroy_client(struct nfs4_client *clp)
if (clp->cl_cb_conn.cb_xprt)
svc_xprt_put(clp->cl_cb_conn.cb_xprt);
free_client(clp);
+ wake_up_all(&expiry_wq);
}
static void
@@ -2199,6 +2232,342 @@ find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
return s;
}
+static struct nfs4_client *get_nfsdfs_clp(struct inode *inode)
+{
+ struct nfsdfs_client *nc;
+ nc = get_nfsdfs_client(inode);
+ if (!nc)
+ return NULL;
+ return container_of(nc, struct nfs4_client, cl_nfsdfs);
+}
+
+static void seq_quote_mem(struct seq_file *m, char *data, int len)
+{
+ seq_printf(m, "\"");
+ seq_escape_mem_ascii(m, data, len);
+ seq_printf(m, "\"");
+}
+
+static int client_info_show(struct seq_file *m, void *v)
+{
+ struct inode *inode = m->private;
+ struct nfs4_client *clp;
+ u64 clid;
+
+ clp = get_nfsdfs_clp(inode);
+ if (!clp)
+ return -ENXIO;
+ memcpy(&clid, &clp->cl_clientid, sizeof(clid));
+ seq_printf(m, "clientid: 0x%llx\n", clid);
+ seq_printf(m, "address: \"%pISpc\"\n", (struct sockaddr *)&clp->cl_addr);
+ seq_printf(m, "name: ");
+ seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len);
+ seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion);
+ if (clp->cl_nii_domain.data) {
+ seq_printf(m, "Implementation domain: ");
+ seq_quote_mem(m, clp->cl_nii_domain.data,
+ clp->cl_nii_domain.len);
+ seq_printf(m, "\nImplementation name: ");
+ seq_quote_mem(m, clp->cl_nii_name.data, clp->cl_nii_name.len);
+ seq_printf(m, "\nImplementation time: [%ld, %ld]\n",
+ clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec);
+ }
+ drop_client(clp);
+
+ return 0;
+}
+
+static int client_info_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, client_info_show, inode);
+}
+
+static const struct file_operations client_info_fops = {
+ .open = client_info_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static void *states_start(struct seq_file *s, loff_t *pos)
+ __acquires(&clp->cl_lock)
+{
+ struct nfs4_client *clp = s->private;
+ unsigned long id = *pos;
+ void *ret;
+
+ spin_lock(&clp->cl_lock);
+ ret = idr_get_next_ul(&clp->cl_stateids, &id);
+ *pos = id;
+ return ret;
+}
+
+static void *states_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct nfs4_client *clp = s->private;
+ unsigned long id = *pos;
+ void *ret;
+
+ id = *pos;
+ id++;
+ ret = idr_get_next_ul(&clp->cl_stateids, &id);
+ *pos = id;
+ return ret;
+}
+
+static void states_stop(struct seq_file *s, void *v)
+ __releases(&clp->cl_lock)
+{
+ struct nfs4_client *clp = s->private;
+
+ spin_unlock(&clp->cl_lock);
+}
+
+static void nfs4_show_superblock(struct seq_file *s, struct file *f)
+{
+ struct inode *inode = file_inode(f);
+
+ seq_printf(s, "superblock: \"%02x:%02x:%ld\"",
+ MAJOR(inode->i_sb->s_dev),
+ MINOR(inode->i_sb->s_dev),
+ inode->i_ino);
+}
+
+static void nfs4_show_owner(struct seq_file *s, struct nfs4_stateowner *oo)
+{
+ seq_printf(s, "owner: ");
+ seq_quote_mem(s, oo->so_owner.data, oo->so_owner.len);
+}
+
+static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
+{
+ struct nfs4_ol_stateid *ols;
+ struct nfs4_file *nf;
+ struct file *file;
+ struct nfs4_stateowner *oo;
+ unsigned int access, deny;
+
+ if (st->sc_type != NFS4_OPEN_STID && st->sc_type != NFS4_LOCK_STID)
+ return 0; /* XXX: or SEQ_SKIP? */
+ ols = openlockstateid(st);
+ oo = ols->st_stateowner;
+ nf = st->sc_file;
+ file = find_any_file(nf);
+
+ seq_printf(s, "- 0x%16phN: { type: open, ", &st->sc_stateid);
+
+ access = bmap_to_share_mode(ols->st_access_bmap);
+ deny = bmap_to_share_mode(ols->st_deny_bmap);
+
+ seq_printf(s, "access: \%s\%s, ",
+ access & NFS4_SHARE_ACCESS_READ ? "r" : "-",
+ access & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
+ seq_printf(s, "deny: \%s\%s, ",
+ deny & NFS4_SHARE_ACCESS_READ ? "r" : "-",
+ deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
+
+ nfs4_show_superblock(s, file);
+ seq_printf(s, ", ");
+ nfs4_show_owner(s, oo);
+ seq_printf(s, " }\n");
+ fput(file);
+
+ return 0;
+}
+
+static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
+{
+ struct nfs4_ol_stateid *ols;
+ struct nfs4_file *nf;
+ struct file *file;
+ struct nfs4_stateowner *oo;
+
+ ols = openlockstateid(st);
+ oo = ols->st_stateowner;
+ nf = st->sc_file;
+ file = find_any_file(nf);
+
+ seq_printf(s, "- 0x%16phN: { type: lock, ", &st->sc_stateid);
+
+ /*
+ * Note: a lock stateid isn't really the same thing as a lock,
+ * it's the locking state held by one owner on a file, and there
+ * may be multiple (or no) lock ranges associated with it.
+ * (Same for the matter is true of open stateids.)
+ */
+
+ nfs4_show_superblock(s, file);
+ /* XXX: open stateid? */
+ seq_printf(s, ", ");
+ nfs4_show_owner(s, oo);
+ seq_printf(s, " }\n");
+ fput(file);
+
+ return 0;
+}
+
+static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
+{
+ struct nfs4_delegation *ds;
+ struct nfs4_file *nf;
+ struct file *file;
+
+ ds = delegstateid(st);
+ nf = st->sc_file;
+ file = nf->fi_deleg_file;
+
+ seq_printf(s, "- 0x%16phN: { type: deleg, ", &st->sc_stateid);
+
+ /* Kinda dead code as long as we only support read delegs: */
+ seq_printf(s, "access: %s, ",
+ ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w");
+
+ /* XXX: lease time, whether it's being recalled. */
+
+ nfs4_show_superblock(s, file);
+ seq_printf(s, " }\n");
+
+ return 0;
+}
+
+static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st)
+{
+ struct nfs4_layout_stateid *ls;
+ struct file *file;
+
+ ls = container_of(st, struct nfs4_layout_stateid, ls_stid);
+ file = ls->ls_file;
+
+ seq_printf(s, "- 0x%16phN: { type: layout, ", &st->sc_stateid);
+
+ /* XXX: What else would be useful? */
+
+ nfs4_show_superblock(s, file);
+ seq_printf(s, " }\n");
+
+ return 0;
+}
+
+static int states_show(struct seq_file *s, void *v)
+{
+ struct nfs4_stid *st = v;
+
+ switch (st->sc_type) {
+ case NFS4_OPEN_STID:
+ return nfs4_show_open(s, st);
+ case NFS4_LOCK_STID:
+ return nfs4_show_lock(s, st);
+ case NFS4_DELEG_STID:
+ return nfs4_show_deleg(s, st);
+ case NFS4_LAYOUT_STID:
+ return nfs4_show_layout(s, st);
+ default:
+ return 0; /* XXX: or SEQ_SKIP? */
+ }
+ /* XXX: copy stateids? */
+}
+
+static struct seq_operations states_seq_ops = {
+ .start = states_start,
+ .next = states_next,
+ .stop = states_stop,
+ .show = states_show
+};
+
+static int client_states_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *s;
+ struct nfs4_client *clp;
+ int ret;
+
+ clp = get_nfsdfs_clp(inode);
+ if (!clp)
+ return -ENXIO;
+
+ ret = seq_open(file, &states_seq_ops);
+ if (ret)
+ return ret;
+ s = file->private_data;
+ s->private = clp;
+ return 0;
+}
+
+static int client_opens_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *m = file->private_data;
+ struct nfs4_client *clp = m->private;
+
+ /* XXX: alternatively, we could get/drop in seq start/stop */
+ drop_client(clp);
+ return 0;
+}
+
+static const struct file_operations client_states_fops = {
+ .open = client_states_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = client_opens_release,
+};
+
+/*
+ * Normally we refuse to destroy clients that are in use, but here the
+ * administrator is telling us to just do it. We also want to wait
+ * so the caller has a guarantee that the client's locks are gone by
+ * the time the write returns:
+ */
+static void force_expire_client(struct nfs4_client *clp)
+{
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ bool already_expired;
+
+ spin_lock(&clp->cl_lock);
+ clp->cl_time = 0;
+ spin_unlock(&clp->cl_lock);
+
+ wait_event(expiry_wq, atomic_read(&clp->cl_rpc_users) == 0);
+ spin_lock(&nn->client_lock);
+ already_expired = list_empty(&clp->cl_lru);
+ if (!already_expired)
+ unhash_client_locked(clp);
+ spin_unlock(&nn->client_lock);
+
+ if (!already_expired)
+ expire_client(clp);
+ else
+ wait_event(expiry_wq, clp->cl_nfsd_dentry == NULL);
+}
+
+static ssize_t client_ctl_write(struct file *file, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ char *data;
+ struct nfs4_client *clp;
+
+ data = simple_transaction_get(file, buf, size);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+ if (size != 7 || 0 != memcmp(data, "expire\n", 7))
+ return -EINVAL;
+ clp = get_nfsdfs_clp(file_inode(file));
+ if (!clp)
+ return -ENXIO;
+ force_expire_client(clp);
+ drop_client(clp);
+ return 7;
+}
+
+static const struct file_operations client_ctl_fops = {
+ .write = client_ctl_write,
+ .release = simple_transaction_release,
+};
+
+static const struct tree_descr client_files[] = {
+ [0] = {"info", &client_info_fops, S_IRUSR},
+ [1] = {"states", &client_states_fops, S_IRUSR},
+ [2] = {"ctl", &client_ctl_fops, S_IRUSR|S_IWUSR},
+ [3] = {""},
+};
+
static struct nfs4_client *create_client(struct xdr_netobj name,
struct svc_rqst *rqstp, nfs4_verifier *verf)
{
@@ -2206,6 +2575,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
struct sockaddr *sa = svc_addr(rqstp);
int ret;
struct net *net = SVC_NET(rqstp);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
clp = alloc_client(name);
if (clp == NULL)
@@ -2216,13 +2586,22 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
free_client(clp);
return NULL;
}
+ gen_clid(clp, nn);
+ kref_init(&clp->cl_nfsdfs.cl_ref);
nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL);
clp->cl_time = get_seconds();
clear_bit(0, &clp->cl_cb_slot_busy);
copy_verf(clp, verf);
- rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
+ memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage));
clp->cl_cb_session = NULL;
clp->net = net;
+ clp->cl_nfsd_dentry = nfsd_client_mkdir(nn, &clp->cl_nfsdfs,
+ clp->cl_clientid.cl_id - nn->clientid_base,
+ client_files);
+ if (!clp->cl_nfsd_dentry) {
+ free_client(clp);
+ return NULL;
+ }
return clp;
}
@@ -2533,6 +2912,22 @@ static bool client_has_state(struct nfs4_client *clp)
|| !list_empty(&clp->async_copies);
}
+static __be32 copy_impl_id(struct nfs4_client *clp,
+ struct nfsd4_exchange_id *exid)
+{
+ if (!exid->nii_domain.data)
+ return 0;
+ xdr_netobj_dup(&clp->cl_nii_domain, &exid->nii_domain, GFP_KERNEL);
+ if (!clp->cl_nii_domain.data)
+ return nfserr_jukebox;
+ xdr_netobj_dup(&clp->cl_nii_name, &exid->nii_name, GFP_KERNEL);
+ if (!clp->cl_nii_name.data)
+ return nfserr_jukebox;
+ clp->cl_nii_time.tv_sec = exid->nii_time.tv_sec;
+ clp->cl_nii_time.tv_nsec = exid->nii_time.tv_nsec;
+ return 0;
+}
+
__be32
nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
@@ -2559,6 +2954,9 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
new = create_client(exid->clname, rqstp, &verf);
if (new == NULL)
return nfserr_jukebox;
+ status = copy_impl_id(new, exid);
+ if (status)
+ goto out_nolock;
switch (exid->spa_how) {
case SP4_MACH_CRED:
@@ -2667,7 +3065,6 @@ out_new:
new->cl_spo_must_allow.u.words[0] = exid->spo_must_allow[0];
new->cl_spo_must_allow.u.words[1] = exid->spo_must_allow[1];
- gen_clid(new, nn);
add_to_unconfirmed(new);
swap(new, conf);
out_copy:
@@ -3411,7 +3808,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
copy_clid(new, conf);
gen_confirm(new, nn);
} else /* case 4 (new client) or cases 2, 3 (client reboot): */
- gen_clid(new, nn);
+ ;
new->cl_minorversion = 0;
gen_callback(new, setclid, rqstp);
add_to_unconfirmed(new);
@@ -3632,12 +4029,11 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj
if (!sop)
return NULL;
- sop->so_owner.data = kmemdup(owner->data, owner->len, GFP_KERNEL);
+ xdr_netobj_dup(&sop->so_owner, owner, GFP_KERNEL);
if (!sop->so_owner.data) {
kmem_cache_free(slab, sop);
return NULL;
}
- sop->so_owner.len = owner->len;
INIT_LIST_HEAD(&sop->so_stateids);
sop->so_client = clp;
@@ -4092,7 +4488,7 @@ static __be32 lookup_clientid(clientid_t *clid,
spin_unlock(&nn->client_lock);
return nfserr_expired;
}
- atomic_inc(&found->cl_refcount);
+ atomic_inc(&found->cl_rpc_users);
spin_unlock(&nn->client_lock);
/* Cache the nfs4_client in cstate! */
@@ -5725,12 +6121,11 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
if (fl->fl_lmops == &nfsd_posix_mng_ops) {
lo = (struct nfs4_lockowner *) fl->fl_owner;
- deny->ld_owner.data = kmemdup(lo->lo_owner.so_owner.data,
- lo->lo_owner.so_owner.len, GFP_KERNEL);
+ xdr_netobj_dup(&deny->ld_owner, &lo->lo_owner.so_owner,
+ GFP_KERNEL);
if (!deny->ld_owner.data)
/* We just don't care that much */
goto nevermind;
- deny->ld_owner.len = lo->lo_owner.so_owner.len;
deny->ld_clientid = lo->lo_owner.so_client->cl_clientid;
} else {
nevermind:
@@ -6584,7 +6979,7 @@ nfs4_check_open_reclaim(clientid_t *clid,
static inline void
put_client(struct nfs4_client *clp)
{
- atomic_dec(&clp->cl_refcount);
+ atomic_dec(&clp->cl_rpc_users);
}
static struct nfs4_client *
@@ -6702,7 +7097,7 @@ nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst,
return;
lockdep_assert_held(&nn->client_lock);
- atomic_inc(&clp->cl_refcount);
+ atomic_inc(&clp->cl_rpc_users);
list_add(&lst->st_locks, collect);
}
@@ -6731,7 +7126,7 @@ static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max,
* Despite the fact that these functions deal
* with 64-bit integers for "count", we must
* ensure that it doesn't blow up the
- * clp->cl_refcount. Throw a warning if we
+ * clp->cl_rpc_users. Throw a warning if we
* start to approach INT_MAX here.
*/
WARN_ON_ONCE(count == (INT_MAX / 2));
@@ -6855,7 +7250,7 @@ nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max,
if (func) {
func(oop);
if (collect) {
- atomic_inc(&clp->cl_refcount);
+ atomic_inc(&clp->cl_rpc_users);
list_add(&oop->oo_perclient, collect);
}
}
@@ -6863,7 +7258,7 @@ nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max,
/*
* Despite the fact that these functions deal with
* 64-bit integers for "count", we must ensure that
- * it doesn't blow up the clp->cl_refcount. Throw a
+ * it doesn't blow up the clp->cl_rpc_users. Throw a
* warning if we start to approach INT_MAX here.
*/
WARN_ON_ONCE(count == (INT_MAX / 2));
@@ -6993,7 +7388,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
if (dp->dl_time != 0)
continue;
- atomic_inc(&clp->cl_refcount);
+ atomic_inc(&clp->cl_rpc_users);
WARN_ON(!unhash_delegation_locked(dp));
list_add(&dp->dl_recall_lru, victims);
}
@@ -7001,7 +7396,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
/*
* Despite the fact that these functions deal with
* 64-bit integers for "count", we must ensure that
- * it doesn't blow up the clp->cl_refcount. Throw a
+ * it doesn't blow up the clp->cl_rpc_users. Throw a
* warning if we start to approach INT_MAX here.
*/
WARN_ON_ONCE(count == (INT_MAX / 2));
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 52c4f6daa649..442811809f3d 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -269,19 +269,13 @@ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
return ret;
}
-/*
- * We require the high 32 bits of 'seconds' to be 0, and
- * we ignore all 32 bits of 'nseconds'.
- */
static __be32
-nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec *tv)
+nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec64 *tv)
{
DECODE_HEAD;
- u64 sec;
READ_BUF(12);
- p = xdr_decode_hyper(p, &sec);
- tv->tv_sec = sec;
+ p = xdr_decode_hyper(p, &tv->tv_sec);
tv->tv_nsec = be32_to_cpup(p++);
if (tv->tv_nsec >= (u32)1000000000)
return nfserr_inval;
@@ -320,7 +314,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
struct iattr *iattr, struct nfs4_acl **acl,
struct xdr_netobj *label, int *umask)
{
- struct timespec ts;
int expected_len, len = 0;
u32 dummy32;
char *buf;
@@ -422,8 +415,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
switch (dummy32) {
case NFS4_SET_TO_CLIENT_TIME:
len += 12;
- status = nfsd4_decode_time(argp, &ts);
- iattr->ia_atime = timespec_to_timespec64(ts);
+ status = nfsd4_decode_time(argp, &iattr->ia_atime);
if (status)
return status;
iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET);
@@ -442,8 +434,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
switch (dummy32) {
case NFS4_SET_TO_CLIENT_TIME:
len += 12;
- status = nfsd4_decode_time(argp, &ts);
- iattr->ia_mtime = timespec_to_timespec64(ts);
+ status = nfsd4_decode_time(argp, &iattr->ia_mtime);
if (status)
return status;
iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET);
@@ -1398,7 +1389,6 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
goto xdr_error;
}
- /* Ignore Implementation ID */
READ_BUF(4); /* nfs_impl_id4 array length */
dummy = be32_to_cpup(p++);
@@ -1406,21 +1396,19 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
goto xdr_error;
if (dummy == 1) {
- /* nii_domain */
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy);
- p += XDR_QUADLEN(dummy);
+ status = nfsd4_decode_opaque(argp, &exid->nii_domain);
+ if (status)
+ goto xdr_error;
/* nii_name */
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy);
- p += XDR_QUADLEN(dummy);
+ status = nfsd4_decode_opaque(argp, &exid->nii_name);
+ if (status)
+ goto xdr_error;
/* nii_date */
- READ_BUF(12);
- p += 3;
+ status = nfsd4_decode_time(argp, &exid->nii_time);
+ if (status)
+ goto xdr_error;
}
DECODE_TAIL;
}
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index da52b594362a..26ad75ae2be0 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -9,6 +9,7 @@
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*/
+#include <linux/sunrpc/svc_xprt.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sunrpc/addr.h>
@@ -35,48 +36,12 @@ struct nfsd_drc_bucket {
spinlock_t cache_lock;
};
-static struct nfsd_drc_bucket *drc_hashtbl;
-static struct kmem_cache *drc_slab;
-
-/* max number of entries allowed in the cache */
-static unsigned int max_drc_entries;
-
-/* number of significant bits in the hash value */
-static unsigned int maskbits;
-static unsigned int drc_hashsize;
-
-/*
- * Stats and other tracking of on the duplicate reply cache. All of these and
- * the "rc" fields in nfsdstats are protected by the cache_lock
- */
-
-/* total number of entries */
-static atomic_t num_drc_entries;
-
-/* cache misses due only to checksum comparison failures */
-static unsigned int payload_misses;
-
-/* amount of memory (in bytes) currently consumed by the DRC */
-static unsigned int drc_mem_usage;
-
-/* longest hash chain seen */
-static unsigned int longest_chain;
-
-/* size of cache when we saw the longest hash chain */
-static unsigned int longest_chain_cachesize;
-
static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
static unsigned long nfsd_reply_cache_count(struct shrinker *shrink,
struct shrink_control *sc);
static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink,
struct shrink_control *sc);
-static struct shrinker nfsd_reply_cache_shrinker = {
- .scan_objects = nfsd_reply_cache_scan,
- .count_objects = nfsd_reply_cache_count,
- .seeks = 1,
-};
-
/*
* Put a cap on the size of the DRC based on the amount of available
* low memory in the machine.
@@ -94,6 +59,9 @@ static struct shrinker nfsd_reply_cache_shrinker = {
* ...with a hard cap of 256k entries. In the worst case, each entry will be
* ~1k, so the above numbers should give a rough max of the amount of memory
* used in k.
+ *
+ * XXX: these limits are per-container, so memory used will increase
+ * linearly with number of containers. Maybe that's OK.
*/
static unsigned int
nfsd_cache_size_limit(void)
@@ -116,17 +84,18 @@ nfsd_hashsize(unsigned int limit)
}
static u32
-nfsd_cache_hash(__be32 xid)
+nfsd_cache_hash(__be32 xid, struct nfsd_net *nn)
{
- return hash_32(be32_to_cpu(xid), maskbits);
+ return hash_32(be32_to_cpu(xid), nn->maskbits);
}
static struct svc_cacherep *
-nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum)
+nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum,
+ struct nfsd_net *nn)
{
struct svc_cacherep *rp;
- rp = kmem_cache_alloc(drc_slab, GFP_KERNEL);
+ rp = kmem_cache_alloc(nn->drc_slab, GFP_KERNEL);
if (rp) {
rp->c_state = RC_UNUSED;
rp->c_type = RC_NOCACHE;
@@ -147,91 +116,101 @@ nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum)
}
static void
-nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
+nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
+ struct nfsd_net *nn)
{
if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) {
- drc_mem_usage -= rp->c_replvec.iov_len;
+ nn->drc_mem_usage -= rp->c_replvec.iov_len;
kfree(rp->c_replvec.iov_base);
}
if (rp->c_state != RC_UNUSED) {
rb_erase(&rp->c_node, &b->rb_head);
list_del(&rp->c_lru);
- atomic_dec(&num_drc_entries);
- drc_mem_usage -= sizeof(*rp);
+ atomic_dec(&nn->num_drc_entries);
+ nn->drc_mem_usage -= sizeof(*rp);
}
- kmem_cache_free(drc_slab, rp);
+ kmem_cache_free(nn->drc_slab, rp);
}
static void
-nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
+nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
+ struct nfsd_net *nn)
{
spin_lock(&b->cache_lock);
- nfsd_reply_cache_free_locked(b, rp);
+ nfsd_reply_cache_free_locked(b, rp, nn);
spin_unlock(&b->cache_lock);
}
-int nfsd_reply_cache_init(void)
+int nfsd_reply_cache_init(struct nfsd_net *nn)
{
unsigned int hashsize;
unsigned int i;
int status = 0;
- max_drc_entries = nfsd_cache_size_limit();
- atomic_set(&num_drc_entries, 0);
- hashsize = nfsd_hashsize(max_drc_entries);
- maskbits = ilog2(hashsize);
+ nn->max_drc_entries = nfsd_cache_size_limit();
+ atomic_set(&nn->num_drc_entries, 0);
+ hashsize = nfsd_hashsize(nn->max_drc_entries);
+ nn->maskbits = ilog2(hashsize);
- status = register_shrinker(&nfsd_reply_cache_shrinker);
+ nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan;
+ nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count;
+ nn->nfsd_reply_cache_shrinker.seeks = 1;
+ status = register_shrinker(&nn->nfsd_reply_cache_shrinker);
if (status)
- return status;
-
- drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep),
- 0, 0, NULL);
- if (!drc_slab)
goto out_nomem;
- drc_hashtbl = kcalloc(hashsize, sizeof(*drc_hashtbl), GFP_KERNEL);
- if (!drc_hashtbl) {
- drc_hashtbl = vzalloc(array_size(hashsize,
- sizeof(*drc_hashtbl)));
- if (!drc_hashtbl)
- goto out_nomem;
+ nn->drc_slab = kmem_cache_create("nfsd_drc",
+ sizeof(struct svc_cacherep), 0, 0, NULL);
+ if (!nn->drc_slab)
+ goto out_shrinker;
+
+ nn->drc_hashtbl = kcalloc(hashsize,
+ sizeof(*nn->drc_hashtbl), GFP_KERNEL);
+ if (!nn->drc_hashtbl) {
+ nn->drc_hashtbl = vzalloc(array_size(hashsize,
+ sizeof(*nn->drc_hashtbl)));
+ if (!nn->drc_hashtbl)
+ goto out_slab;
}
for (i = 0; i < hashsize; i++) {
- INIT_LIST_HEAD(&drc_hashtbl[i].lru_head);
- spin_lock_init(&drc_hashtbl[i].cache_lock);
+ INIT_LIST_HEAD(&nn->drc_hashtbl[i].lru_head);
+ spin_lock_init(&nn->drc_hashtbl[i].cache_lock);
}
- drc_hashsize = hashsize;
+ nn->drc_hashsize = hashsize;
return 0;
+out_slab:
+ kmem_cache_destroy(nn->drc_slab);
+out_shrinker:
+ unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
out_nomem:
printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
- nfsd_reply_cache_shutdown();
return -ENOMEM;
}
-void nfsd_reply_cache_shutdown(void)
+void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
{
struct svc_cacherep *rp;
unsigned int i;
- unregister_shrinker(&nfsd_reply_cache_shrinker);
+ unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
- for (i = 0; i < drc_hashsize; i++) {
- struct list_head *head = &drc_hashtbl[i].lru_head;
+ for (i = 0; i < nn->drc_hashsize; i++) {
+ struct list_head *head = &nn->drc_hashtbl[i].lru_head;
while (!list_empty(head)) {
rp = list_first_entry(head, struct svc_cacherep, c_lru);
- nfsd_reply_cache_free_locked(&drc_hashtbl[i], rp);
+ nfsd_reply_cache_free_locked(&nn->drc_hashtbl[i],
+ rp, nn);
}
}
- kvfree(drc_hashtbl);
- drc_hashtbl = NULL;
- drc_hashsize = 0;
+ kvfree(nn->drc_hashtbl);
+ nn->drc_hashtbl = NULL;
+ nn->drc_hashsize = 0;
- kmem_cache_destroy(drc_slab);
- drc_slab = NULL;
+ kmem_cache_destroy(nn->drc_slab);
+ nn->drc_slab = NULL;
}
/*
@@ -246,7 +225,7 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
}
static long
-prune_bucket(struct nfsd_drc_bucket *b)
+prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn)
{
struct svc_cacherep *rp, *tmp;
long freed = 0;
@@ -258,10 +237,10 @@ prune_bucket(struct nfsd_drc_bucket *b)
*/
if (rp->c_state == RC_INPROG)
continue;
- if (atomic_read(&num_drc_entries) <= max_drc_entries &&
+ if (atomic_read(&nn->num_drc_entries) <= nn->max_drc_entries &&
time_before(jiffies, rp->c_timestamp + RC_EXPIRE))
break;
- nfsd_reply_cache_free_locked(b, rp);
+ nfsd_reply_cache_free_locked(b, rp, nn);
freed++;
}
return freed;
@@ -272,18 +251,18 @@ prune_bucket(struct nfsd_drc_bucket *b)
* Also prune the oldest ones when the total exceeds the max number of entries.
*/
static long
-prune_cache_entries(void)
+prune_cache_entries(struct nfsd_net *nn)
{
unsigned int i;
long freed = 0;
- for (i = 0; i < drc_hashsize; i++) {
- struct nfsd_drc_bucket *b = &drc_hashtbl[i];
+ for (i = 0; i < nn->drc_hashsize; i++) {
+ struct nfsd_drc_bucket *b = &nn->drc_hashtbl[i];
if (list_empty(&b->lru_head))
continue;
spin_lock(&b->cache_lock);
- freed += prune_bucket(b);
+ freed += prune_bucket(b, nn);
spin_unlock(&b->cache_lock);
}
return freed;
@@ -292,13 +271,19 @@ prune_cache_entries(void)
static unsigned long
nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{
- return atomic_read(&num_drc_entries);
+ struct nfsd_net *nn = container_of(shrink,
+ struct nfsd_net, nfsd_reply_cache_shrinker);
+
+ return atomic_read(&nn->num_drc_entries);
}
static unsigned long
nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
{
- return prune_cache_entries();
+ struct nfsd_net *nn = container_of(shrink,
+ struct nfsd_net, nfsd_reply_cache_shrinker);
+
+ return prune_cache_entries(nn);
}
/*
* Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
@@ -334,11 +319,12 @@ nfsd_cache_csum(struct svc_rqst *rqstp)
}
static int
-nfsd_cache_key_cmp(const struct svc_cacherep *key, const struct svc_cacherep *rp)
+nfsd_cache_key_cmp(const struct svc_cacherep *key,
+ const struct svc_cacherep *rp, struct nfsd_net *nn)
{
if (key->c_key.k_xid == rp->c_key.k_xid &&
key->c_key.k_csum != rp->c_key.k_csum)
- ++payload_misses;
+ ++nn->payload_misses;
return memcmp(&key->c_key, &rp->c_key, sizeof(key->c_key));
}
@@ -349,7 +335,8 @@ nfsd_cache_key_cmp(const struct svc_cacherep *key, const struct svc_cacherep *rp
* inserts an empty key on failure.
*/
static struct svc_cacherep *
-nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key)
+nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key,
+ struct nfsd_net *nn)
{
struct svc_cacherep *rp, *ret = key;
struct rb_node **p = &b->rb_head.rb_node,
@@ -362,7 +349,7 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key)
parent = *p;
rp = rb_entry(parent, struct svc_cacherep, c_node);
- cmp = nfsd_cache_key_cmp(key, rp);
+ cmp = nfsd_cache_key_cmp(key, rp, nn);
if (cmp < 0)
p = &parent->rb_left;
else if (cmp > 0)
@@ -376,14 +363,14 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key)
rb_insert_color(&key->c_node, &b->rb_head);
out:
/* tally hash chain length stats */
- if (entries > longest_chain) {
- longest_chain = entries;
- longest_chain_cachesize = atomic_read(&num_drc_entries);
- } else if (entries == longest_chain) {
+ if (entries > nn->longest_chain) {
+ nn->longest_chain = entries;
+ nn->longest_chain_cachesize = atomic_read(&nn->num_drc_entries);
+ } else if (entries == nn->longest_chain) {
/* prefer to keep the smallest cachesize possible here */
- longest_chain_cachesize = min_t(unsigned int,
- longest_chain_cachesize,
- atomic_read(&num_drc_entries));
+ nn->longest_chain_cachesize = min_t(unsigned int,
+ nn->longest_chain_cachesize,
+ atomic_read(&nn->num_drc_entries));
}
lru_put_end(b, ret);
@@ -400,11 +387,12 @@ out:
int
nfsd_cache_lookup(struct svc_rqst *rqstp)
{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct svc_cacherep *rp, *found;
__be32 xid = rqstp->rq_xid;
__wsum csum;
- u32 hash = nfsd_cache_hash(xid);
- struct nfsd_drc_bucket *b = &drc_hashtbl[hash];
+ u32 hash = nfsd_cache_hash(xid, nn);
+ struct nfsd_drc_bucket *b = &nn->drc_hashtbl[hash];
int type = rqstp->rq_cachetype;
int rtn = RC_DOIT;
@@ -420,16 +408,16 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
* Since the common case is a cache miss followed by an insert,
* preallocate an entry.
*/
- rp = nfsd_reply_cache_alloc(rqstp, csum);
+ rp = nfsd_reply_cache_alloc(rqstp, csum, nn);
if (!rp) {
dprintk("nfsd: unable to allocate DRC entry!\n");
return rtn;
}
spin_lock(&b->cache_lock);
- found = nfsd_cache_insert(b, rp);
+ found = nfsd_cache_insert(b, rp, nn);
if (found != rp) {
- nfsd_reply_cache_free_locked(NULL, rp);
+ nfsd_reply_cache_free_locked(NULL, rp, nn);
rp = found;
goto found_entry;
}
@@ -438,11 +426,11 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
rqstp->rq_cacherep = rp;
rp->c_state = RC_INPROG;
- atomic_inc(&num_drc_entries);
- drc_mem_usage += sizeof(*rp);
+ atomic_inc(&nn->num_drc_entries);
+ nn->drc_mem_usage += sizeof(*rp);
/* go ahead and prune the cache */
- prune_bucket(b);
+ prune_bucket(b, nn);
out:
spin_unlock(&b->cache_lock);
return rtn;
@@ -477,7 +465,7 @@ found_entry:
break;
default:
printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type);
- nfsd_reply_cache_free_locked(b, rp);
+ nfsd_reply_cache_free_locked(b, rp, nn);
}
goto out;
@@ -502,6 +490,7 @@ found_entry:
void
nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct svc_cacherep *rp = rqstp->rq_cacherep;
struct kvec *resv = &rqstp->rq_res.head[0], *cachv;
u32 hash;
@@ -512,15 +501,15 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
if (!rp)
return;
- hash = nfsd_cache_hash(rp->c_key.k_xid);
- b = &drc_hashtbl[hash];
+ hash = nfsd_cache_hash(rp->c_key.k_xid, nn);
+ b = &nn->drc_hashtbl[hash];
len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
len >>= 2;
/* Don't cache excessive amounts of data and XDR failures */
if (!statp || len > (256 >> 2)) {
- nfsd_reply_cache_free(b, rp);
+ nfsd_reply_cache_free(b, rp, nn);
return;
}
@@ -535,18 +524,18 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
bufsize = len << 2;
cachv->iov_base = kmalloc(bufsize, GFP_KERNEL);
if (!cachv->iov_base) {
- nfsd_reply_cache_free(b, rp);
+ nfsd_reply_cache_free(b, rp, nn);
return;
}
cachv->iov_len = bufsize;
memcpy(cachv->iov_base, statp, bufsize);
break;
case RC_NOCACHE:
- nfsd_reply_cache_free(b, rp);
+ nfsd_reply_cache_free(b, rp, nn);
return;
}
spin_lock(&b->cache_lock);
- drc_mem_usage += bufsize;
+ nn->drc_mem_usage += bufsize;
lru_put_end(b, rp);
rp->c_secure = test_bit(RQ_SECURE, &rqstp->rq_flags);
rp->c_type = cachetype;
@@ -582,21 +571,26 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
*/
static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
{
- seq_printf(m, "max entries: %u\n", max_drc_entries);
+ struct nfsd_net *nn = v;
+
+ seq_printf(m, "max entries: %u\n", nn->max_drc_entries);
seq_printf(m, "num entries: %u\n",
- atomic_read(&num_drc_entries));
- seq_printf(m, "hash buckets: %u\n", 1 << maskbits);
- seq_printf(m, "mem usage: %u\n", drc_mem_usage);
+ atomic_read(&nn->num_drc_entries));
+ seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits);
+ seq_printf(m, "mem usage: %u\n", nn->drc_mem_usage);
seq_printf(m, "cache hits: %u\n", nfsdstats.rchits);
seq_printf(m, "cache misses: %u\n", nfsdstats.rcmisses);
seq_printf(m, "not cached: %u\n", nfsdstats.rcnocache);
- seq_printf(m, "payload misses: %u\n", payload_misses);
- seq_printf(m, "longest chain len: %u\n", longest_chain);
- seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize);
+ seq_printf(m, "payload misses: %u\n", nn->payload_misses);
+ seq_printf(m, "longest chain len: %u\n", nn->longest_chain);
+ seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize);
return 0;
}
int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file)
{
- return single_open(file, nfsd_reply_cache_stats_show, NULL);
+ struct nfsd_net *nn = net_generic(file_inode(file)->i_sb->s_fs_info,
+ nfsd_net_id);
+
+ return single_open(file, nfsd_reply_cache_stats_show, nn);
}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 62c58cfeb8d8..72fad54fc7e5 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -16,6 +16,7 @@
#include <linux/sunrpc/gss_krb5_enctypes.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/module.h>
+#include <linux/fsnotify.h>
#include "idmap.h"
#include "nfsd.h"
@@ -53,6 +54,7 @@ enum {
NFSD_RecoveryDir,
NFSD_V4EndGrace,
#endif
+ NFSD_MaxReserved
};
/*
@@ -1147,8 +1149,201 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
* populating the filesystem.
*/
+/* Basically copying rpc_get_inode. */
+static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode)
+{
+ struct inode *inode = new_inode(sb);
+ if (!inode)
+ return NULL;
+ /* Following advice from simple_fill_super documentation: */
+ inode->i_ino = iunique(sb, NFSD_MaxReserved);
+ inode->i_mode = mode;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ switch (mode & S_IFMT) {
+ case S_IFDIR:
+ inode->i_fop = &simple_dir_operations;
+ inode->i_op = &simple_dir_inode_operations;
+ inc_nlink(inode);
+ default:
+ break;
+ }
+ return inode;
+}
+
+static int __nfsd_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ struct inode *inode;
+
+ inode = nfsd_get_inode(dir->i_sb, mode);
+ if (!inode)
+ return -ENOMEM;
+ d_add(dentry, inode);
+ inc_nlink(dir);
+ fsnotify_mkdir(dir, dentry);
+ return 0;
+}
+
+static struct dentry *nfsd_mkdir(struct dentry *parent, struct nfsdfs_client *ncl, char *name)
+{
+ struct inode *dir = parent->d_inode;
+ struct dentry *dentry;
+ int ret = -ENOMEM;
+
+ inode_lock(dir);
+ dentry = d_alloc_name(parent, name);
+ if (!dentry)
+ goto out_err;
+ ret = __nfsd_mkdir(d_inode(parent), dentry, S_IFDIR | 0600);
+ if (ret)
+ goto out_err;
+ if (ncl) {
+ d_inode(dentry)->i_private = ncl;
+ kref_get(&ncl->cl_ref);
+ }
+out:
+ inode_unlock(dir);
+ return dentry;
+out_err:
+ dentry = ERR_PTR(ret);
+ goto out;
+}
+
+static void clear_ncl(struct inode *inode)
+{
+ struct nfsdfs_client *ncl = inode->i_private;
+
+ inode->i_private = NULL;
+ synchronize_rcu();
+ kref_put(&ncl->cl_ref, ncl->cl_release);
+}
+
+
+static struct nfsdfs_client *__get_nfsdfs_client(struct inode *inode)
+{
+ struct nfsdfs_client *nc = inode->i_private;
+
+ if (nc)
+ kref_get(&nc->cl_ref);
+ return nc;
+}
+
+struct nfsdfs_client *get_nfsdfs_client(struct inode *inode)
+{
+ struct nfsdfs_client *nc;
+
+ rcu_read_lock();
+ nc = __get_nfsdfs_client(inode);
+ rcu_read_unlock();
+ return nc;
+}
+/* from __rpc_unlink */
+static void nfsdfs_remove_file(struct inode *dir, struct dentry *dentry)
+{
+ int ret;
+
+ clear_ncl(d_inode(dentry));
+ dget(dentry);
+ ret = simple_unlink(dir, dentry);
+ d_delete(dentry);
+ dput(dentry);
+ WARN_ON_ONCE(ret);
+}
+
+static void nfsdfs_remove_files(struct dentry *root)
+{
+ struct dentry *dentry, *tmp;
+
+ list_for_each_entry_safe(dentry, tmp, &root->d_subdirs, d_child) {
+ if (!simple_positive(dentry)) {
+ WARN_ON_ONCE(1); /* I think this can't happen? */
+ continue;
+ }
+ nfsdfs_remove_file(d_inode(root), dentry);
+ }
+}
+
+/* XXX: cut'n'paste from simple_fill_super; figure out if we could share
+ * code instead. */
+static int nfsdfs_create_files(struct dentry *root,
+ const struct tree_descr *files)
+{
+ struct inode *dir = d_inode(root);
+ struct inode *inode;
+ struct dentry *dentry;
+ int i;
+
+ inode_lock(dir);
+ for (i = 0; files->name && files->name[0]; i++, files++) {
+ if (!files->name)
+ continue;
+ dentry = d_alloc_name(root, files->name);
+ if (!dentry)
+ goto out;
+ inode = nfsd_get_inode(d_inode(root)->i_sb,
+ S_IFREG | files->mode);
+ if (!inode) {
+ dput(dentry);
+ goto out;
+ }
+ inode->i_fop = files->ops;
+ inode->i_private = __get_nfsdfs_client(dir);
+ d_add(dentry, inode);
+ fsnotify_create(dir, dentry);
+ }
+ inode_unlock(dir);
+ return 0;
+out:
+ nfsdfs_remove_files(root);
+ inode_unlock(dir);
+ return -ENOMEM;
+}
+
+/* on success, returns positive number unique to that client. */
+struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
+ struct nfsdfs_client *ncl, u32 id,
+ const struct tree_descr *files)
+{
+ struct dentry *dentry;
+ char name[11];
+ int ret;
+
+ sprintf(name, "%u", id);
+
+ dentry = nfsd_mkdir(nn->nfsd_client_dir, ncl, name);
+ if (IS_ERR(dentry)) /* XXX: tossing errors? */
+ return NULL;
+ ret = nfsdfs_create_files(dentry, files);
+ if (ret) {
+ nfsd_client_rmdir(dentry);
+ return NULL;
+ }
+ return dentry;
+}
+
+/* Taken from __rpc_rmdir: */
+void nfsd_client_rmdir(struct dentry *dentry)
+{
+ struct inode *dir = d_inode(dentry->d_parent);
+ struct inode *inode = d_inode(dentry);
+ int ret;
+
+ inode_lock(dir);
+ nfsdfs_remove_files(dentry);
+ clear_ncl(inode);
+ dget(dentry);
+ ret = simple_rmdir(dir, dentry);
+ WARN_ON_ONCE(ret);
+ d_delete(dentry);
+ inode_unlock(dir);
+}
+
static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
{
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
+ struct dentry *dentry;
+ int ret;
+
static const struct tree_descr nfsd_files[] = {
[NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO},
[NFSD_Export_features] = {"export_features",
@@ -1178,7 +1373,15 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
/* last one */ {""}
};
get_net(sb->s_fs_info);
- return simple_fill_super(sb, 0x6e667364, nfsd_files);
+ ret = simple_fill_super(sb, 0x6e667364, nfsd_files);
+ if (ret)
+ return ret;
+ dentry = nfsd_mkdir(sb->s_root, NULL, "clients");
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+ nn->nfsd_client_dir = dentry;
+ return 0;
+
}
static struct dentry *nfsd_mount(struct file_system_type *fs_type,
@@ -1232,6 +1435,7 @@ unsigned int nfsd_net_id;
static __net_init int nfsd_init_net(struct net *net)
{
int retval;
+ struct vfsmount *mnt;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
retval = nfsd_export_init(net);
@@ -1242,18 +1446,33 @@ static __net_init int nfsd_init_net(struct net *net)
goto out_idmap_error;
nn->nfsd_versions = NULL;
nn->nfsd4_minorversions = NULL;
+ retval = nfsd_reply_cache_init(nn);
+ if (retval)
+ goto out_drc_error;
nn->nfsd4_lease = 90; /* default lease time */
nn->nfsd4_grace = 90;
nn->somebody_reclaimed = false;
nn->track_reclaim_completes = false;
nn->clverifier_counter = prandom_u32();
- nn->clientid_counter = prandom_u32();
+ nn->clientid_base = prandom_u32();
+ nn->clientid_counter = nn->clientid_base + 1;
nn->s2s_cp_cl_id = nn->clientid_counter++;
atomic_set(&nn->ntf_refcnt, 0);
init_waitqueue_head(&nn->ntf_wq);
+
+ mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
+ if (IS_ERR(mnt)) {
+ retval = PTR_ERR(mnt);
+ goto out_mount_err;
+ }
+ nn->nfsd_mnt = mnt;
return 0;
+out_mount_err:
+ nfsd_reply_cache_shutdown(nn);
+out_drc_error:
+ nfsd_idmap_shutdown(net);
out_idmap_error:
nfsd_export_shutdown(net);
out_export_error:
@@ -1262,6 +1481,10 @@ out_export_error:
static __net_exit void nfsd_exit_net(struct net *net)
{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ mntput(nn->nfsd_mnt);
+ nfsd_reply_cache_shutdown(nn);
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
nfsd_netns_free_versions(net_generic(net, nfsd_net_id));
@@ -1295,9 +1518,6 @@ static int __init init_nfsd(void)
if (retval)
goto out_exit_pnfs;
nfsd_stat_init(); /* Statistics */
- retval = nfsd_reply_cache_init();
- if (retval)
- goto out_free_stat;
nfsd_lockd_init(); /* lockd->nfsd callbacks */
retval = create_proc_exports_entry();
if (retval)
@@ -1311,8 +1531,6 @@ out_free_all:
remove_proc_entry("fs/nfs", NULL);
out_free_lockd:
nfsd_lockd_shutdown();
- nfsd_reply_cache_shutdown();
-out_free_stat:
nfsd_stat_shutdown();
nfsd_fault_inject_cleanup();
out_exit_pnfs:
@@ -1328,7 +1546,6 @@ out_unregister_pernet:
static void __exit exit_nfsd(void)
{
- nfsd_reply_cache_shutdown();
remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL);
nfsd_stat_shutdown();
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 24187b5dd638..af2947551e9c 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -22,6 +22,7 @@
#include <uapi/linux/nfsd/debug.h>
+#include "netns.h"
#include "stats.h"
#include "export.h"
@@ -86,6 +87,16 @@ int nfsd_pool_stats_release(struct inode *, struct file *);
void nfsd_destroy(struct net *net);
+struct nfsdfs_client {
+ struct kref cl_ref;
+ void (*cl_release)(struct kref *kref);
+};
+
+struct nfsdfs_client *get_nfsdfs_client(struct inode *);
+struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
+ struct nfsdfs_client *ncl, u32 id, const struct tree_descr *);
+void nfsd_client_rmdir(struct dentry *dentry);
+
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
#ifdef CONFIG_NFSD_V2_ACL
extern const struct svc_version nfsd_acl_version2;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 0b74d371ed67..8cb20cab012b 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -39,6 +39,7 @@
#include <linux/refcount.h>
#include <linux/sunrpc/svc_xprt.h>
#include "nfsfh.h"
+#include "nfsd.h"
typedef struct {
u32 cl_boot;
@@ -316,6 +317,10 @@ struct nfs4_client {
clientid_t cl_clientid; /* generated by server */
nfs4_verifier cl_confirm; /* generated by server */
u32 cl_minorversion;
+ /* NFSv4.1 client implementation id: */
+ struct xdr_netobj cl_nii_domain;
+ struct xdr_netobj cl_nii_name;
+ struct timespec cl_nii_time;
/* for v4.0 and v4.1 callbacks: */
struct nfs4_cb_conn cl_cb_conn;
@@ -347,9 +352,13 @@ struct nfs4_client {
struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */
u32 cl_exchange_flags;
/* number of rpc's in progress over an associated session: */
- atomic_t cl_refcount;
+ atomic_t cl_rpc_users;
+ struct nfsdfs_client cl_nfsdfs;
struct nfs4_op_map cl_spo_must_allow;
+ /* debugging info directory under nfsd/clients/ : */
+ struct dentry *cl_nfsd_dentry;
+
/* for nfs41 callbacks */
/* We currently support a single back channel with a single slot */
unsigned long cl_cb_slot_busy;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index fc24ee47eab5..c85783e536d5 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -404,7 +404,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
/*
* If utimes(2) and friends are called with times not NULL, we should
* not set NFSD_MAY_WRITE bit. Otherwise fh_verify->nfsd_permission
- * will return EACCESS, when the caller's effective UID does not match
+ * will return EACCES, when the caller's effective UID does not match
* the owner of the file, and the caller is not privileged. In this
* situation, we should return EPERM(notify_change will return this).
*/
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index feeb6d4bdffd..d64c870f998a 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -410,6 +410,9 @@ struct nfsd4_exchange_id {
int spa_how;
u32 spo_must_enforce[3];
u32 spo_must_allow[3];
+ struct xdr_netobj nii_domain;
+ struct xdr_netobj nii_name;
+ struct timespec64 nii_time;
};
struct nfsd4_sequence {
@@ -472,7 +475,7 @@ struct nfsd4_layoutcommit {
u32 lc_reclaim; /* request */
u32 lc_newoffset; /* request */
u64 lc_last_wr; /* request */
- struct timespec lc_mtime; /* request */
+ struct timespec64 lc_mtime; /* request */
u32 lc_layout_type; /* request */
u32 lc_up_len; /* layout length */
void *lc_up_layout; /* decoded by callback */
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index a90bb19dcfa2..91006f47e420 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -920,6 +920,22 @@ static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid)
return 0;
}
+static int fanotify_events_supported(struct path *path, __u64 mask)
+{
+ /*
+ * Some filesystems such as 'proc' acquire unusual locks when opening
+ * files. For them fanotify permission events have high chances of
+ * deadlocking the system - open done when reporting fanotify event
+ * blocks on this "unusual" lock while another process holding the lock
+ * waits for fanotify permission event to be answered. Just disallow
+ * permission events for such filesystems.
+ */
+ if (mask & FANOTIFY_PERM_EVENTS &&
+ path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM)
+ return -EINVAL;
+ return 0;
+}
+
static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
int dfd, const char __user *pathname)
{
@@ -1018,6 +1034,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
if (ret)
goto fput_and_out;
+ if (flags & FAN_MARK_ADD) {
+ ret = fanotify_events_supported(&path, mask);
+ if (ret)
+ goto path_put_and_out;
+ }
+
if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
ret = fanotify_test_fid(&path, &__fsid);
if (ret)
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 4eb2ebfac468..2ecef6155fc0 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -95,47 +95,6 @@ void fsnotify_sb_delete(struct super_block *sb)
}
/*
- * fsnotify_nameremove - a filename was removed from a directory
- *
- * This is mostly called under parent vfs inode lock so name and
- * dentry->d_parent should be stable. However there are some corner cases where
- * inode lock is not held. So to be on the safe side and be reselient to future
- * callers and out of tree users of d_delete(), we do not assume that d_parent
- * and d_name are stable and we use dget_parent() and
- * take_dentry_name_snapshot() to grab stable references.
- */
-void fsnotify_nameremove(struct dentry *dentry, int isdir)
-{
- struct dentry *parent;
- struct name_snapshot name;
- __u32 mask = FS_DELETE;
-
- /* d_delete() of pseudo inode? (e.g. __ns_get_path() playing tricks) */
- if (IS_ROOT(dentry))
- return;
-
- if (isdir)
- mask |= FS_ISDIR;
-
- parent = dget_parent(dentry);
- /* Avoid unneeded take_dentry_name_snapshot() */
- if (!(d_inode(parent)->i_fsnotify_mask & FS_DELETE) &&
- !(dentry->d_sb->s_fsnotify_mask & FS_DELETE))
- goto out_dput;
-
- take_dentry_name_snapshot(&name, dentry);
-
- fsnotify(d_inode(parent), mask, d_inode(dentry), FSNOTIFY_EVENT_INODE,
- &name.name, 0);
-
- release_dentry_name_snapshot(&name);
-
-out_dput:
- dput(parent);
-}
-EXPORT_SYMBOL(fsnotify_nameremove);
-
-/*
* Given an inode, first check if we care what happens to our children. Inotify
* and dnotify both tell their parents about events. If we care about any event
* on a child we run all of our children and set a dentry flag saying that the
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 62ee41b4bbd0..4c3dcb718961 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -98,3 +98,7 @@ config PROC_CHILDREN
Say Y if you are running any user-space software which takes benefit from
this interface. For example, rkt is such a piece of software.
+
+config PROC_PID_ARCH_STATUS
+ def_bool n
+ depends on PROC_FS
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 2edbb657f859..46dcb6f0eccf 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -381,9 +381,9 @@ static inline void task_context_switch_counts(struct seq_file *m,
static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
{
seq_printf(m, "Cpus_allowed:\t%*pb\n",
- cpumask_pr_args(&task->cpus_allowed));
+ cpumask_pr_args(task->cpus_ptr));
seq_printf(m, "Cpus_allowed_list:\t%*pbl\n",
- cpumask_pr_args(&task->cpus_allowed));
+ cpumask_pr_args(task->cpus_ptr));
}
static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
@@ -462,7 +462,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
* a program is not able to use ptrace(2) in that case. It is
* safe because the task has stopped executing permanently.
*/
- if (permitted && (task->flags & PF_DUMPCORE)) {
+ if (permitted && (task->flags & (PF_EXITING|PF_DUMPCORE))) {
if (try_get_task_stack(task)) {
eip = KSTK_EIP(task);
esp = KSTK_ESP(task);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9c8ca6cd3ce4..c40fca98f2b7 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3061,6 +3061,9 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_STACKLEAK_METRICS
ONE("stack_depth", S_IRUGO, proc_stack_depth),
#endif
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+ ONE("arch_status", S_IRUGO, proc_pid_arch_status),
+#endif
};
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@ -3077,8 +3080,7 @@ static const struct file_operations proc_tgid_base_operations = {
struct pid *tgid_pidfd_to_pid(const struct file *file)
{
- if (!d_is_dir(file->f_path.dentry) ||
- (file->f_op != &proc_tgid_base_operations))
+ if (file->f_op != &proc_tgid_base_operations)
return ERR_PTR(-EBADF);
return proc_pid(file_inode(file));
@@ -3449,6 +3451,9 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_LIVEPATCH
ONE("patch_state", S_IRUSR, proc_pid_patch_state),
#endif
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+ ONE("arch_status", S_IRUGO, proc_pid_arch_status),
+#endif
};
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 8b145e7b9661..522199e9525e 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -211,7 +211,7 @@ static struct file_system_type proc_fs_type = {
.init_fs_context = proc_init_fs_context,
.parameters = &proc_fs_parameters,
.kill_sb = proc_kill_sb,
- .fs_flags = FS_USERNS_MOUNT,
+ .fs_flags = FS_USERNS_MOUNT | FS_DISALLOW_NOTIFY_PERM,
};
void __init proc_root_init(void)
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 7bb96fdd38ad..57957c91c6df 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -166,7 +166,7 @@ void __weak elfcorehdr_free(unsigned long long addr)
*/
ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
{
- return read_from_oldmem(buf, count, ppos, 0, false);
+ return read_from_oldmem(buf, count, ppos, 0, sev_active());
}
/*
@@ -174,7 +174,7 @@ ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
*/
ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
{
- return read_from_oldmem(buf, count, ppos, 0, sme_active());
+ return read_from_oldmem(buf, count, ppos, 0, mem_encrypt_active());
}
/*
@@ -374,7 +374,7 @@ static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos,
buflen);
start = m->paddr + *fpos - m->offset;
tmp = read_from_oldmem(buffer, tsz, &start,
- userbuf, sme_active());
+ userbuf, mem_encrypt_active());
if (tmp < 0)
return tmp;
buflen -= tsz;
diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c
index 8e0a17ce3180..bfbfc2698070 100644
--- a/fs/pstore/ftrace.c
+++ b/fs/pstore/ftrace.c
@@ -112,27 +112,13 @@ static struct dentry *pstore_ftrace_dir;
void pstore_register_ftrace(void)
{
- struct dentry *file;
-
if (!psinfo->write)
return;
pstore_ftrace_dir = debugfs_create_dir("pstore", NULL);
- if (!pstore_ftrace_dir) {
- pr_err("%s: unable to create pstore directory\n", __func__);
- return;
- }
-
- file = debugfs_create_file("record_ftrace", 0600, pstore_ftrace_dir,
- NULL, &pstore_knob_fops);
- if (!file) {
- pr_err("%s: unable to create record_ftrace file\n", __func__);
- goto err_file;
- }
- return;
-err_file:
- debugfs_remove(pstore_ftrace_dir);
+ debugfs_create_file("record_ftrace", 0600, pstore_ftrace_dir, NULL,
+ &pstore_knob_fops);
}
void pstore_unregister_ftrace(void)
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 89a80b568a17..7fbe8f058220 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -318,22 +318,21 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record)
goto fail;
inode->i_mode = S_IFREG | 0444;
inode->i_fop = &pstore_file_operations;
- private = kzalloc(sizeof(*private), GFP_KERNEL);
- if (!private)
- goto fail_alloc;
- private->record = record;
-
scnprintf(name, sizeof(name), "%s-%s-%llu%s",
pstore_type_to_name(record->type),
record->psi->name, record->id,
record->compressed ? ".enc.z" : "");
+ private = kzalloc(sizeof(*private), GFP_KERNEL);
+ if (!private)
+ goto fail_inode;
+
dentry = d_alloc_name(root, name);
if (!dentry)
goto fail_private;
+ private->record = record;
inode->i_size = private->total_size = size;
-
inode->i_private = private;
if (record->time.tv_sec)
@@ -349,7 +348,7 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record)
fail_private:
free_pstore_private(private);
-fail_alloc:
+fail_inode:
iput(inode);
fail:
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 5b7709894415..2bb3468fc93a 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -655,6 +655,7 @@ static int ramoops_parse_dt(struct platform_device *pdev,
struct ramoops_platform_data *pdata)
{
struct device_node *of_node = pdev->dev.of_node;
+ struct device_node *parent_node;
struct resource *res;
u32 value;
int ret;
@@ -689,6 +690,26 @@ static int ramoops_parse_dt(struct platform_device *pdev,
#undef parse_size
+ /*
+ * Some old Chromebooks relied on the kernel setting the
+ * console_size and pmsg_size to the record size since that's
+ * what the downstream kernel did. These same Chromebooks had
+ * "ramoops" straight under the root node which isn't
+ * according to the current upstream bindings (though it was
+ * arguably acceptable under a prior version of the bindings).
+ * Let's make those old Chromebooks work by detecting that
+ * we're not a child of "reserved-memory" and mimicking the
+ * expected behavior.
+ */
+ parent_node = of_get_parent(of_node);
+ if (!of_node_name_eq(parent_node, "reserved-memory") &&
+ !pdata->console_size && !pdata->ftrace_size &&
+ !pdata->pmsg_size && !pdata->ecc_info.ecc_size) {
+ pdata->console_size = pdata->record_size;
+ pdata->pmsg_size = pdata->record_size;
+ }
+ of_node_put(parent_node);
+
return 0;
}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 58f15a083dd1..be9c471cdbc8 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -223,9 +223,9 @@ static void put_quota_format(struct quota_format_type *fmt)
/*
* Dquot List Management:
- * The quota code uses three lists for dquot management: the inuse_list,
- * free_dquots, and dquot_hash[] array. A single dquot structure may be
- * on all three lists, depending on its current state.
+ * The quota code uses four lists for dquot management: the inuse_list,
+ * free_dquots, dqi_dirty_list, and dquot_hash[] array. A single dquot
+ * structure may be on some of those lists, depending on its current state.
*
* All dquots are placed to the end of inuse_list when first created, and this
* list is used for invalidate operation, which must look at every dquot.
@@ -236,6 +236,11 @@ static void put_quota_format(struct quota_format_type *fmt)
* dqstats.free_dquots gives the number of dquots on the list. When
* dquot is invalidated it's completely released from memory.
*
+ * Dirty dquots are added to the dqi_dirty_list of quota_info when mark
+ * dirtied, and this list is searched when writing dirty dquots back to
+ * quota file. Note that some filesystems do dirty dquot tracking on their
+ * own (e.g. in a journal) and thus don't use dqi_dirty_list.
+ *
* Dquots with a specific identity (device, type and id) are placed on
* one of the dquot_hash[] hash chains. The provides an efficient search
* mechanism to locate a specific dquot.
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index fd5dd806f1b9..cb13fb76dbee 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -331,9 +331,9 @@ static int quota_state_to_flags(struct qc_state *state)
return flags;
}
-static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs)
+static int quota_getstate(struct super_block *sb, int type,
+ struct fs_quota_stat *fqs)
{
- int type;
struct qc_state state;
int ret;
@@ -349,14 +349,7 @@ static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs)
if (!fqs->qs_flags)
return -ENOSYS;
fqs->qs_incoredqs = state.s_incoredqs;
- /*
- * GETXSTATE quotactl has space for just one set of time limits so
- * report them for the first enabled quota type
- */
- for (type = 0; type < MAXQUOTAS; type++)
- if (state.s_state[type].flags & QCI_ACCT_ENABLED)
- break;
- BUG_ON(type == MAXQUOTAS);
+
fqs->qs_btimelimit = state.s_state[type].spc_timelimit;
fqs->qs_itimelimit = state.s_state[type].ino_timelimit;
fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit;
@@ -391,22 +384,22 @@ static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs)
return 0;
}
-static int quota_getxstate(struct super_block *sb, void __user *addr)
+static int quota_getxstate(struct super_block *sb, int type, void __user *addr)
{
struct fs_quota_stat fqs;
int ret;
if (!sb->s_qcop->get_state)
return -ENOSYS;
- ret = quota_getstate(sb, &fqs);
+ ret = quota_getstate(sb, type, &fqs);
if (!ret && copy_to_user(addr, &fqs, sizeof(fqs)))
return -EFAULT;
return ret;
}
-static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs)
+static int quota_getstatev(struct super_block *sb, int type,
+ struct fs_quota_statv *fqs)
{
- int type;
struct qc_state state;
int ret;
@@ -422,14 +415,7 @@ static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs)
if (!fqs->qs_flags)
return -ENOSYS;
fqs->qs_incoredqs = state.s_incoredqs;
- /*
- * GETXSTATV quotactl has space for just one set of time limits so
- * report them for the first enabled quota type
- */
- for (type = 0; type < MAXQUOTAS; type++)
- if (state.s_state[type].flags & QCI_ACCT_ENABLED)
- break;
- BUG_ON(type == MAXQUOTAS);
+
fqs->qs_btimelimit = state.s_state[type].spc_timelimit;
fqs->qs_itimelimit = state.s_state[type].ino_timelimit;
fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit;
@@ -455,7 +441,7 @@ static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs)
return 0;
}
-static int quota_getxstatev(struct super_block *sb, void __user *addr)
+static int quota_getxstatev(struct super_block *sb, int type, void __user *addr)
{
struct fs_quota_statv fqs;
int ret;
@@ -474,7 +460,7 @@ static int quota_getxstatev(struct super_block *sb, void __user *addr)
default:
return -EINVAL;
}
- ret = quota_getstatev(sb, &fqs);
+ ret = quota_getstatev(sb, type, &fqs);
if (!ret && copy_to_user(addr, &fqs, sizeof(fqs)))
return -EFAULT;
return ret;
@@ -744,9 +730,9 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
case Q_XQUOTARM:
return quota_rmxquota(sb, addr);
case Q_XGETQSTAT:
- return quota_getxstate(sb, addr);
+ return quota_getxstate(sb, type, addr);
case Q_XGETQSTATV:
- return quota_getxstatev(sb, addr);
+ return quota_getxstatev(sb, type, addr);
case Q_XSETQLIM:
return quota_setxquota(sb, type, id, addr);
case Q_XGETQUOTA:
diff --git a/fs/read_write.c b/fs/read_write.c
index c543d965e288..1f5088dec566 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1565,6 +1565,58 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
}
#endif
+/**
+ * generic_copy_file_range - copy data between two files
+ * @file_in: file structure to read from
+ * @pos_in: file offset to read from
+ * @file_out: file structure to write data to
+ * @pos_out: file offset to write data to
+ * @len: amount of data to copy
+ * @flags: copy flags
+ *
+ * This is a generic filesystem helper to copy data from one file to another.
+ * It has no constraints on the source or destination file owners - the files
+ * can belong to different superblocks and different filesystem types. Short
+ * copies are allowed.
+ *
+ * This should be called from the @file_out filesystem, as per the
+ * ->copy_file_range() method.
+ *
+ * Returns the number of bytes copied or a negative error indicating the
+ * failure.
+ */
+
+ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t len, unsigned int flags)
+{
+ return do_splice_direct(file_in, &pos_in, file_out, &pos_out,
+ len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
+}
+EXPORT_SYMBOL(generic_copy_file_range);
+
+static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t len, unsigned int flags)
+{
+ /*
+ * Although we now allow filesystems to handle cross sb copy, passing
+ * a file of the wrong filesystem type to filesystem driver can result
+ * in an attempt to dereference the wrong type of ->private_data, so
+ * avoid doing that until we really have a good reason. NFS defines
+ * several different file_system_type structures, but they all end up
+ * using the same ->copy_file_range() function pointer.
+ */
+ if (file_out->f_op->copy_file_range &&
+ file_out->f_op->copy_file_range == file_in->f_op->copy_file_range)
+ return file_out->f_op->copy_file_range(file_in, pos_in,
+ file_out, pos_out,
+ len, flags);
+
+ return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len,
+ flags);
+}
+
/*
* copy_file_range() differs from regular file read and write in that it
* specifically allows return partial success. When it does so is up to
@@ -1574,17 +1626,15 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t len, unsigned int flags)
{
- struct inode *inode_in = file_inode(file_in);
- struct inode *inode_out = file_inode(file_out);
ssize_t ret;
if (flags != 0)
return -EINVAL;
- if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
- return -EISDIR;
- if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
- return -EINVAL;
+ ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
+ flags);
+ if (unlikely(ret))
+ return ret;
ret = rw_verify_area(READ, file_in, &pos_in, len);
if (unlikely(ret))
@@ -1594,15 +1644,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
if (unlikely(ret))
return ret;
- if (!(file_in->f_mode & FMODE_READ) ||
- !(file_out->f_mode & FMODE_WRITE) ||
- (file_out->f_flags & O_APPEND))
- return -EBADF;
-
- /* this could be relaxed once a method supports cross-fs copies */
- if (inode_in->i_sb != inode_out->i_sb)
- return -EXDEV;
-
if (len == 0)
return 0;
@@ -1612,7 +1653,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
* Try cloning first, this is supported by more file systems, and
* more efficient if both clone and copy are supported (e.g. NFS).
*/
- if (file_in->f_op->remap_file_range) {
+ if (file_in->f_op->remap_file_range &&
+ file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
loff_t cloned;
cloned = file_in->f_op->remap_file_range(file_in, pos_in,
@@ -1625,16 +1667,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
}
}
- if (file_out->f_op->copy_file_range) {
- ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out,
- pos_out, len, flags);
- if (ret != -EOPNOTSUPP)
- goto done;
- }
-
- ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
- len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
-
+ ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len,
+ flags);
+ WARN_ON_ONCE(ret == -EOPNOTSUPP);
done:
if (ret > 0) {
fsnotify_access(file_in);
@@ -1951,25 +1986,10 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
return ret;
/* If can't alter the file contents, we're done. */
- if (!(remap_flags & REMAP_FILE_DEDUP)) {
- /* Update the timestamps, since we can alter file contents. */
- if (!(file_out->f_mode & FMODE_NOCMTIME)) {
- ret = file_update_time(file_out);
- if (ret)
- return ret;
- }
-
- /*
- * Clear the security bits if the process is not being run by
- * root. This keeps people from modifying setuid and setgid
- * binaries.
- */
- ret = file_remove_privs(file_out);
- if (ret)
- return ret;
- }
+ if (!(remap_flags & REMAP_FILE_DEDUP))
+ ret = file_modified(file_out);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(generic_remap_file_range_prep);
@@ -1977,29 +1997,21 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags)
{
- struct inode *inode_in = file_inode(file_in);
- struct inode *inode_out = file_inode(file_out);
loff_t ret;
WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP);
- if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
- return -EISDIR;
- if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
- return -EINVAL;
-
/*
* FICLONE/FICLONERANGE ioctls enforce that src and dest files are on
* the same mount. Practically, they only need to be on the same file
* system.
*/
- if (inode_in->i_sb != inode_out->i_sb)
+ if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
return -EXDEV;
- if (!(file_in->f_mode & FMODE_READ) ||
- !(file_out->f_mode & FMODE_WRITE) ||
- (file_out->f_flags & O_APPEND))
- return -EBADF;
+ ret = generic_file_rw_checks(file_in, file_out);
+ if (ret < 0)
+ return ret;
if (!file_in->f_op->remap_file_range)
return -EOPNOTSUPP;
diff --git a/fs/select.c b/fs/select.c
index 6cbc9ff56ba0..a4d8f6e8b63c 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -758,10 +758,9 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
return ret;
ret = core_sys_select(n, inp, outp, exp, to);
+ restore_user_sigmask(sigmask, &sigsaved, ret == -ERESTARTNOHAND);
ret = poll_select_copy_remaining(&end_time, tsp, type, ret);
- restore_user_sigmask(sigmask, &sigsaved);
-
return ret;
}
@@ -1106,8 +1105,7 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
ret = do_sys_poll(ufds, nfds, to);
- restore_user_sigmask(sigmask, &sigsaved);
-
+ restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR);
/* We can restart this syscall, usually */
if (ret == -EINTR)
ret = -ERESTARTNOHAND;
@@ -1142,8 +1140,7 @@ SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds,
ret = do_sys_poll(ufds, nfds, to);
- restore_user_sigmask(sigmask, &sigsaved);
-
+ restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR);
/* We can restart this syscall, usually */
if (ret == -EINTR)
ret = -ERESTARTNOHAND;
@@ -1350,10 +1347,9 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp,
return ret;
ret = compat_core_sys_select(n, inp, outp, exp, to);
+ restore_user_sigmask(sigmask, &sigsaved, ret == -ERESTARTNOHAND);
ret = poll_select_copy_remaining(&end_time, tsp, type, ret);
- restore_user_sigmask(sigmask, &sigsaved);
-
return ret;
}
@@ -1425,8 +1421,7 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds,
ret = do_sys_poll(ufds, nfds, to);
- restore_user_sigmask(sigmask, &sigsaved);
-
+ restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR);
/* We can restart this syscall, usually */
if (ret == -EINTR)
ret = -ERESTARTNOHAND;
@@ -1461,8 +1456,7 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds,
ret = do_sys_poll(ufds, nfds, to);
- restore_user_sigmask(sigmask, &sigsaved);
-
+ restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR);
/* We can restart this syscall, usually */
if (ret == -EINTR)
ret = -ERESTARTNOHAND;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index abe27ec43176..04f09689cd6d 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -384,6 +384,17 @@ void seq_escape(struct seq_file *m, const char *s, const char *esc)
}
EXPORT_SYMBOL(seq_escape);
+void seq_escape_mem_ascii(struct seq_file *m, const char *src, size_t isz)
+{
+ char *buf;
+ size_t size = seq_get_buf(m, &buf);
+ int ret;
+
+ ret = string_escape_mem_ascii(src, isz, buf, size);
+ seq_commit(m, ret < size ? ret : -1);
+}
+EXPORT_SYMBOL(seq_escape_mem_ascii);
+
void seq_vprintf(struct seq_file *m, const char *f, va_list args)
{
int len;
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 57038604d4a8..d41c21fef138 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -175,6 +175,26 @@ int sysfs_create_group(struct kobject *kobj,
}
EXPORT_SYMBOL_GPL(sysfs_create_group);
+static int internal_create_groups(struct kobject *kobj, int update,
+ const struct attribute_group **groups)
+{
+ int error = 0;
+ int i;
+
+ if (!groups)
+ return 0;
+
+ for (i = 0; groups[i]; i++) {
+ error = internal_create_group(kobj, update, groups[i]);
+ if (error) {
+ while (--i >= 0)
+ sysfs_remove_group(kobj, groups[i]);
+ break;
+ }
+ }
+ return error;
+}
+
/**
* sysfs_create_groups - given a directory kobject, create a bunch of attribute groups
* @kobj: The kobject to create the group on
@@ -191,25 +211,29 @@ EXPORT_SYMBOL_GPL(sysfs_create_group);
int sysfs_create_groups(struct kobject *kobj,
const struct attribute_group **groups)
{
- int error = 0;
- int i;
-
- if (!groups)
- return 0;
-
- for (i = 0; groups[i]; i++) {
- error = sysfs_create_group(kobj, groups[i]);
- if (error) {
- while (--i >= 0)
- sysfs_remove_group(kobj, groups[i]);
- break;
- }
- }
- return error;
+ return internal_create_groups(kobj, 0, groups);
}
EXPORT_SYMBOL_GPL(sysfs_create_groups);
/**
+ * sysfs_update_groups - given a directory kobject, create a bunch of attribute groups
+ * @kobj: The kobject to update the group on
+ * @groups: The attribute groups to update, NULL terminated
+ *
+ * This function update a bunch of attribute groups. If an error occurs when
+ * updating a group, all previously updated groups will be removed together
+ * with already existing (not updated) attributes.
+ *
+ * Returns 0 on success or error code from sysfs_update_group on failure.
+ */
+int sysfs_update_groups(struct kobject *kobj,
+ const struct attribute_group **groups)
+{
+ return internal_create_groups(kobj, 1, groups);
+}
+EXPORT_SYMBOL_GPL(sysfs_update_groups);
+
+/**
* sysfs_update_group - given a directory kobject, update an attribute group
* @kobj: The kobject to update the group on
* @grp: The attribute group to update
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index a5bab190a297..eeeae0475da9 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -505,9 +505,12 @@ static int __tracefs_remove(struct dentry *dentry, struct dentry *parent)
switch (dentry->d_inode->i_mode & S_IFMT) {
case S_IFDIR:
ret = simple_rmdir(parent->d_inode, dentry);
+ if (!ret)
+ fsnotify_rmdir(parent->d_inode, dentry);
break;
default:
simple_unlink(parent->d_inode, dentry);
+ fsnotify_unlink(parent->d_inode, dentry);
break;
}
if (!ret)
diff --git a/fs/ubifs/crypto.c b/fs/ubifs/crypto.c
index 4aaedf2d7f44..22be7aeb96c4 100644
--- a/fs/ubifs/crypto.c
+++ b/fs/ubifs/crypto.c
@@ -29,8 +29,8 @@ int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn,
{
struct ubifs_info *c = inode->i_sb->s_fs_info;
void *p = &dn->data;
- struct page *ret;
unsigned int pad_len = round_up(in_len, UBIFS_CIPHER_BLOCK_SIZE);
+ int err;
ubifs_assert(c, pad_len <= *out_len);
dn->compr_size = cpu_to_le16(in_len);
@@ -39,11 +39,11 @@ int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn,
if (pad_len != in_len)
memset(p + in_len, 0, pad_len - in_len);
- ret = fscrypt_encrypt_page(inode, virt_to_page(&dn->data), pad_len,
- offset_in_page(&dn->data), block, GFP_NOFS);
- if (IS_ERR(ret)) {
- ubifs_err(c, "fscrypt_encrypt_page failed: %ld", PTR_ERR(ret));
- return PTR_ERR(ret);
+ err = fscrypt_encrypt_block_inplace(inode, virt_to_page(p), pad_len,
+ offset_in_page(p), block, GFP_NOFS);
+ if (err) {
+ ubifs_err(c, "fscrypt_encrypt_block_inplace() failed: %d", err);
+ return err;
}
*out_len = pad_len;
@@ -64,10 +64,11 @@ int ubifs_decrypt(const struct inode *inode, struct ubifs_data_node *dn,
}
ubifs_assert(c, dlen <= UBIFS_BLOCK_SIZE);
- err = fscrypt_decrypt_page(inode, virt_to_page(&dn->data), dlen,
- offset_in_page(&dn->data), block);
+ err = fscrypt_decrypt_block_inplace(inode, virt_to_page(&dn->data),
+ dlen, offset_in_page(&dn->data),
+ block);
if (err) {
- ubifs_err(c, "fscrypt_decrypt_page failed: %i", err);
+ ubifs_err(c, "fscrypt_decrypt_block_inplace() failed: %d", err);
return err;
}
*out_len = clen;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index e7276932e433..9bb18311a22f 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -470,13 +470,15 @@ static struct buffer_head *udf_getblk(struct inode *inode, udf_pblk_t block,
return NULL;
}
-/* Extend the file by 'blocks' blocks, return the number of extents added */
+/* Extend the file with new blocks totaling 'new_block_bytes',
+ * return the number of extents added
+ */
static int udf_do_extend_file(struct inode *inode,
struct extent_position *last_pos,
struct kernel_long_ad *last_ext,
- sector_t blocks)
+ loff_t new_block_bytes)
{
- sector_t add;
+ uint32_t add;
int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
struct super_block *sb = inode->i_sb;
struct kernel_lb_addr prealloc_loc = {};
@@ -486,7 +488,7 @@ static int udf_do_extend_file(struct inode *inode,
/* The previous extent is fake and we should not extend by anything
* - there's nothing to do... */
- if (!blocks && fake)
+ if (!new_block_bytes && fake)
return 0;
iinfo = UDF_I(inode);
@@ -517,13 +519,12 @@ static int udf_do_extend_file(struct inode *inode,
/* Can we merge with the previous extent? */
if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) ==
EXT_NOT_RECORDED_NOT_ALLOCATED) {
- add = ((1 << 30) - sb->s_blocksize -
- (last_ext->extLength & UDF_EXTENT_LENGTH_MASK)) >>
- sb->s_blocksize_bits;
- if (add > blocks)
- add = blocks;
- blocks -= add;
- last_ext->extLength += add << sb->s_blocksize_bits;
+ add = (1 << 30) - sb->s_blocksize -
+ (last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
+ if (add > new_block_bytes)
+ add = new_block_bytes;
+ new_block_bytes -= add;
+ last_ext->extLength += add;
}
if (fake) {
@@ -544,28 +545,27 @@ static int udf_do_extend_file(struct inode *inode,
}
/* Managed to do everything necessary? */
- if (!blocks)
+ if (!new_block_bytes)
goto out;
/* All further extents will be NOT_RECORDED_NOT_ALLOCATED */
last_ext->extLocation.logicalBlockNum = 0;
last_ext->extLocation.partitionReferenceNum = 0;
- add = (1 << (30-sb->s_blocksize_bits)) - 1;
- last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
- (add << sb->s_blocksize_bits);
+ add = (1 << 30) - sb->s_blocksize;
+ last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | add;
/* Create enough extents to cover the whole hole */
- while (blocks > add) {
- blocks -= add;
+ while (new_block_bytes > add) {
+ new_block_bytes -= add;
err = udf_add_aext(inode, last_pos, &last_ext->extLocation,
last_ext->extLength, 1);
if (err)
return err;
count++;
}
- if (blocks) {
+ if (new_block_bytes) {
last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
- (blocks << sb->s_blocksize_bits);
+ new_block_bytes;
err = udf_add_aext(inode, last_pos, &last_ext->extLocation,
last_ext->extLength, 1);
if (err)
@@ -596,6 +596,24 @@ out:
return count;
}
+/* Extend the final block of the file to final_block_len bytes */
+static void udf_do_extend_final_block(struct inode *inode,
+ struct extent_position *last_pos,
+ struct kernel_long_ad *last_ext,
+ uint32_t final_block_len)
+{
+ struct super_block *sb = inode->i_sb;
+ uint32_t added_bytes;
+
+ added_bytes = final_block_len -
+ (last_ext->extLength & (sb->s_blocksize - 1));
+ last_ext->extLength += added_bytes;
+ UDF_I(inode)->i_lenExtents += added_bytes;
+
+ udf_write_aext(inode, last_pos, &last_ext->extLocation,
+ last_ext->extLength, 1);
+}
+
static int udf_extend_file(struct inode *inode, loff_t newsize)
{
@@ -605,10 +623,12 @@ static int udf_extend_file(struct inode *inode, loff_t newsize)
int8_t etype;
struct super_block *sb = inode->i_sb;
sector_t first_block = newsize >> sb->s_blocksize_bits, offset;
+ unsigned long partial_final_block;
int adsize;
struct udf_inode_info *iinfo = UDF_I(inode);
struct kernel_long_ad extent;
- int err;
+ int err = 0;
+ int within_final_block;
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
adsize = sizeof(struct short_ad);
@@ -618,18 +638,8 @@ static int udf_extend_file(struct inode *inode, loff_t newsize)
BUG();
etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset);
+ within_final_block = (etype != -1);
- /* File has extent covering the new size (could happen when extending
- * inside a block)? */
- if (etype != -1)
- return 0;
- if (newsize & (sb->s_blocksize - 1))
- offset++;
- /* Extended file just to the boundary of the last file block? */
- if (offset == 0)
- return 0;
-
- /* Truncate is extending the file by 'offset' blocks */
if ((!epos.bh && epos.offset == udf_file_entry_alloc_offset(inode)) ||
(epos.bh && epos.offset == sizeof(struct allocExtDesc))) {
/* File has no extents at all or has empty last
@@ -643,7 +653,22 @@ static int udf_extend_file(struct inode *inode, loff_t newsize)
&extent.extLength, 0);
extent.extLength |= etype << 30;
}
- err = udf_do_extend_file(inode, &epos, &extent, offset);
+
+ partial_final_block = newsize & (sb->s_blocksize - 1);
+
+ /* File has extent covering the new size (could happen when extending
+ * inside a block)?
+ */
+ if (within_final_block) {
+ /* Extending file within the last file block */
+ udf_do_extend_final_block(inode, &epos, &extent,
+ partial_final_block);
+ } else {
+ loff_t add = ((loff_t)offset << sb->s_blocksize_bits) |
+ partial_final_block;
+ err = udf_do_extend_file(inode, &epos, &extent, add);
+ }
+
if (err < 0)
goto out;
err = 0;
@@ -745,6 +770,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
/* Are we beyond EOF? */
if (etype == -1) {
int ret;
+ loff_t hole_len;
isBeyondEOF = true;
if (count) {
if (c)
@@ -760,7 +786,8 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
startnum = (offset > 0);
}
/* Create extents for the hole between EOF and offset */
- ret = udf_do_extend_file(inode, &prev_epos, laarr, offset);
+ hole_len = (loff_t)offset << inode->i_blkbits;
+ ret = udf_do_extend_file(inode, &prev_epos, laarr, hole_len);
if (ret < 0) {
*err = ret;
newblock = 0;
diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c
index 6afab4fdce90..71ca4d047d65 100644
--- a/fs/unicode/utf8-core.c
+++ b/fs/unicode/utf8-core.c
@@ -73,6 +73,34 @@ int utf8_strncasecmp(const struct unicode_map *um,
}
EXPORT_SYMBOL(utf8_strncasecmp);
+/* String cf is expected to be a valid UTF-8 casefolded
+ * string.
+ */
+int utf8_strncasecmp_folded(const struct unicode_map *um,
+ const struct qstr *cf,
+ const struct qstr *s1)
+{
+ const struct utf8data *data = utf8nfdicf(um->version);
+ struct utf8cursor cur1;
+ int c1, c2;
+ int i = 0;
+
+ if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
+ return -EINVAL;
+
+ do {
+ c1 = utf8byte(&cur1);
+ c2 = cf->name[i++];
+ if (c1 < 0)
+ return -EINVAL;
+ if (c1 != c2)
+ return 1;
+ } while (c1);
+
+ return 0;
+}
+EXPORT_SYMBOL(utf8_strncasecmp_folded);
+
int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen)
{
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index ae0b8b5f69e6..ccbdbd62f0d8 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -40,6 +40,16 @@ enum userfaultfd_state {
/*
* Start with fault_pending_wqh and fault_wqh so they're more likely
* to be in the same cacheline.
+ *
+ * Locking order:
+ * fd_wqh.lock
+ * fault_pending_wqh.lock
+ * fault_wqh.lock
+ * event_wqh.lock
+ *
+ * To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
+ * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
+ * also taken in IRQ context.
*/
struct userfaultfd_ctx {
/* waitqueue head for the pending (i.e. not read) userfaults */
@@ -458,7 +468,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
blocking_state = return_to_userland ? TASK_INTERRUPTIBLE :
TASK_KILLABLE;
- spin_lock(&ctx->fault_pending_wqh.lock);
+ spin_lock_irq(&ctx->fault_pending_wqh.lock);
/*
* After the __add_wait_queue the uwq is visible to userland
* through poll/read().
@@ -470,7 +480,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
* __add_wait_queue.
*/
set_current_state(blocking_state);
- spin_unlock(&ctx->fault_pending_wqh.lock);
+ spin_unlock_irq(&ctx->fault_pending_wqh.lock);
if (!is_vm_hugetlb_page(vmf->vma))
must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
@@ -552,13 +562,13 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
* kernel stack can be released after the list_del_init.
*/
if (!list_empty_careful(&uwq.wq.entry)) {
- spin_lock(&ctx->fault_pending_wqh.lock);
+ spin_lock_irq(&ctx->fault_pending_wqh.lock);
/*
* No need of list_del_init(), the uwq on the stack
* will be freed shortly anyway.
*/
list_del(&uwq.wq.entry);
- spin_unlock(&ctx->fault_pending_wqh.lock);
+ spin_unlock_irq(&ctx->fault_pending_wqh.lock);
}
/*
@@ -583,7 +593,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
init_waitqueue_entry(&ewq->wq, current);
release_new_ctx = NULL;
- spin_lock(&ctx->event_wqh.lock);
+ spin_lock_irq(&ctx->event_wqh.lock);
/*
* After the __add_wait_queue the uwq is visible to userland
* through poll/read().
@@ -613,15 +623,15 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
break;
}
- spin_unlock(&ctx->event_wqh.lock);
+ spin_unlock_irq(&ctx->event_wqh.lock);
wake_up_poll(&ctx->fd_wqh, EPOLLIN);
schedule();
- spin_lock(&ctx->event_wqh.lock);
+ spin_lock_irq(&ctx->event_wqh.lock);
}
__set_current_state(TASK_RUNNING);
- spin_unlock(&ctx->event_wqh.lock);
+ spin_unlock_irq(&ctx->event_wqh.lock);
if (release_new_ctx) {
struct vm_area_struct *vma;
@@ -918,10 +928,10 @@ wakeup:
* the last page faults that may have been already waiting on
* the fault_*wqh.
*/
- spin_lock(&ctx->fault_pending_wqh.lock);
+ spin_lock_irq(&ctx->fault_pending_wqh.lock);
__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range);
__wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range);
- spin_unlock(&ctx->fault_pending_wqh.lock);
+ spin_unlock_irq(&ctx->fault_pending_wqh.lock);
/* Flush pending events that may still wait on event_wqh */
wake_up_all(&ctx->event_wqh);
@@ -1134,7 +1144,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
if (!ret && msg->event == UFFD_EVENT_FORK) {
ret = resolve_userfault_fork(ctx, fork_nctx, msg);
- spin_lock(&ctx->event_wqh.lock);
+ spin_lock_irq(&ctx->event_wqh.lock);
if (!list_empty(&fork_event)) {
/*
* The fork thread didn't abort, so we can
@@ -1180,7 +1190,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
if (ret)
userfaultfd_ctx_put(fork_nctx);
}
- spin_unlock(&ctx->event_wqh.lock);
+ spin_unlock_irq(&ctx->event_wqh.lock);
}
return ret;
@@ -1219,14 +1229,14 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,
static void __wake_userfault(struct userfaultfd_ctx *ctx,
struct userfaultfd_wake_range *range)
{
- spin_lock(&ctx->fault_pending_wqh.lock);
+ spin_lock_irq(&ctx->fault_pending_wqh.lock);
/* wake all in the range and autoremove */
if (waitqueue_active(&ctx->fault_pending_wqh))
__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL,
range);
if (waitqueue_active(&ctx->fault_wqh))
__wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, range);
- spin_unlock(&ctx->fault_pending_wqh.lock);
+ spin_unlock_irq(&ctx->fault_pending_wqh.lock);
}
static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,
@@ -1881,7 +1891,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
wait_queue_entry_t *wq;
unsigned long pending = 0, total = 0;
- spin_lock(&ctx->fault_pending_wqh.lock);
+ spin_lock_irq(&ctx->fault_pending_wqh.lock);
list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) {
pending++;
total++;
@@ -1889,7 +1899,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
list_for_each_entry(wq, &ctx->fault_wqh.head, entry) {
total++;
}
- spin_unlock(&ctx->fault_pending_wqh.lock);
+ spin_unlock_irq(&ctx->fault_pending_wqh.lock);
/*
* If more protocols will be added, there will be all shown
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 8da5e6637771..11f703d4a605 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -782,7 +782,7 @@ xfs_add_to_ioend(
atomic_inc(&iop->write_count);
if (!merged) {
- if (bio_full(wpc->ioend->io_bio))
+ if (bio_full(wpc->ioend->io_bio, len))
xfs_chain_bio(wpc->ioend, wbc, bdev, sector);
bio_add_page(wpc->ioend->io_bio, page, len, poff);
}
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 76748255f843..916a35cae5e9 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -367,20 +367,7 @@ restart:
* lock above. Eventually we should look into a way to avoid
* the pointless lock roundtrip.
*/
- if (likely(!(file->f_mode & FMODE_NOCMTIME))) {
- error = file_update_time(file);
- if (error)
- return error;
- }
-
- /*
- * If we're writing the file then make sure to clear the setuid and
- * setgid bits if the process is not being run by root. This keeps
- * people from modifying setuid and setgid binaries.
- */
- if (!IS_NOSEC(inode))
- return file_remove_privs(file);
- return 0;
+ return file_modified(file);
}
static int