aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/cache.c100
-rw-r--r--fs/9p/v9fs.c7
-rw-r--r--fs/9p/vfs_inode.c26
-rw-r--r--fs/9p/vfs_super.c2
-rw-r--r--fs/afs/cache.c150
-rw-r--r--fs/afs/cell.c6
-rw-r--r--fs/afs/file.c6
-rw-r--r--fs/afs/inode.c49
-rw-r--r--fs/afs/internal.h7
-rw-r--r--fs/afs/volume.c6
-rw-r--r--fs/block_dev.c6
-rw-r--r--fs/buffer.c2
-rw-r--r--fs/cachefiles/interface.c61
-rw-r--r--fs/cachefiles/internal.h2
-rw-r--r--fs/cachefiles/main.c1
-rw-r--r--fs/cachefiles/namei.c75
-rw-r--r--fs/cachefiles/rdwr.c1
-rw-r--r--fs/cachefiles/xattr.c8
-rw-r--r--fs/ceph/cache.c113
-rw-r--r--fs/ceph/dir.c1
-rw-r--r--fs/cifs/cache.c168
-rw-r--r--fs/cifs/fscache.c130
-rw-r--r--fs/cifs/fscache.h13
-rw-r--r--fs/direct-io.c18
-rw-r--r--fs/exec.c6
-rw-r--r--fs/ext2/super.c4
-rw-r--r--fs/ext4/inode.c4
-rw-r--r--fs/f2fs/checkpoint.c101
-rw-r--r--fs/f2fs/data.c85
-rw-r--r--fs/f2fs/dir.c32
-rw-r--r--fs/f2fs/extent_cache.c5
-rw-r--r--fs/f2fs/f2fs.h188
-rw-r--r--fs/f2fs/file.c94
-rw-r--r--fs/f2fs/gc.c23
-rw-r--r--fs/f2fs/inline.c3
-rw-r--r--fs/f2fs/inode.c11
-rw-r--r--fs/f2fs/namei.c147
-rw-r--r--fs/f2fs/node.c55
-rw-r--r--fs/f2fs/node.h5
-rw-r--r--fs/f2fs/recovery.c25
-rw-r--r--fs/f2fs/segment.c133
-rw-r--r--fs/f2fs/segment.h27
-rw-r--r--fs/f2fs/super.c348
-rw-r--r--fs/f2fs/sysfs.c73
-rw-r--r--fs/fs-writeback.c9
-rw-r--r--fs/fscache/cache.c2
-rw-r--r--fs/fscache/cookie.c386
-rw-r--r--fs/fscache/fsdef.c55
-rw-r--r--fs/fscache/internal.h44
-rw-r--r--fs/fscache/main.c1
-rw-r--r--fs/fscache/netfs.c71
-rw-r--r--fs/fscache/object-list.c28
-rw-r--r--fs/fscache/object.c66
-rw-r--r--fs/fscache/operation.c26
-rw-r--r--fs/fscache/page.c84
-rw-r--r--fs/fscache/stats.c1
-rw-r--r--fs/fuse/inode.c3
-rw-r--r--fs/gfs2/super.c2
-rw-r--r--fs/gfs2/xattr.c8
-rw-r--r--fs/hugetlbfs/inode.c10
-rw-r--r--fs/internal.h1
-rw-r--r--fs/jffs2/erase.c37
-rw-r--r--fs/lockd/svc.c4
-rw-r--r--fs/locks.c2
-rw-r--r--fs/namei.c138
-rw-r--r--fs/nfs/fscache-index.c159
-rw-r--r--fs/nfs/fscache.c89
-rw-r--r--fs/nfs/fscache.h15
-rw-r--r--fs/nfs/nfs4xdr.c1
-rw-r--r--fs/nfsd/nfs3proc.c18
-rw-r--r--fs/nfsd/nfs3xdr.c67
-rw-r--r--fs/nfsd/nfs4callback.c4
-rw-r--r--fs/nfsd/nfs4layouts.c16
-rw-r--r--fs/nfsd/nfs4proc.c38
-rw-r--r--fs/nfsd/nfs4state.c286
-rw-r--r--fs/nfsd/nfs4xdr.c22
-rw-r--r--fs/nfsd/nfsfh.c12
-rw-r--r--fs/nfsd/nfsproc.c23
-rw-r--r--fs/nfsd/nfsxdr.c63
-rw-r--r--fs/nfsd/trace.h98
-rw-r--r--fs/nfsd/vfs.c65
-rw-r--r--fs/nfsd/vfs.h11
-rw-r--r--fs/nfsd/xdr.h3
-rw-r--r--fs/nfsd/xdr3.h3
-rw-r--r--fs/nfsd/xdr4.h5
-rw-r--r--fs/notify/fanotify/fanotify.c28
-rw-r--r--fs/notify/fanotify/fanotify.h3
-rw-r--r--fs/notify/fanotify/fanotify_user.c2
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c8
-rw-r--r--fs/notify/inotify/inotify_user.c14
-rw-r--r--fs/notify/notification.c3
-rw-r--r--fs/ntfs/mft.c4
-rw-r--r--fs/ocfs2/alloc.c2
-rw-r--r--fs/ocfs2/aops.c4
-rw-r--r--fs/ocfs2/aops.h2
-rw-r--r--fs/ocfs2/cluster/heartbeat.c11
-rw-r--r--fs/ocfs2/dir.c2
-rw-r--r--fs/ocfs2/dlm/dlmast.c2
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h4
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c29
-rw-r--r--fs/ocfs2/dlm/dlmdomain.h25
-rw-r--r--fs/ocfs2/dlm/dlmlock.c3
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c25
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c41
-rw-r--r--fs/ocfs2/dlmglue.c23
-rw-r--r--fs/ocfs2/file.c16
-rw-r--r--fs/ocfs2/filecheck.c358
-rw-r--r--fs/ocfs2/filecheck.h29
-rw-r--r--fs/ocfs2/inode.c8
-rw-r--r--fs/ocfs2/namei.c6
-rw-r--r--fs/ocfs2/ocfs2.h8
-rw-r--r--fs/ocfs2/ocfs2_trace.h6
-rw-r--r--fs/ocfs2/refcounttree.c10
-rw-r--r--fs/ocfs2/stack_user.c2
-rw-r--r--fs/ocfs2/suballoc.c53
-rw-r--r--fs/ocfs2/super.c49
-rw-r--r--fs/ocfs2/uptodate.c3
-rw-r--r--fs/ocfs2/xattr.c2
-rw-r--r--fs/open.c44
-rw-r--r--fs/orangefs/acl.c1
-rw-r--r--fs/orangefs/devorangefs-req.c55
-rw-r--r--fs/orangefs/file.c125
-rw-r--r--fs/orangefs/inode.c4
-rw-r--r--fs/orangefs/orangefs-bufmap.c4
-rw-r--r--fs/orangefs/orangefs-debug.h6
-rw-r--r--fs/orangefs/orangefs-kernel.h80
-rw-r--r--fs/orangefs/orangefs-utils.c2
-rw-r--r--fs/orangefs/protocol.h45
-rw-r--r--fs/pstore/Kconfig101
-rw-r--r--fs/pstore/inode.c2
-rw-r--r--fs/pstore/internal.h3
-rw-r--r--fs/pstore/platform.c372
-rw-r--r--fs/pstore/ram.c2
-rw-r--r--fs/pstore/ram_core.c29
-rw-r--r--fs/reiserfs/reiserfs.h2
-rw-r--r--fs/ubifs/file.c2
-rw-r--r--fs/udf/file.c10
-rw-r--r--fs/udf/ialloc.c4
-rw-r--r--fs/udf/inode.c23
-rw-r--r--fs/udf/super.c260
-rw-r--r--fs/udf/udf_sb.h15
-rw-r--r--fs/udf/udfdecl.h2
-rw-r--r--fs/xfs/xfs_aops.c2
-rw-r--r--fs/xfs/xfs_export.c2
144 files changed, 3377 insertions, 2908 deletions
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index 64c58eb26159..9eb34701a566 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -55,42 +55,27 @@ int v9fs_random_cachetag(struct v9fs_session_info *v9ses)
return scnprintf(v9ses->cachetag, CACHETAG_LEN, "%lu", jiffies);
}
-static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data,
- void *buffer, uint16_t bufmax)
-{
- struct v9fs_session_info *v9ses;
- uint16_t klen = 0;
-
- v9ses = (struct v9fs_session_info *)cookie_netfs_data;
- p9_debug(P9_DEBUG_FSC, "session %p buf %p size %u\n",
- v9ses, buffer, bufmax);
-
- if (v9ses->cachetag)
- klen = strlen(v9ses->cachetag);
-
- if (klen > bufmax)
- return 0;
-
- memcpy(buffer, v9ses->cachetag, klen);
- p9_debug(P9_DEBUG_FSC, "cache session tag %s\n", v9ses->cachetag);
- return klen;
-}
-
const struct fscache_cookie_def v9fs_cache_session_index_def = {
.name = "9P.session",
.type = FSCACHE_COOKIE_TYPE_INDEX,
- .get_key = v9fs_cache_session_get_key,
};
void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses)
{
/* If no cache session tag was specified, we generate a random one. */
- if (!v9ses->cachetag)
- v9fs_random_cachetag(v9ses);
+ if (!v9ses->cachetag) {
+ if (v9fs_random_cachetag(v9ses) < 0) {
+ v9ses->fscache = NULL;
+ return;
+ }
+ }
v9ses->fscache = fscache_acquire_cookie(v9fs_cache_netfs.primary_index,
&v9fs_cache_session_index_def,
- v9ses, true);
+ v9ses->cachetag,
+ strlen(v9ses->cachetag),
+ NULL, 0,
+ v9ses, 0, true);
p9_debug(P9_DEBUG_FSC, "session %p get cookie %p\n",
v9ses, v9ses->fscache);
}
@@ -99,45 +84,15 @@ void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses)
{
p9_debug(P9_DEBUG_FSC, "session %p put cookie %p\n",
v9ses, v9ses->fscache);
- fscache_relinquish_cookie(v9ses->fscache, 0);
+ fscache_relinquish_cookie(v9ses->fscache, NULL, false);
v9ses->fscache = NULL;
}
-
-static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data,
- void *buffer, uint16_t bufmax)
-{
- const struct v9fs_inode *v9inode = cookie_netfs_data;
- memcpy(buffer, &v9inode->qid.path, sizeof(v9inode->qid.path));
- p9_debug(P9_DEBUG_FSC, "inode %p get key %llu\n",
- &v9inode->vfs_inode, v9inode->qid.path);
- return sizeof(v9inode->qid.path);
-}
-
-static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data,
- uint64_t *size)
-{
- const struct v9fs_inode *v9inode = cookie_netfs_data;
- *size = i_size_read(&v9inode->vfs_inode);
-
- p9_debug(P9_DEBUG_FSC, "inode %p get attr %llu\n",
- &v9inode->vfs_inode, *size);
-}
-
-static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data,
- void *buffer, uint16_t buflen)
-{
- const struct v9fs_inode *v9inode = cookie_netfs_data;
- memcpy(buffer, &v9inode->qid.version, sizeof(v9inode->qid.version));
- p9_debug(P9_DEBUG_FSC, "inode %p get aux %u\n",
- &v9inode->vfs_inode, v9inode->qid.version);
- return sizeof(v9inode->qid.version);
-}
-
static enum
fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
const void *buffer,
- uint16_t buflen)
+ uint16_t buflen,
+ loff_t object_size)
{
const struct v9fs_inode *v9inode = cookie_netfs_data;
@@ -154,9 +109,6 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
const struct fscache_cookie_def v9fs_cache_inode_index_def = {
.name = "9p.inode",
.type = FSCACHE_COOKIE_TYPE_DATAFILE,
- .get_key = v9fs_cache_inode_get_key,
- .get_attr = v9fs_cache_inode_get_attr,
- .get_aux = v9fs_cache_inode_get_aux,
.check_aux = v9fs_cache_inode_check_aux,
};
@@ -175,7 +127,13 @@ void v9fs_cache_inode_get_cookie(struct inode *inode)
v9ses = v9fs_inode2v9ses(inode);
v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
&v9fs_cache_inode_index_def,
- v9inode, true);
+ &v9inode->qid.path,
+ sizeof(v9inode->qid.path),
+ &v9inode->qid.version,
+ sizeof(v9inode->qid.version),
+ v9inode,
+ i_size_read(&v9inode->vfs_inode),
+ true);
p9_debug(P9_DEBUG_FSC, "inode %p get cookie %p\n",
inode, v9inode->fscache);
@@ -190,7 +148,8 @@ void v9fs_cache_inode_put_cookie(struct inode *inode)
p9_debug(P9_DEBUG_FSC, "inode %p put cookie %p\n",
inode, v9inode->fscache);
- fscache_relinquish_cookie(v9inode->fscache, 0);
+ fscache_relinquish_cookie(v9inode->fscache, &v9inode->qid.version,
+ false);
v9inode->fscache = NULL;
}
@@ -203,7 +162,7 @@ void v9fs_cache_inode_flush_cookie(struct inode *inode)
p9_debug(P9_DEBUG_FSC, "inode %p flush cookie %p\n",
inode, v9inode->fscache);
- fscache_relinquish_cookie(v9inode->fscache, 1);
+ fscache_relinquish_cookie(v9inode->fscache, NULL, true);
v9inode->fscache = NULL;
}
@@ -236,12 +195,18 @@ void v9fs_cache_inode_reset_cookie(struct inode *inode)
old = v9inode->fscache;
mutex_lock(&v9inode->fscache_lock);
- fscache_relinquish_cookie(v9inode->fscache, 1);
+ fscache_relinquish_cookie(v9inode->fscache, NULL, true);
v9ses = v9fs_inode2v9ses(inode);
v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
&v9fs_cache_inode_index_def,
- v9inode, true);
+ &v9inode->qid.path,
+ sizeof(v9inode->qid.path),
+ &v9inode->qid.version,
+ sizeof(v9inode->qid.version),
+ v9inode,
+ i_size_read(&v9inode->vfs_inode),
+ true);
p9_debug(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p\n",
inode, old, v9inode->fscache);
@@ -367,7 +332,8 @@ void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page)
const struct v9fs_inode *v9inode = V9FS_I(inode);
p9_debug(P9_DEBUG_FSC, "inode %p page %p\n", inode, page);
- ret = fscache_write_page(v9inode->fscache, page, GFP_KERNEL);
+ ret = fscache_write_page(v9inode->fscache, page,
+ i_size_read(&v9inode->vfs_inode), GFP_KERNEL);
p9_debug(P9_DEBUG_FSC, "ret = %d\n", ret);
if (ret != 0)
v9fs_uncache_page(inode, page);
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 8fb89ddc6cc7..e622f0f10502 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -292,6 +292,10 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
#ifdef CONFIG_9P_FSCACHE
kfree(v9ses->cachetag);
v9ses->cachetag = match_strdup(&args[0]);
+ if (!v9ses->cachetag) {
+ ret = -ENOMEM;
+ goto free_and_return;
+ }
#endif
break;
case Opt_cache:
@@ -471,6 +475,9 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
return fid;
err_clnt:
+#ifdef CONFIG_9P_FSCACHE
+ kfree(v9ses->cachetag);
+#endif
p9_client_destroy(v9ses->clnt);
err_names:
kfree(v9ses->uname);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index bdabb2765d1b..9ee534159cc6 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -579,6 +579,24 @@ static int v9fs_at_to_dotl_flags(int flags)
}
/**
+ * v9fs_dec_count - helper functon to drop i_nlink.
+ *
+ * If a directory had nlink <= 2 (including . and ..), then we should not drop
+ * the link count, which indicates the underlying exported fs doesn't maintain
+ * nlink accurately. e.g.
+ * - overlayfs sets nlink to 1 for merged dir
+ * - ext4 (with dir_nlink feature enabled) sets nlink to 1 if a dir has more
+ * than EXT4_LINK_MAX (65000) links.
+ *
+ * @inode: inode whose nlink is being dropped
+ */
+static void v9fs_dec_count(struct inode *inode)
+{
+ if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
+ drop_nlink(inode);
+}
+
+/**
* v9fs_remove - helper function to remove files and directories
* @dir: directory inode that is being deleted
* @dentry: dentry that is being deleted
@@ -621,9 +639,9 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags)
*/
if (flags & AT_REMOVEDIR) {
clear_nlink(inode);
- drop_nlink(dir);
+ v9fs_dec_count(dir);
} else
- drop_nlink(inode);
+ v9fs_dec_count(inode);
v9fs_invalidate_inode_attr(inode);
v9fs_invalidate_inode_attr(dir);
@@ -1024,12 +1042,12 @@ clunk_newdir:
if (S_ISDIR(new_inode->i_mode))
clear_nlink(new_inode);
else
- drop_nlink(new_inode);
+ v9fs_dec_count(new_inode);
}
if (S_ISDIR(old_inode->i_mode)) {
if (!new_inode)
inc_nlink(new_dir);
- drop_nlink(old_dir);
+ v9fs_dec_count(old_dir);
}
v9fs_invalidate_inode_attr(old_inode);
v9fs_invalidate_inode_attr(old_dir);
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index af03c2a901eb..48ce50484e80 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -94,7 +94,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
if (v9ses->cache)
sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_SIZE;
- sb->s_flags |= SB_ACTIVE | SB_DIRSYNC | SB_NOATIME;
+ sb->s_flags |= SB_ACTIVE | SB_DIRSYNC;
if (!v9ses->cache)
sb->s_flags |= SB_SYNCHRONOUS;
diff --git a/fs/afs/cache.c b/fs/afs/cache.c
index f62ff71d28c9..b1c31ec4523a 100644
--- a/fs/afs/cache.c
+++ b/fs/afs/cache.c
@@ -12,167 +12,39 @@
#include <linux/sched.h>
#include "internal.h"
-static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
- void *buffer, uint16_t buflen);
-static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data,
- void *buffer, uint16_t buflen);
-
-static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data,
- void *buffer, uint16_t buflen);
-static void afs_vnode_cache_get_attr(const void *cookie_netfs_data,
- uint64_t *size);
-static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data,
- void *buffer, uint16_t buflen);
static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
const void *buffer,
- uint16_t buflen);
+ uint16_t buflen,
+ loff_t object_size);
struct fscache_netfs afs_cache_netfs = {
.name = "afs",
- .version = 1,
+ .version = 2,
};
struct fscache_cookie_def afs_cell_cache_index_def = {
.name = "AFS.cell",
.type = FSCACHE_COOKIE_TYPE_INDEX,
- .get_key = afs_cell_cache_get_key,
};
struct fscache_cookie_def afs_volume_cache_index_def = {
.name = "AFS.volume",
.type = FSCACHE_COOKIE_TYPE_INDEX,
- .get_key = afs_volume_cache_get_key,
};
struct fscache_cookie_def afs_vnode_cache_index_def = {
- .name = "AFS.vnode",
- .type = FSCACHE_COOKIE_TYPE_DATAFILE,
- .get_key = afs_vnode_cache_get_key,
- .get_attr = afs_vnode_cache_get_attr,
- .get_aux = afs_vnode_cache_get_aux,
- .check_aux = afs_vnode_cache_check_aux,
+ .name = "AFS.vnode",
+ .type = FSCACHE_COOKIE_TYPE_DATAFILE,
+ .check_aux = afs_vnode_cache_check_aux,
};
/*
- * set the key for the index entry
- */
-static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
- void *buffer, uint16_t bufmax)
-{
- const struct afs_cell *cell = cookie_netfs_data;
- uint16_t klen;
-
- _enter("%p,%p,%u", cell, buffer, bufmax);
-
- klen = strlen(cell->name);
- if (klen > bufmax)
- return 0;
-
- memcpy(buffer, cell->name, klen);
- return klen;
-}
-
-/*****************************************************************************/
-/*
- * set the key for the volume index entry
- */
-static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data,
- void *buffer, uint16_t bufmax)
-{
- const struct afs_volume *volume = cookie_netfs_data;
- struct {
- u64 volid;
- } __packed key;
-
- _enter("{%u},%p,%u", volume->type, buffer, bufmax);
-
- if (bufmax < sizeof(key))
- return 0;
-
- key.volid = volume->vid;
- memcpy(buffer, &key, sizeof(key));
- return sizeof(key);
-}
-
-/*****************************************************************************/
-/*
- * set the key for the index entry
- */
-static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data,
- void *buffer, uint16_t bufmax)
-{
- const struct afs_vnode *vnode = cookie_netfs_data;
- struct {
- u32 vnode_id[3];
- } __packed key;
-
- _enter("{%x,%x,%llx},%p,%u",
- vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
- buffer, bufmax);
-
- /* Allow for a 96-bit key */
- memset(&key, 0, sizeof(key));
- key.vnode_id[0] = vnode->fid.vnode;
- key.vnode_id[1] = 0;
- key.vnode_id[2] = 0;
-
- if (sizeof(key) > bufmax)
- return 0;
-
- memcpy(buffer, &key, sizeof(key));
- return sizeof(key);
-}
-
-/*
- * provide updated file attributes
- */
-static void afs_vnode_cache_get_attr(const void *cookie_netfs_data,
- uint64_t *size)
-{
- const struct afs_vnode *vnode = cookie_netfs_data;
-
- _enter("{%x,%x,%llx},",
- vnode->fid.vnode, vnode->fid.unique,
- vnode->status.data_version);
-
- *size = vnode->status.size;
-}
-
-struct afs_vnode_cache_aux {
- u64 data_version;
- u32 fid_unique;
-} __packed;
-
-/*
- * provide new auxiliary cache data
- */
-static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data,
- void *buffer, uint16_t bufmax)
-{
- const struct afs_vnode *vnode = cookie_netfs_data;
- struct afs_vnode_cache_aux aux;
-
- _enter("{%x,%x,%Lx},%p,%u",
- vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
- buffer, bufmax);
-
- memset(&aux, 0, sizeof(aux));
- aux.data_version = vnode->status.data_version;
- aux.fid_unique = vnode->fid.unique;
-
- if (bufmax < sizeof(aux))
- return 0;
-
- memcpy(buffer, &aux, sizeof(aux));
- return sizeof(aux);
-}
-
-/*
* check that the auxiliary data indicates that the entry is still valid
*/
static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
const void *buffer,
- uint16_t buflen)
+ uint16_t buflen,
+ loff_t object_size)
{
struct afs_vnode *vnode = cookie_netfs_data;
struct afs_vnode_cache_aux aux;
@@ -189,12 +61,6 @@ static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
return FSCACHE_CHECKAUX_OBSOLETE;
}
- if (vnode->fid.unique != aux.fid_unique) {
- _leave(" = OBSOLETE [uniq %x != %x]",
- aux.fid_unique, vnode->fid.unique);
- return FSCACHE_CHECKAUX_OBSOLETE;
- }
-
if (vnode->status.data_version != aux.data_version) {
_leave(" = OBSOLETE [vers %llx != %llx]",
aux.data_version, vnode->status.data_version);
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 3d2c5e0e854e..4235a05afc76 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -522,7 +522,9 @@ static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell)
#ifdef CONFIG_AFS_FSCACHE
cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index,
&afs_cell_cache_index_def,
- cell, true);
+ cell->name, strlen(cell->name),
+ NULL, 0,
+ cell, 0, true);
#endif
ret = afs_proc_cell_setup(net, cell);
if (ret < 0)
@@ -547,7 +549,7 @@ static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell)
spin_unlock(&net->proc_cells_lock);
#ifdef CONFIG_AFS_FSCACHE
- fscache_relinquish_cookie(cell->cache, 0);
+ fscache_relinquish_cookie(cell->cache, NULL, false);
cell->cache = NULL;
#endif
diff --git a/fs/afs/file.c b/fs/afs/file.c
index a39192ced99e..79e665a35fea 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -339,7 +339,8 @@ int afs_page_filler(void *data, struct page *page)
/* send the page to the cache */
#ifdef CONFIG_AFS_FSCACHE
if (PageFsCache(page) &&
- fscache_write_page(vnode->cache, page, GFP_KERNEL) != 0) {
+ fscache_write_page(vnode->cache, page, vnode->status.size,
+ GFP_KERNEL) != 0) {
fscache_uncache_page(vnode->cache, page);
BUG_ON(PageFsCache(page));
}
@@ -403,7 +404,8 @@ static void afs_readpages_page_done(struct afs_call *call, struct afs_read *req)
/* send the page to the cache */
#ifdef CONFIG_AFS_FSCACHE
if (PageFsCache(page) &&
- fscache_write_page(vnode->cache, page, GFP_KERNEL) != 0) {
+ fscache_write_page(vnode->cache, page, vnode->status.size,
+ GFP_KERNEL) != 0) {
fscache_uncache_page(vnode->cache, page);
BUG_ON(PageFsCache(page));
}
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 6b39d0255b72..65c5b1edd338 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -243,6 +243,33 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
}
/*
+ * Get a cache cookie for an inode.
+ */
+static void afs_get_inode_cache(struct afs_vnode *vnode)
+{
+#ifdef CONFIG_AFS_FSCACHE
+ struct {
+ u32 vnode_id;
+ u32 unique;
+ u32 vnode_id_ext[2]; /* Allow for a 96-bit key */
+ } __packed key;
+ struct afs_vnode_cache_aux aux;
+
+ key.vnode_id = vnode->fid.vnode;
+ key.unique = vnode->fid.unique;
+ key.vnode_id_ext[0] = 0;
+ key.vnode_id_ext[1] = 0;
+ aux.data_version = vnode->status.data_version;
+
+ vnode->cache = fscache_acquire_cookie(vnode->volume->cache,
+ &afs_vnode_cache_index_def,
+ &key, sizeof(key),
+ &aux, sizeof(aux),
+ vnode, vnode->status.size, true);
+#endif
+}
+
+/*
* inode retrieval
*/
struct inode *afs_iget(struct super_block *sb, struct key *key,
@@ -307,11 +334,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
/* set up caching before mapping the status, as map-status reads the
* first page of symlinks to see if they're really mountpoints */
inode->i_size = vnode->status.size;
-#ifdef CONFIG_AFS_FSCACHE
- vnode->cache = fscache_acquire_cookie(vnode->volume->cache,
- &afs_vnode_cache_index_def,
- vnode, true);
-#endif
+ afs_get_inode_cache(vnode);
ret = afs_inode_map_status(vnode, key);
if (ret < 0)
@@ -327,7 +350,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
/* failure */
bad_inode:
#ifdef CONFIG_AFS_FSCACHE
- fscache_relinquish_cookie(vnode->cache, 0);
+ fscache_relinquish_cookie(vnode->cache, NULL, ret == -ENOENT);
vnode->cache = NULL;
#endif
iget_failed(inode);
@@ -343,6 +366,10 @@ void afs_zap_data(struct afs_vnode *vnode)
{
_enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+#ifdef CONFIG_AFS_FSCACHE
+ fscache_invalidate(vnode->cache);
+#endif
+
/* nuke all the non-dirty pages that aren't locked, mapped or being
* written back in a regular file and completely discard the pages in a
* directory or symlink */
@@ -507,8 +534,14 @@ void afs_evict_inode(struct inode *inode)
}
#ifdef CONFIG_AFS_FSCACHE
- fscache_relinquish_cookie(vnode->cache, 0);
- vnode->cache = NULL;
+ {
+ struct afs_vnode_cache_aux aux;
+
+ aux.data_version = vnode->status.data_version;
+ fscache_relinquish_cookie(vnode->cache, &aux,
+ test_bit(AFS_VNODE_DELETED, &vnode->flags));
+ vnode->cache = NULL;
+ }
#endif
afs_put_permits(vnode->permit_cache);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 72217170b155..a6a1d75eee41 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -559,6 +559,13 @@ struct afs_fs_cursor {
#define AFS_FS_CURSOR_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */
};
+/*
+ * Cache auxiliary data.
+ */
+struct afs_vnode_cache_aux {
+ u64 data_version;
+} __packed;
+
#include <trace/events/afs.h>
/*****************************************************************************/
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index b517a588781f..3037bd01f617 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -225,7 +225,9 @@ void afs_activate_volume(struct afs_volume *volume)
#ifdef CONFIG_AFS_FSCACHE
volume->cache = fscache_acquire_cookie(volume->cell->cache,
&afs_volume_cache_index_def,
- volume, true);
+ &volume->vid, sizeof(volume->vid),
+ NULL, 0,
+ volume, 0, true);
#endif
write_lock(&volume->cell->proc_lock);
@@ -245,7 +247,7 @@ void afs_deactivate_volume(struct afs_volume *volume)
write_unlock(&volume->cell->proc_lock);
#ifdef CONFIG_AFS_FSCACHE
- fscache_relinquish_cookie(volume->cache,
+ fscache_relinquish_cookie(volume->cache, NULL,
test_bit(AFS_VOLUME_DELETED, &volume->flags));
volume->cache = NULL;
#endif
diff --git a/fs/block_dev.c b/fs/block_dev.c
index fe09ef9c21f3..7a506c55a993 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1324,7 +1324,8 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty)
* @bdev: struct bdev to adjust.
*
* This routine checks to see if the bdev size does not match the disk size
- * and adjusts it if it differs.
+ * and adjusts it if it differs. When shrinking the bdev size, its all caches
+ * are freed.
*/
void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
{
@@ -1337,7 +1338,8 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
"%s: detected capacity change from %lld to %lld\n",
disk->disk_name, bdev_size, disk_size);
i_size_write(bdev->bd_inode, disk_size);
- flush_disk(bdev, false);
+ if (bdev_size > disk_size)
+ flush_disk(bdev, false);
}
}
EXPORT_SYMBOL(check_disk_size_change);
diff --git a/fs/buffer.c b/fs/buffer.c
index 9a73924db22f..ec5dd39071e6 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1511,7 +1511,7 @@ void block_invalidatepage(struct page *page, unsigned int offset,
* The get_block cached value has been unconditionally invalidated,
* so real IO is not possible anymore.
*/
- if (offset == 0)
+ if (length == PAGE_SIZE)
try_to_release_page(page, 0);
out:
return;
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index e7f16a77a22a..222bc5d8b62c 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -32,7 +32,7 @@ static struct fscache_object *cachefiles_alloc_object(
struct cachefiles_cache *cache;
struct cachefiles_xattr *auxdata;
unsigned keylen, auxlen;
- void *buffer;
+ void *buffer, *p;
char *key;
cache = container_of(_cache, struct cachefiles_cache, cache);
@@ -65,8 +65,12 @@ static struct fscache_object *cachefiles_alloc_object(
if (!buffer)
goto nomem_buffer;
- keylen = cookie->def->get_key(cookie->netfs_data, buffer + 2, 512);
- ASSERTCMP(keylen, <, 512);
+ keylen = cookie->key_len;
+ if (keylen <= sizeof(cookie->inline_key))
+ p = cookie->inline_key;
+ else
+ p = cookie->key;
+ memcpy(buffer + 2, p, keylen);
*(uint16_t *)buffer = keylen;
((char *)buffer)[keylen + 2] = 0;
@@ -80,15 +84,17 @@ static struct fscache_object *cachefiles_alloc_object(
/* get hold of the auxiliary data and prepend the object type */
auxdata = buffer;
- auxlen = 0;
- if (cookie->def->get_aux) {
- auxlen = cookie->def->get_aux(cookie->netfs_data,
- auxdata->data, 511);
- ASSERTCMP(auxlen, <, 511);
+ auxlen = cookie->aux_len;
+ if (auxlen) {
+ if (auxlen <= sizeof(cookie->inline_aux))
+ p = cookie->inline_aux;
+ else
+ p = cookie->aux;
+ memcpy(auxdata->data, p, auxlen);
}
auxdata->len = auxlen + 1;
- auxdata->type = cookie->def->type;
+ auxdata->type = cookie->type;
lookup_data->auxdata = auxdata;
lookup_data->key = key;
@@ -177,10 +183,12 @@ static void cachefiles_lookup_complete(struct fscache_object *_object)
* increment the usage count on an inode object (may fail if unmounting)
*/
static
-struct fscache_object *cachefiles_grab_object(struct fscache_object *_object)
+struct fscache_object *cachefiles_grab_object(struct fscache_object *_object,
+ enum fscache_obj_ref_trace why)
{
struct cachefiles_object *object =
container_of(_object, struct cachefiles_object, fscache);
+ int u;
_enter("{OBJ%x,%d}", _object->debug_id, atomic_read(&object->usage));
@@ -188,7 +196,9 @@ struct fscache_object *cachefiles_grab_object(struct fscache_object *_object)
ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000);
#endif
- atomic_inc(&object->usage);
+ u = atomic_inc_return(&object->usage);
+ trace_cachefiles_ref(object, _object->cookie,
+ (enum cachefiles_obj_ref_trace)why, u);
return &object->fscache;
}
@@ -202,6 +212,7 @@ static void cachefiles_update_object(struct fscache_object *_object)
struct cachefiles_cache *cache;
struct fscache_cookie *cookie;
const struct cred *saved_cred;
+ const void *aux;
unsigned auxlen;
_enter("{OBJ%x}", _object->debug_id);
@@ -216,26 +227,29 @@ static void cachefiles_update_object(struct fscache_object *_object)
}
cookie = object->fscache.cookie;
+ auxlen = cookie->aux_len;
- if (!cookie->def->get_aux) {
+ if (!auxlen) {
fscache_unuse_cookie(_object);
_leave(" [no aux]");
return;
}
- auxdata = kmalloc(2 + 512 + 3, cachefiles_gfp);
+ auxdata = kmalloc(2 + auxlen + 3, cachefiles_gfp);
if (!auxdata) {
fscache_unuse_cookie(_object);
_leave(" [nomem]");
return;
}
- auxlen = cookie->def->get_aux(cookie->netfs_data, auxdata->data, 511);
+ aux = (auxlen <= sizeof(cookie->inline_aux)) ?
+ cookie->inline_aux : cookie->aux;
+
+ memcpy(auxdata->data, aux, auxlen);
fscache_unuse_cookie(_object);
- ASSERTCMP(auxlen, <, 511);
auxdata->len = auxlen + 1;
- auxdata->type = cookie->def->type;
+ auxdata->type = cookie->type;
cachefiles_begin_secure(cache, &saved_cred);
cachefiles_update_object_xattr(object, auxdata);
@@ -309,10 +323,12 @@ static void cachefiles_drop_object(struct fscache_object *_object)
/*
* dispose of a reference to an object
*/
-static void cachefiles_put_object(struct fscache_object *_object)
+static void cachefiles_put_object(struct fscache_object *_object,
+ enum fscache_obj_ref_trace why)
{
struct cachefiles_object *object;
struct fscache_cache *cache;
+ int u;
ASSERT(_object);
@@ -328,7 +344,11 @@ static void cachefiles_put_object(struct fscache_object *_object)
ASSERTIFCMP(object->fscache.parent,
object->fscache.parent->n_children, >, 0);
- if (atomic_dec_and_test(&object->usage)) {
+ u = atomic_dec_return(&object->usage);
+ trace_cachefiles_ref(object, _object->cookie,
+ (enum cachefiles_obj_ref_trace)why, u);
+ ASSERTCMP(u, !=, -1);
+ if (u == 0) {
_debug("- kill object OBJ%x", object->fscache.debug_id);
ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags));
@@ -421,7 +441,7 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
loff_t oi_size;
int ret;
- _object->cookie->def->get_attr(_object->cookie->netfs_data, &ni_size);
+ ni_size = _object->store_limit_l;
_enter("{OBJ%x},[%llu]",
_object->debug_id, (unsigned long long) ni_size);
@@ -493,8 +513,7 @@ static void cachefiles_invalidate_object(struct fscache_operation *op)
cache = container_of(object->fscache.cache,
struct cachefiles_cache, cache);
- op->object->cookie->def->get_attr(op->object->cookie->netfs_data,
- &ni_size);
+ ni_size = op->object->store_limit_l;
_enter("{OBJ%x},[%llu]",
op->object->debug_id, (unsigned long long)ni_size);
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index bb3a02ca9da4..d2f6f996e65a 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -124,6 +124,8 @@ struct cachefiles_xattr {
uint8_t data[];
};
+#include <trace/events/cachefiles.h>
+
/*
* note change of state for daemon
*/
diff --git a/fs/cachefiles/main.c b/fs/cachefiles/main.c
index 711f13d8c2de..f54d3f5b2e40 100644
--- a/fs/cachefiles/main.c
+++ b/fs/cachefiles/main.c
@@ -22,6 +22,7 @@
#include <linux/statfs.h>
#include <linux/sysctl.h>
#include <linux/miscdevice.h>
+#define CREATE_TRACE_POINTS
#include "internal.h"
unsigned cachefiles_debug;
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 3978b324cbca..0daa1e3fe0df 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -30,11 +30,11 @@
*/
static noinline
void __cachefiles_printk_object(struct cachefiles_object *object,
- const char *prefix,
- u8 *keybuf)
+ const char *prefix)
{
struct fscache_cookie *cookie;
- unsigned keylen, loop;
+ const u8 *k;
+ unsigned loop;
pr_err("%sobject: OBJ%x\n", prefix, object->fscache.debug_id);
pr_err("%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n",
@@ -56,23 +56,16 @@ void __cachefiles_printk_object(struct cachefiles_object *object,
object->fscache.cookie->parent,
object->fscache.cookie->netfs_data,
object->fscache.cookie->flags);
- if (keybuf && cookie->def)
- keylen = cookie->def->get_key(cookie->netfs_data, keybuf,
- CACHEFILES_KEYBUF_SIZE);
- else
- keylen = 0;
+ pr_err("%skey=[%u] '", prefix, cookie->key_len);
+ k = (cookie->key_len <= sizeof(cookie->inline_key)) ?
+ cookie->inline_key : cookie->key;
+ for (loop = 0; loop < cookie->key_len; loop++)
+ pr_cont("%02x", k[loop]);
+ pr_cont("'\n");
} else {
pr_err("%scookie=NULL\n", prefix);
- keylen = 0;
}
spin_unlock(&object->fscache.lock);
-
- if (keylen) {
- pr_err("%skey=[%u] '", prefix, keylen);
- for (loop = 0; loop < keylen; loop++)
- pr_cont("%02x", keybuf[loop]);
- pr_cont("'\n");
- }
}
/*
@@ -81,14 +74,10 @@ void __cachefiles_printk_object(struct cachefiles_object *object,
static noinline void cachefiles_printk_object(struct cachefiles_object *object,
struct cachefiles_object *xobject)
{
- u8 *keybuf;
-
- keybuf = kmalloc(CACHEFILES_KEYBUF_SIZE, GFP_NOIO);
if (object)
- __cachefiles_printk_object(object, "", keybuf);
+ __cachefiles_printk_object(object, "");
if (xobject)
- __cachefiles_printk_object(xobject, "x", keybuf);
- kfree(keybuf);
+ __cachefiles_printk_object(xobject, "x");
}
/*
@@ -120,6 +109,7 @@ static void cachefiles_mark_object_buried(struct cachefiles_cache *cache,
}
write_unlock(&cache->active_lock);
+ trace_cachefiles_mark_buried(NULL, dentry, why);
_leave(" [no owner]");
return;
@@ -130,6 +120,8 @@ found_dentry:
object->fscache.state->name,
dentry);
+ trace_cachefiles_mark_buried(object, dentry, why);
+
if (fscache_object_is_live(&object->fscache)) {
pr_err("\n");
pr_err("Error: Can't preemptively bury live object\n");
@@ -158,13 +150,15 @@ static int cachefiles_mark_object_active(struct cachefiles_cache *cache,
try_again:
write_lock(&cache->active_lock);
+ dentry = object->dentry;
+ trace_cachefiles_mark_active(object, dentry);
+
if (test_and_set_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) {
pr_err("Error: Object already active\n");
cachefiles_printk_object(object, NULL);
BUG();
}
- dentry = object->dentry;
_p = &cache->active_nodes.rb_node;
while (*_p) {
_parent = *_p;
@@ -191,6 +185,8 @@ try_again:
/* an old object from a previous incarnation is hogging the slot - we
* need to wait for it to be destroyed */
wait_for_old_object:
+ trace_cachefiles_wait_active(object, dentry, xobject);
+
if (fscache_object_is_live(&xobject->fscache)) {
pr_err("\n");
pr_err("Error: Unexpected object collision\n");
@@ -248,12 +244,12 @@ wait_for_old_object:
ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags));
- cache->cache.ops->put_object(&xobject->fscache);
+ cache->cache.ops->put_object(&xobject->fscache, cachefiles_obj_put_wait_retry);
goto try_again;
requeue:
clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags);
- cache->cache.ops->put_object(&xobject->fscache);
+ cache->cache.ops->put_object(&xobject->fscache, cachefiles_obj_put_wait_timeo);
_leave(" = -ETIMEDOUT");
return -ETIMEDOUT;
}
@@ -265,6 +261,11 @@ void cachefiles_mark_object_inactive(struct cachefiles_cache *cache,
struct cachefiles_object *object,
blkcnt_t i_blocks)
{
+ struct dentry *dentry = object->dentry;
+ struct inode *inode = d_backing_inode(dentry);
+
+ trace_cachefiles_mark_inactive(object, dentry, inode);
+
write_lock(&cache->active_lock);
rb_erase(&object->active_node, &cache->active_nodes);
clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags);
@@ -288,6 +289,7 @@ void cachefiles_mark_object_inactive(struct cachefiles_cache *cache,
* - unlocks the directory mutex
*/
static int cachefiles_bury_object(struct cachefiles_cache *cache,
+ struct cachefiles_object *object,
struct dentry *dir,
struct dentry *rep,
bool preemptive,
@@ -312,6 +314,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
if (ret < 0) {
cachefiles_io_error(cache, "Unlink security error");
} else {
+ trace_cachefiles_unlink(object, rep, why);
ret = vfs_unlink(d_inode(dir), rep, NULL);
if (preemptive)
@@ -413,6 +416,7 @@ try_again:
if (ret < 0) {
cachefiles_io_error(cache, "Rename security error %d", ret);
} else {
+ trace_cachefiles_rename(object, rep, grave, why);
ret = vfs_rename(d_inode(dir), rep,
d_inode(cache->graveyard), grave, NULL, 0);
if (ret != 0 && ret != -ENOMEM)
@@ -458,7 +462,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
/* we need to check that our parent is _still_ our parent - it
* may have been renamed */
if (dir == object->dentry->d_parent) {
- ret = cachefiles_bury_object(cache, dir,
+ ret = cachefiles_bury_object(cache, object, dir,
object->dentry, false,
FSCACHE_OBJECT_WAS_RETIRED);
} else {
@@ -486,6 +490,7 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
{
struct cachefiles_cache *cache;
struct dentry *dir, *next = NULL;
+ struct inode *inode;
struct path path;
unsigned long start;
const char *name;
@@ -529,13 +534,17 @@ lookup_again:
start = jiffies;
next = lookup_one_len(name, dir, nlen);
cachefiles_hist(cachefiles_lookup_histogram, start);
- if (IS_ERR(next))
+ if (IS_ERR(next)) {
+ trace_cachefiles_lookup(object, next, NULL);
goto lookup_error;
+ }
- _debug("next -> %p %s", next, d_backing_inode(next) ? "positive" : "negative");
+ inode = d_backing_inode(next);
+ trace_cachefiles_lookup(object, next, inode);
+ _debug("next -> %p %s", next, inode ? "positive" : "negative");
if (!key)
- object->new = !d_backing_inode(next);
+ object->new = !inode;
/* if this element of the path doesn't exist, then the lookup phase
* failed, and we can release any readers in the certain knowledge that
@@ -558,6 +567,8 @@ lookup_again:
start = jiffies;
ret = vfs_mkdir(d_inode(dir), next, 0);
cachefiles_hist(cachefiles_mkdir_histogram, start);
+ if (!key)
+ trace_cachefiles_mkdir(object, next, ret);
if (ret < 0)
goto create_error;
@@ -587,6 +598,7 @@ lookup_again:
start = jiffies;
ret = vfs_create(d_inode(dir), next, S_IFREG, true);
cachefiles_hist(cachefiles_create_histogram, start);
+ trace_cachefiles_create(object, next, ret);
if (ret < 0)
goto create_error;
@@ -629,7 +641,8 @@ lookup_again:
* mutex) */
object->dentry = NULL;
- ret = cachefiles_bury_object(cache, dir, next, true,
+ ret = cachefiles_bury_object(cache, object, dir, next,
+ true,
FSCACHE_OBJECT_IS_STALE);
dput(next);
next = NULL;
@@ -955,7 +968,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
/* actually remove the victim (drops the dir mutex) */
_debug("bury");
- ret = cachefiles_bury_object(cache, dir, victim, false,
+ ret = cachefiles_bury_object(cache, NULL, dir, victim, false,
FSCACHE_OBJECT_WAS_CULLED);
if (ret < 0)
goto error;
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 883bc7bb12c5..5082c8a49686 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -952,6 +952,7 @@ error:
* - cache withdrawal is prevented by the caller
*/
void cachefiles_uncache_page(struct fscache_object *_object, struct page *page)
+ __releases(&object->fscache.cookie->lock)
{
struct cachefiles_object *object;
struct cachefiles_cache *cache;
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index d31c1a72d8a5..0a29a00aed2e 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -113,6 +113,7 @@ int cachefiles_set_object_xattr(struct cachefiles_object *object,
/* attempt to install the cache metadata directly */
_debug("SET #%u", auxdata->len);
+ clear_bit(FSCACHE_COOKIE_AUX_UPDATED, &object->fscache.cookie->flags);
ret = vfs_setxattr(dentry, cachefiles_xattr_cache,
&auxdata->type, auxdata->len,
XATTR_CREATE);
@@ -141,6 +142,7 @@ int cachefiles_update_object_xattr(struct cachefiles_object *object,
/* attempt to install the cache metadata directly */
_debug("SET #%u", auxdata->len);
+ clear_bit(FSCACHE_COOKIE_AUX_UPDATED, &object->fscache.cookie->flags);
ret = vfs_setxattr(dentry, cachefiles_xattr_cache,
&auxdata->type, auxdata->len,
XATTR_REPLACE);
@@ -180,7 +182,8 @@ int cachefiles_check_auxdata(struct cachefiles_object *object)
goto error;
xlen--;
- validity = fscache_check_aux(&object->fscache, &auxbuf->data, xlen);
+ validity = fscache_check_aux(&object->fscache, &auxbuf->data, xlen,
+ i_size_read(d_backing_inode(dentry)));
if (validity != FSCACHE_CHECKAUX_OKAY)
goto error;
@@ -249,7 +252,8 @@ int cachefiles_check_object_xattr(struct cachefiles_object *object,
object->fscache.cookie->def->name, dlen);
result = fscache_check_aux(&object->fscache,
- &auxbuf->data, dlen);
+ &auxbuf->data, dlen,
+ i_size_read(d_backing_inode(dentry)));
switch (result) {
/* entry okay as is */
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index a3ab265d3215..33a211b364ed 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -27,7 +27,6 @@
struct ceph_aux_inode {
u64 version;
struct timespec mtime;
- loff_t size;
};
struct fscache_netfs ceph_cache_netfs = {
@@ -41,34 +40,15 @@ static LIST_HEAD(ceph_fscache_list);
struct ceph_fscache_entry {
struct list_head list;
struct fscache_cookie *fscache;
- struct ceph_fsid fsid;
size_t uniq_len;
+ /* The following members must be last */
+ struct ceph_fsid fsid;
char uniquifier[0];
};
-static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
- void *buffer, uint16_t maxbuf)
-{
- const struct ceph_fs_client* fsc = cookie_netfs_data;
- const char *fscache_uniq = fsc->mount_options->fscache_uniq;
- uint16_t fsid_len, uniq_len;
-
- fsid_len = sizeof(fsc->client->fsid);
- uniq_len = fscache_uniq ? strlen(fscache_uniq) : 0;
- if (fsid_len + uniq_len > maxbuf)
- return 0;
-
- memcpy(buffer, &fsc->client->fsid, fsid_len);
- if (uniq_len)
- memcpy(buffer + fsid_len, fscache_uniq, uniq_len);
-
- return fsid_len + uniq_len;
-}
-
static const struct fscache_cookie_def ceph_fscache_fsid_object_def = {
.name = "CEPH.fsid",
.type = FSCACHE_COOKIE_TYPE_INDEX,
- .get_key = ceph_fscache_session_get_key,
};
int ceph_fscache_register(void)
@@ -110,16 +90,19 @@ int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
goto out_unlock;
}
+ memcpy(&ent->fsid, fsid, sizeof(*fsid));
+ if (uniq_len > 0) {
+ memcpy(&ent->uniquifier, fscache_uniq, uniq_len);
+ ent->uniq_len = uniq_len;
+ }
+
fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
&ceph_fscache_fsid_object_def,
- fsc, true);
+ &ent->fsid, sizeof(ent->fsid) + uniq_len,
+ NULL, 0,
+ fsc, 0, true);
if (fsc->fscache) {
- memcpy(&ent->fsid, fsid, sizeof(*fsid));
- if (uniq_len > 0) {
- memcpy(&ent->uniquifier, fscache_uniq, uniq_len);
- ent->uniq_len = uniq_len;
- }
ent->fscache = fsc->fscache;
list_add_tail(&ent->list, &ceph_fscache_list);
} else {
@@ -133,59 +116,21 @@ out_unlock:
return err;
}
-static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data,
- void *buffer, uint16_t maxbuf)
-{
- const struct ceph_inode_info* ci = cookie_netfs_data;
- uint16_t klen;
-
- /* use ceph virtual inode (id + snapshot) */
- klen = sizeof(ci->i_vino);
- if (klen > maxbuf)
- return 0;
-
- memcpy(buffer, &ci->i_vino, klen);
- return klen;
-}
-
-static uint16_t ceph_fscache_inode_get_aux(const void *cookie_netfs_data,
- void *buffer, uint16_t bufmax)
-{
- struct ceph_aux_inode aux;
- const struct ceph_inode_info* ci = cookie_netfs_data;
- const struct inode* inode = &ci->vfs_inode;
-
- memset(&aux, 0, sizeof(aux));
- aux.version = ci->i_version;
- aux.mtime = inode->i_mtime;
- aux.size = i_size_read(inode);
-
- memcpy(buffer, &aux, sizeof(aux));
-
- return sizeof(aux);
-}
-
-static void ceph_fscache_inode_get_attr(const void *cookie_netfs_data,
- uint64_t *size)
-{
- const struct ceph_inode_info* ci = cookie_netfs_data;
- *size = i_size_read(&ci->vfs_inode);
-}
-
static enum fscache_checkaux ceph_fscache_inode_check_aux(
- void *cookie_netfs_data, const void *data, uint16_t dlen)
+ void *cookie_netfs_data, const void *data, uint16_t dlen,
+ loff_t object_size)
{
struct ceph_aux_inode aux;
struct ceph_inode_info* ci = cookie_netfs_data;
struct inode* inode = &ci->vfs_inode;
- if (dlen != sizeof(aux))
+ if (dlen != sizeof(aux) ||
+ i_size_read(inode) != object_size)
return FSCACHE_CHECKAUX_OBSOLETE;
memset(&aux, 0, sizeof(aux));
aux.version = ci->i_version;
aux.mtime = inode->i_mtime;
- aux.size = i_size_read(inode);
if (memcmp(data, &aux, sizeof(aux)) != 0)
return FSCACHE_CHECKAUX_OBSOLETE;
@@ -197,9 +142,6 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux(
static const struct fscache_cookie_def ceph_fscache_inode_object_def = {
.name = "CEPH.inode",
.type = FSCACHE_COOKIE_TYPE_DATAFILE,
- .get_key = ceph_fscache_inode_get_key,
- .get_attr = ceph_fscache_inode_get_attr,
- .get_aux = ceph_fscache_inode_get_aux,
.check_aux = ceph_fscache_inode_check_aux,
};
@@ -207,6 +149,7 @@ void ceph_fscache_register_inode_cookie(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_aux_inode aux;
/* No caching for filesystem */
if (!fsc->fscache)
@@ -218,9 +161,14 @@ void ceph_fscache_register_inode_cookie(struct inode *inode)
inode_lock_nested(inode, I_MUTEX_CHILD);
if (!ci->fscache) {
+ memset(&aux, 0, sizeof(aux));
+ aux.version = ci->i_version;
+ aux.mtime = inode->i_mtime;
ci->fscache = fscache_acquire_cookie(fsc->fscache,
- &ceph_fscache_inode_object_def,
- ci, false);
+ &ceph_fscache_inode_object_def,
+ &ci->i_vino, sizeof(ci->i_vino),
+ &aux, sizeof(aux),
+ ci, i_size_read(inode), false);
}
inode_unlock(inode);
}
@@ -235,7 +183,7 @@ void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci)
ci->fscache = NULL;
fscache_uncache_all_inode_pages(cookie, &ci->vfs_inode);
- fscache_relinquish_cookie(cookie, 0);
+ fscache_relinquish_cookie(cookie, &ci->i_vino, false);
}
static bool ceph_fscache_can_enable(void *data)
@@ -254,11 +202,11 @@ void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp)
if (inode_is_open_for_write(inode)) {
dout("fscache_file_set_cookie %p %p disabling cache\n",
inode, filp);
- fscache_disable_cookie(ci->fscache, false);
+ fscache_disable_cookie(ci->fscache, &ci->i_vino, false);
fscache_uncache_all_inode_pages(ci->fscache, inode);
} else {
- fscache_enable_cookie(ci->fscache, ceph_fscache_can_enable,
- inode);
+ fscache_enable_cookie(ci->fscache, &ci->i_vino, i_size_read(inode),
+ ceph_fscache_can_enable, inode);
if (fscache_cookie_enabled(ci->fscache)) {
dout("fscache_file_set_cookie %p %p enabling cache\n",
inode, filp);
@@ -351,7 +299,8 @@ void ceph_readpage_to_fscache(struct inode *inode, struct page *page)
if (!cache_valid(ci))
return;
- ret = fscache_write_page(ci->fscache, page, GFP_KERNEL);
+ ret = fscache_write_page(ci->fscache, page, i_size_read(inode),
+ GFP_KERNEL);
if (ret)
fscache_uncache_page(ci->fscache, page);
}
@@ -385,7 +334,7 @@ void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
WARN_ON_ONCE(!found);
mutex_unlock(&ceph_fscache_lock);
- __fscache_relinquish_cookie(fsc->fscache, 0);
+ __fscache_relinquish_cookie(fsc->fscache, NULL, false);
}
fsc->fscache = NULL;
}
@@ -402,7 +351,7 @@ void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci)
* truncate while the caller holds CEPH_CAP_FILE_RD */
mutex_lock(&ci->i_truncate_mutex);
if (!cache_valid(ci)) {
- if (fscache_check_consistency(ci->fscache))
+ if (fscache_check_consistency(ci->fscache, &ci->i_vino))
fscache_invalidate(ci->fscache);
spin_lock(&ci->i_ceph_lock);
ci->i_fscache_gen = ci->i_rdcache_gen;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index f1d9c6cc0491..2bdd561c4c68 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -2,7 +2,6 @@
#include <linux/ceph/ceph_debug.h>
#include <linux/spinlock.h>
-#include <linux/fs_struct.h>
#include <linux/namei.h>
#include <linux/slab.h>
#include <linux/sched.h>
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
index 2c14020e5e1d..edf5f40898bf 100644
--- a/fs/cifs/cache.c
+++ b/fs/cifs/cache.c
@@ -46,67 +46,11 @@ void cifs_fscache_unregister(void)
}
/*
- * Key layout of CIFS server cache index object
- */
-struct cifs_server_key {
- uint16_t family; /* address family */
- __be16 port; /* IP port */
- union {
- struct in_addr ipv4_addr;
- struct in6_addr ipv6_addr;
- } addr[0];
-};
-
-/*
- * Server object keyed by {IPaddress,port,family} tuple
- */
-static uint16_t cifs_server_get_key(const void *cookie_netfs_data,
- void *buffer, uint16_t maxbuf)
-{
- const struct TCP_Server_Info *server = cookie_netfs_data;
- const struct sockaddr *sa = (struct sockaddr *) &server->dstaddr;
- const struct sockaddr_in *addr = (struct sockaddr_in *) sa;
- const struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *) sa;
- struct cifs_server_key *key = buffer;
- uint16_t key_len = sizeof(struct cifs_server_key);
-
- memset(key, 0, key_len);
-
- /*
- * Should not be a problem as sin_family/sin6_family overlays
- * sa_family field
- */
- switch (sa->sa_family) {
- case AF_INET:
- key->family = sa->sa_family;
- key->port = addr->sin_port;
- key->addr[0].ipv4_addr = addr->sin_addr;
- key_len += sizeof(key->addr[0].ipv4_addr);
- break;
-
- case AF_INET6:
- key->family = sa->sa_family;
- key->port = addr6->sin6_port;
- key->addr[0].ipv6_addr = addr6->sin6_addr;
- key_len += sizeof(key->addr[0].ipv6_addr);
- break;
-
- default:
- cifs_dbg(VFS, "Unknown network family '%d'\n", sa->sa_family);
- key_len = 0;
- break;
- }
-
- return key_len;
-}
-
-/*
* Server object for FS-Cache
*/
const struct fscache_cookie_def cifs_fscache_server_index_def = {
.name = "CIFS.server",
.type = FSCACHE_COOKIE_TYPE_INDEX,
- .get_key = cifs_server_get_key,
};
/*
@@ -116,7 +60,7 @@ struct cifs_fscache_super_auxdata {
u64 resource_id; /* unique server resource id */
};
-static char *extract_sharename(const char *treename)
+char *extract_sharename(const char *treename)
{
const char *src;
char *delim, *dst;
@@ -140,56 +84,11 @@ static char *extract_sharename(const char *treename)
return dst;
}
-/*
- * Superblock object currently keyed by share name
- */
-static uint16_t cifs_super_get_key(const void *cookie_netfs_data, void *buffer,
- uint16_t maxbuf)
-{
- const struct cifs_tcon *tcon = cookie_netfs_data;
- char *sharename;
- uint16_t len;
-
- sharename = extract_sharename(tcon->treeName);
- if (IS_ERR(sharename)) {
- cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__);
- sharename = NULL;
- return 0;
- }
-
- len = strlen(sharename);
- if (len > maxbuf)
- return 0;
-
- memcpy(buffer, sharename, len);
-
- kfree(sharename);
-
- return len;
-}
-
-static uint16_t
-cifs_fscache_super_get_aux(const void *cookie_netfs_data, void *buffer,
- uint16_t maxbuf)
-{
- struct cifs_fscache_super_auxdata auxdata;
- const struct cifs_tcon *tcon = cookie_netfs_data;
-
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.resource_id = tcon->resource_id;
-
- if (maxbuf > sizeof(auxdata))
- maxbuf = sizeof(auxdata);
-
- memcpy(buffer, &auxdata, maxbuf);
-
- return maxbuf;
-}
-
static enum
fscache_checkaux cifs_fscache_super_check_aux(void *cookie_netfs_data,
const void *data,
- uint16_t datalen)
+ uint16_t datalen,
+ loff_t object_size)
{
struct cifs_fscache_super_auxdata auxdata;
const struct cifs_tcon *tcon = cookie_netfs_data;
@@ -212,68 +111,14 @@ fscache_checkaux cifs_fscache_super_check_aux(void *cookie_netfs_data,
const struct fscache_cookie_def cifs_fscache_super_index_def = {
.name = "CIFS.super",
.type = FSCACHE_COOKIE_TYPE_INDEX,
- .get_key = cifs_super_get_key,
- .get_aux = cifs_fscache_super_get_aux,
.check_aux = cifs_fscache_super_check_aux,
};
-/*
- * Auxiliary data attached to CIFS inode within the cache
- */
-struct cifs_fscache_inode_auxdata {
- struct timespec last_write_time;
- struct timespec last_change_time;
- u64 eof;
-};
-
-static uint16_t cifs_fscache_inode_get_key(const void *cookie_netfs_data,
- void *buffer, uint16_t maxbuf)
-{
- const struct cifsInodeInfo *cifsi = cookie_netfs_data;
- uint16_t keylen;
-
- /* use the UniqueId as the key */
- keylen = sizeof(cifsi->uniqueid);
- if (keylen > maxbuf)
- keylen = 0;
- else
- memcpy(buffer, &cifsi->uniqueid, keylen);
-
- return keylen;
-}
-
-static void
-cifs_fscache_inode_get_attr(const void *cookie_netfs_data, uint64_t *size)
-{
- const struct cifsInodeInfo *cifsi = cookie_netfs_data;
-
- *size = cifsi->vfs_inode.i_size;
-}
-
-static uint16_t
-cifs_fscache_inode_get_aux(const void *cookie_netfs_data, void *buffer,
- uint16_t maxbuf)
-{
- struct cifs_fscache_inode_auxdata auxdata;
- const struct cifsInodeInfo *cifsi = cookie_netfs_data;
-
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.eof = cifsi->server_eof;
- auxdata.last_write_time = cifsi->vfs_inode.i_mtime;
- auxdata.last_change_time = cifsi->vfs_inode.i_ctime;
-
- if (maxbuf > sizeof(auxdata))
- maxbuf = sizeof(auxdata);
-
- memcpy(buffer, &auxdata, maxbuf);
-
- return maxbuf;
-}
-
static enum
fscache_checkaux cifs_fscache_inode_check_aux(void *cookie_netfs_data,
const void *data,
- uint16_t datalen)
+ uint16_t datalen,
+ loff_t object_size)
{
struct cifs_fscache_inode_auxdata auxdata;
struct cifsInodeInfo *cifsi = cookie_netfs_data;
@@ -295,8 +140,5 @@ fscache_checkaux cifs_fscache_inode_check_aux(void *cookie_netfs_data,
const struct fscache_cookie_def cifs_fscache_inode_object_def = {
.name = "CIFS.uniqueid",
.type = FSCACHE_COOKIE_TYPE_DATAFILE,
- .get_key = cifs_fscache_inode_get_key,
- .get_attr = cifs_fscache_inode_get_attr,
- .get_aux = cifs_fscache_inode_get_aux,
.check_aux = cifs_fscache_inode_check_aux,
};
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index 8d4b7bc8ae91..25d3f66b2d50 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -23,11 +23,63 @@
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
+/*
+ * Key layout of CIFS server cache index object
+ */
+struct cifs_server_key {
+ struct {
+ uint16_t family; /* address family */
+ __be16 port; /* IP port */
+ } hdr;
+ union {
+ struct in_addr ipv4_addr;
+ struct in6_addr ipv6_addr;
+ };
+} __packed;
+
+/*
+ * Get a cookie for a server object keyed by {IPaddress,port,family} tuple
+ */
void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server)
{
+ const struct sockaddr *sa = (struct sockaddr *) &server->dstaddr;
+ const struct sockaddr_in *addr = (struct sockaddr_in *) sa;
+ const struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *) sa;
+ struct cifs_server_key key;
+ uint16_t key_len = sizeof(key.hdr);
+
+ memset(&key, 0, sizeof(key));
+
+ /*
+ * Should not be a problem as sin_family/sin6_family overlays
+ * sa_family field
+ */
+ key.hdr.family = sa->sa_family;
+ switch (sa->sa_family) {
+ case AF_INET:
+ key.hdr.port = addr->sin_port;
+ key.ipv4_addr = addr->sin_addr;
+ key_len += sizeof(key.ipv4_addr);
+ break;
+
+ case AF_INET6:
+ key.hdr.port = addr6->sin6_port;
+ key.ipv6_addr = addr6->sin6_addr;
+ key_len += sizeof(key.ipv6_addr);
+ break;
+
+ default:
+ cifs_dbg(VFS, "Unknown network family '%d'\n", sa->sa_family);
+ server->fscache = NULL;
+ return;
+ }
+
server->fscache =
fscache_acquire_cookie(cifs_fscache_netfs.primary_index,
- &cifs_fscache_server_index_def, server, true);
+ &cifs_fscache_server_index_def,
+ &key, key_len,
+ NULL, 0,
+ server, 0, true);
cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
__func__, server, server->fscache);
}
@@ -36,17 +88,29 @@ void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server)
{
cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
__func__, server, server->fscache);
- fscache_relinquish_cookie(server->fscache, 0);
+ fscache_relinquish_cookie(server->fscache, NULL, false);
server->fscache = NULL;
}
void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
{
struct TCP_Server_Info *server = tcon->ses->server;
+ char *sharename;
+
+ sharename = extract_sharename(tcon->treeName);
+ if (IS_ERR(sharename)) {
+ cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__);
+ tcon->fscache = NULL;
+ return;
+ }
tcon->fscache =
fscache_acquire_cookie(server->fscache,
- &cifs_fscache_super_index_def, tcon, true);
+ &cifs_fscache_super_index_def,
+ sharename, strlen(sharename),
+ &tcon->resource_id, sizeof(tcon->resource_id),
+ tcon, 0, true);
+ kfree(sharename);
cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
__func__, server->fscache, tcon->fscache);
}
@@ -54,10 +118,28 @@ void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon)
{
cifs_dbg(FYI, "%s: (0x%p)\n", __func__, tcon->fscache);
- fscache_relinquish_cookie(tcon->fscache, 0);
+ fscache_relinquish_cookie(tcon->fscache, &tcon->resource_id, false);
tcon->fscache = NULL;
}
+static void cifs_fscache_acquire_inode_cookie(struct cifsInodeInfo *cifsi,
+ struct cifs_tcon *tcon)
+{
+ struct cifs_fscache_inode_auxdata auxdata;
+
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.eof = cifsi->server_eof;
+ auxdata.last_write_time = cifsi->vfs_inode.i_mtime;
+ auxdata.last_change_time = cifsi->vfs_inode.i_ctime;
+
+ cifsi->fscache =
+ fscache_acquire_cookie(tcon->fscache,
+ &cifs_fscache_inode_object_def,
+ &cifsi->uniqueid, sizeof(cifsi->uniqueid),
+ &auxdata, sizeof(auxdata),
+ cifsi, cifsi->vfs_inode.i_size, true);
+}
+
static void cifs_fscache_enable_inode_cookie(struct inode *inode)
{
struct cifsInodeInfo *cifsi = CIFS_I(inode);
@@ -67,21 +149,28 @@ static void cifs_fscache_enable_inode_cookie(struct inode *inode)
if (cifsi->fscache)
return;
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) {
- cifsi->fscache = fscache_acquire_cookie(tcon->fscache,
- &cifs_fscache_inode_object_def, cifsi, true);
- cifs_dbg(FYI, "%s: got FH cookie (0x%p/0x%p)\n",
- __func__, tcon->fscache, cifsi->fscache);
- }
+ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE))
+ return;
+
+ cifs_fscache_acquire_inode_cookie(cifsi, tcon);
+
+ cifs_dbg(FYI, "%s: got FH cookie (0x%p/0x%p)\n",
+ __func__, tcon->fscache, cifsi->fscache);
}
void cifs_fscache_release_inode_cookie(struct inode *inode)
{
+ struct cifs_fscache_inode_auxdata auxdata;
struct cifsInodeInfo *cifsi = CIFS_I(inode);
if (cifsi->fscache) {
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.eof = cifsi->server_eof;
+ auxdata.last_write_time = cifsi->vfs_inode.i_mtime;
+ auxdata.last_change_time = cifsi->vfs_inode.i_ctime;
+
cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache);
- fscache_relinquish_cookie(cifsi->fscache, 0);
+ fscache_relinquish_cookie(cifsi->fscache, &auxdata, false);
cifsi->fscache = NULL;
}
}
@@ -93,7 +182,7 @@ static void cifs_fscache_disable_inode_cookie(struct inode *inode)
if (cifsi->fscache) {
cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache);
fscache_uncache_all_inode_pages(cifsi->fscache, inode);
- fscache_relinquish_cookie(cifsi->fscache, 1);
+ fscache_relinquish_cookie(cifsi->fscache, NULL, true);
cifsi->fscache = NULL;
}
}
@@ -110,16 +199,14 @@ void cifs_fscache_reset_inode_cookie(struct inode *inode)
{
struct cifsInodeInfo *cifsi = CIFS_I(inode);
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
struct fscache_cookie *old = cifsi->fscache;
if (cifsi->fscache) {
/* retire the current fscache cache and get a new one */
- fscache_relinquish_cookie(cifsi->fscache, 1);
+ fscache_relinquish_cookie(cifsi->fscache, NULL, true);
- cifsi->fscache = fscache_acquire_cookie(
- cifs_sb_master_tcon(cifs_sb)->fscache,
- &cifs_fscache_inode_object_def,
- cifsi, true);
+ cifs_fscache_acquire_inode_cookie(cifsi, tcon);
cifs_dbg(FYI, "%s: new cookie 0x%p oldcookie 0x%p\n",
__func__, cifsi->fscache, old);
}
@@ -214,13 +301,15 @@ int __cifs_readpages_from_fscache(struct inode *inode,
void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
int ret;
cifs_dbg(FYI, "%s: (fsc: %p, p: %p, i: %p)\n",
- __func__, CIFS_I(inode)->fscache, page, inode);
- ret = fscache_write_page(CIFS_I(inode)->fscache, page, GFP_KERNEL);
+ __func__, cifsi->fscache, page, inode);
+ ret = fscache_write_page(cifsi->fscache, page,
+ cifsi->vfs_inode.i_size, GFP_KERNEL);
if (ret != 0)
- fscache_uncache_page(CIFS_I(inode)->fscache, page);
+ fscache_uncache_page(cifsi->fscache, page);
}
void __cifs_fscache_readpages_cancel(struct inode *inode, struct list_head *pages)
@@ -239,4 +328,3 @@ void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode)
fscache_wait_on_page_write(cookie, page);
fscache_uncache_page(cookie, page);
}
-
diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
index 24794b6cd8ec..c7e3ac251e16 100644
--- a/fs/cifs/fscache.h
+++ b/fs/cifs/fscache.h
@@ -27,6 +27,18 @@
#ifdef CONFIG_CIFS_FSCACHE
+/*
+ * Auxiliary data attached to CIFS inode within the cache
+ */
+struct cifs_fscache_inode_auxdata {
+ struct timespec last_write_time;
+ struct timespec last_change_time;
+ u64 eof;
+};
+
+/*
+ * cache.c
+ */
extern struct fscache_netfs cifs_fscache_netfs;
extern const struct fscache_cookie_def cifs_fscache_server_index_def;
extern const struct fscache_cookie_def cifs_fscache_super_index_def;
@@ -34,6 +46,7 @@ extern const struct fscache_cookie_def cifs_fscache_inode_object_def;
extern int cifs_fscache_register(void);
extern void cifs_fscache_unregister(void);
+extern char *extract_sharename(const char *);
/*
* fscache.c
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1357ef563893..874607bb6e02 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -315,8 +315,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
dio_warn_stale_pagecache(dio->iocb->ki_filp);
}
- if (!(dio->flags & DIO_SKIP_DIO_COUNT))
- inode_dio_end(dio->inode);
+ inode_dio_end(dio->inode);
if (flags & DIO_COMPLETE_ASYNC) {
/*
@@ -1178,9 +1177,9 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
unsigned blkbits = i_blkbits;
unsigned blocksize_mask = (1 << blkbits) - 1;
ssize_t retval = -EINVAL;
- size_t count = iov_iter_count(iter);
+ const size_t count = iov_iter_count(iter);
loff_t offset = iocb->ki_pos;
- loff_t end = offset + count;
+ const loff_t end = offset + count;
struct dio *dio;
struct dio_submit sdio = { 0, };
struct buffer_head map_bh = { 0, };
@@ -1201,7 +1200,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
}
/* watch out for a 0 len io from a tricksy fs */
- if (iov_iter_rw(iter) == READ && !iov_iter_count(iter))
+ if (iov_iter_rw(iter) == READ && !count)
return 0;
dio = kmem_cache_alloc(dio_cache, GFP_KERNEL);
@@ -1252,8 +1251,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
*/
if (is_sync_kiocb(iocb))
dio->is_async = false;
- else if (!(dio->flags & DIO_ASYNC_EXTEND) &&
- iov_iter_rw(iter) == WRITE && end > i_size_read(inode))
+ else if (iov_iter_rw(iter) == WRITE && end > i_size_read(inode))
dio->is_async = false;
else
dio->is_async = true;
@@ -1297,8 +1295,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
/*
* Will be decremented at I/O completion time.
*/
- if (!(dio->flags & DIO_SKIP_DIO_COUNT))
- inode_dio_begin(inode);
+ inode_dio_begin(inode);
retval = 0;
sdio.blkbits = blkbits;
@@ -1318,8 +1315,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
dio->should_dirty = (iter->type == ITER_IOVEC);
sdio.iter = iter;
- sdio.final_block_in_request =
- (offset + iov_iter_count(iter)) >> blkbits;
+ sdio.final_block_in_request = end >> blkbits;
/*
* In case of non-aligned buffers, we may need 2 more
diff --git a/fs/exec.c b/fs/exec.c
index 7eb8d21bcab9..a919a827d181 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -895,13 +895,13 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size,
if (!S_ISREG(file_inode(file)->i_mode) || max_size < 0)
return -EINVAL;
- ret = security_kernel_read_file(file, id);
+ ret = deny_write_access(file);
if (ret)
return ret;
- ret = deny_write_access(file);
+ ret = security_kernel_read_file(file, id);
if (ret)
- return ret;
+ goto out;
i_size = i_size_read(file_inode(file));
if (max_size > 0 && i_size > max_size) {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 7666c065b96f..de1694512f1f 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -827,7 +827,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
unsigned long logic_sb_block;
unsigned long offset = 0;
unsigned long def_mount_opts;
- long ret = -EINVAL;
+ long ret = -ENOMEM;
int blocksize = BLOCK_SIZE;
int db_count;
int i, j;
@@ -835,7 +835,6 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
int err;
struct ext2_mount_options opts;
- err = -ENOMEM;
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
goto failed;
@@ -851,6 +850,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_daxdev = dax_dev;
spin_lock_init(&sbi->s_lock);
+ ret = -EINVAL;
/*
* See what the current blocksize for the device is, and
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 18aa2ef963ad..129205028300 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5024,12 +5024,12 @@ static int other_inode_match(struct inode * inode, unsigned long ino,
if ((inode->i_ino != ino) ||
(inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
- I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
+ I_DIRTY_INODE)) ||
((inode->i_state & I_DIRTY_TIME) == 0))
return 0;
spin_lock(&inode->i_lock);
if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
- I_DIRTY_SYNC | I_DIRTY_DATASYNC)) == 0) &&
+ I_DIRTY_INODE)) == 0) &&
(inode->i_state & I_DIRTY_TIME)) {
struct ext4_inode_info *ei = EXT4_I(inode);
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 512dca8abc7d..bf779461df13 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -68,6 +68,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
.old_blkaddr = index,
.new_blkaddr = index,
.encrypted_page = NULL,
+ .is_meta = is_meta,
};
if (unlikely(!is_meta))
@@ -162,6 +163,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
.op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD,
.encrypted_page = NULL,
.in_list = false,
+ .is_meta = (type != META_POR),
};
struct blk_plug plug;
@@ -569,13 +571,8 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
struct node_info ni;
int err = acquire_orphan_inode(sbi);
- if (err) {
- set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: orphan failed (ino=%x), run fsck to fix.",
- __func__, ino);
- return err;
- }
+ if (err)
+ goto err_out;
__add_ino_entry(sbi, ino, 0, ORPHAN_INO);
@@ -589,6 +586,11 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
return PTR_ERR(inode);
}
+ err = dquot_initialize(inode);
+ if (err)
+ goto err_out;
+
+ dquot_initialize(inode);
clear_nlink(inode);
/* truncate all the data during iput */
@@ -598,14 +600,18 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
/* ENOMEM was fully retried in f2fs_evict_inode. */
if (ni.blk_addr != NULL_ADDR) {
- set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: orphan failed (ino=%x) by kernel, retry mount.",
- __func__, ino);
- return -EIO;
+ err = -EIO;
+ goto err_out;
}
__remove_ino_entry(sbi, ino, ORPHAN_INO);
return 0;
+
+err_out:
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: orphan failed (ino=%x), run fsck to fix.",
+ __func__, ino);
+ return err;
}
int recover_orphan_inodes(struct f2fs_sb_info *sbi)
@@ -1136,6 +1142,8 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (cpc->reason & CP_TRIMMED)
__set_ckpt_flags(ckpt, CP_TRIMMED_FLAG);
+ else
+ __clear_ckpt_flags(ckpt, CP_TRIMMED_FLAG);
if (cpc->reason & CP_UMOUNT)
__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
@@ -1162,6 +1170,39 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
spin_unlock_irqrestore(&sbi->cp_lock, flags);
}
+static void commit_checkpoint(struct f2fs_sb_info *sbi,
+ void *src, block_t blk_addr)
+{
+ struct writeback_control wbc = {
+ .for_reclaim = 0,
+ };
+
+ /*
+ * pagevec_lookup_tag and lock_page again will take
+ * some extra time. Therefore, update_meta_pages and
+ * sync_meta_pages are combined in this function.
+ */
+ struct page *page = grab_meta_page(sbi, blk_addr);
+ int err;
+
+ memcpy(page_address(page), src, PAGE_SIZE);
+ set_page_dirty(page);
+
+ f2fs_wait_on_page_writeback(page, META, true);
+ f2fs_bug_on(sbi, PageWriteback(page));
+ if (unlikely(!clear_page_dirty_for_io(page)))
+ f2fs_bug_on(sbi, 1);
+
+ /* writeout cp pack 2 page */
+ err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO);
+ f2fs_bug_on(sbi, err);
+
+ f2fs_put_page(page, 0);
+
+ /* submit checkpoint (with barrier if NOBARRIER is not set) */
+ f2fs_submit_merged_write(sbi, META_FLUSH);
+}
+
static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
@@ -1264,16 +1305,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
}
}
- /* need to wait for end_io results */
- wait_on_all_pages_writeback(sbi);
- if (unlikely(f2fs_cp_error(sbi)))
- return -EIO;
-
- /* flush all device cache */
- err = f2fs_flush_device_cache(sbi);
- if (err)
- return err;
-
/* write out checkpoint buffer at block 0 */
update_meta_page(sbi, ckpt, start_blk++);
@@ -1301,26 +1332,26 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
start_blk += NR_CURSEG_NODE_TYPE;
}
- /* writeout checkpoint block */
- update_meta_page(sbi, ckpt, start_blk);
+ /* update user_block_counts */
+ sbi->last_valid_block_count = sbi->total_valid_block_count;
+ percpu_counter_set(&sbi->alloc_valid_block_count, 0);
+
+ /* Here, we have one bio having CP pack except cp pack 2 page */
+ sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
- /* wait for previous submitted node/meta pages writeback */
+ /* wait for previous submitted meta pages writeback */
wait_on_all_pages_writeback(sbi);
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
- filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LLONG_MAX);
- filemap_fdatawait_range(META_MAPPING(sbi), 0, LLONG_MAX);
-
- /* update user_block_counts */
- sbi->last_valid_block_count = sbi->total_valid_block_count;
- percpu_counter_set(&sbi->alloc_valid_block_count, 0);
-
- /* Here, we only have one bio having CP pack */
- sync_meta_pages(sbi, META_FLUSH, LONG_MAX, FS_CP_META_IO);
+ /* flush all device cache */
+ err = f2fs_flush_device_cache(sbi);
+ if (err)
+ return err;
- /* wait for previous submitted meta pages writeback */
+ /* barrier and flush checkpoint cp pack 2 page if it can */
+ commit_checkpoint(sbi, ckpt, start_blk);
wait_on_all_pages_writeback(sbi);
release_ino_entry(sbi, false);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7578ed1a85e0..db50686f5096 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -175,15 +175,22 @@ static bool __same_bdev(struct f2fs_sb_info *sbi,
*/
static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
struct writeback_control *wbc,
- int npages, bool is_read)
+ int npages, bool is_read,
+ enum page_type type, enum temp_type temp)
{
struct bio *bio;
bio = f2fs_bio_alloc(sbi, npages, true);
f2fs_target_device(sbi, blk_addr, bio);
- bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
- bio->bi_private = is_read ? NULL : sbi;
+ if (is_read) {
+ bio->bi_end_io = f2fs_read_end_io;
+ bio->bi_private = NULL;
+ } else {
+ bio->bi_end_io = f2fs_write_end_io;
+ bio->bi_private = sbi;
+ bio->bi_write_hint = io_type_to_rw_hint(sbi, type, temp);
+ }
if (wbc)
wbc_init_bio(wbc, bio);
@@ -196,13 +203,12 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
if (!is_read_io(bio_op(bio))) {
unsigned int start;
- if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
- current->plug && (type == DATA || type == NODE))
- blk_finish_plug(current->plug);
-
if (type != DATA && type != NODE)
goto submit_io;
+ if (f2fs_sb_has_blkzoned(sbi->sb) && current->plug)
+ blk_finish_plug(current->plug);
+
start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
start %= F2FS_IO_SIZE(sbi);
@@ -377,12 +383,13 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
struct page *page = fio->encrypted_page ?
fio->encrypted_page : fio->page;
+ verify_block_addr(fio, fio->new_blkaddr);
trace_f2fs_submit_page_bio(page, fio);
f2fs_trace_ios(fio, 0);
/* Allocate a new bio */
bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc,
- 1, is_read_io(fio->op));
+ 1, is_read_io(fio->op), fio->type, fio->temp);
if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
bio_put(bio);
@@ -422,8 +429,8 @@ next:
}
if (fio->old_blkaddr != NEW_ADDR)
- verify_block_addr(sbi, fio->old_blkaddr);
- verify_block_addr(sbi, fio->new_blkaddr);
+ verify_block_addr(fio, fio->old_blkaddr);
+ verify_block_addr(fio, fio->new_blkaddr);
bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
@@ -445,7 +452,8 @@ alloc_new:
goto out_fail;
}
io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc,
- BIO_MAX_PAGES, false);
+ BIO_MAX_PAGES, false,
+ fio->type, fio->temp);
io->fio = *fio;
}
@@ -832,13 +840,6 @@ alloc:
return 0;
}
-static inline bool __force_buffered_io(struct inode *inode, int rw)
-{
- return (f2fs_encrypted_file(inode) ||
- (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) ||
- F2FS_I_SB(inode)->s_ndevs);
-}
-
int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
{
struct inode *inode = file_inode(iocb->ki_filp);
@@ -870,7 +871,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
if (direct_io) {
map.m_seg_type = rw_hint_to_seg_type(iocb->ki_hint);
- flag = __force_buffered_io(inode, WRITE) ?
+ flag = f2fs_force_buffered_io(inode, WRITE) ?
F2FS_GET_BLOCK_PRE_AIO :
F2FS_GET_BLOCK_PRE_DIO;
goto map_blocks;
@@ -1114,6 +1115,31 @@ out:
return err;
}
+bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
+{
+ struct f2fs_map_blocks map;
+ block_t last_lblk;
+ int err;
+
+ if (pos + len > i_size_read(inode))
+ return false;
+
+ map.m_lblk = F2FS_BYTES_TO_BLK(pos);
+ map.m_next_pgofs = NULL;
+ map.m_next_extent = NULL;
+ map.m_seg_type = NO_CHECK_TYPE;
+ last_lblk = F2FS_BLK_ALIGN(pos + len);
+
+ while (map.m_lblk < last_lblk) {
+ map.m_len = last_lblk - map.m_lblk;
+ err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
+ if (err || map.m_len == 0)
+ return false;
+ map.m_lblk += map.m_len;
+ }
+ return true;
+}
+
static int __get_data_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create, int flag,
pgoff_t *next_pgofs, int seg_type)
@@ -2287,25 +2313,41 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = mapping->host;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
size_t count = iov_iter_count(iter);
loff_t offset = iocb->ki_pos;
int rw = iov_iter_rw(iter);
int err;
+ enum rw_hint hint = iocb->ki_hint;
+ int whint_mode = F2FS_OPTION(sbi).whint_mode;
err = check_direct_IO(inode, iter, offset);
if (err)
return err;
- if (__force_buffered_io(inode, rw))
+ if (f2fs_force_buffered_io(inode, rw))
return 0;
trace_f2fs_direct_IO_enter(inode, offset, count, rw);
- down_read(&F2FS_I(inode)->dio_rwsem[rw]);
+ if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
+ iocb->ki_hint = WRITE_LIFE_NOT_SET;
+
+ if (!down_read_trylock(&F2FS_I(inode)->dio_rwsem[rw])) {
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ iocb->ki_hint = hint;
+ err = -EAGAIN;
+ goto out;
+ }
+ down_read(&F2FS_I(inode)->dio_rwsem[rw]);
+ }
+
err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio);
up_read(&F2FS_I(inode)->dio_rwsem[rw]);
if (rw == WRITE) {
+ if (whint_mode == WHINT_MODE_OFF)
+ iocb->ki_hint = hint;
if (err > 0) {
f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
err);
@@ -2315,6 +2357,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
}
}
+out:
trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
return err;
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index f00b5ed8c011..fe661274ff10 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -94,14 +94,12 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
struct f2fs_dir_entry *de;
struct f2fs_dentry_ptr d;
- dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page);
+ dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
make_dentry_ptr_block(NULL, &d, dentry_blk);
de = find_target_dentry(fname, namehash, max_slots, &d);
if (de)
*res_page = dentry_page;
- else
- kunmap(dentry_page);
return de;
}
@@ -287,7 +285,6 @@ ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr,
de = f2fs_find_entry(dir, qstr, page);
if (de) {
res = le32_to_cpu(de->ino);
- f2fs_dentry_kunmap(dir, *page);
f2fs_put_page(*page, 0);
}
@@ -302,7 +299,6 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
f2fs_wait_on_page_writeback(page, type, true);
de->ino = cpu_to_le32(inode->i_ino);
set_de_type(de, inode->i_mode);
- f2fs_dentry_kunmap(dir, page);
set_page_dirty(page);
dir->i_mtime = dir->i_ctime = current_time(dir);
@@ -350,13 +346,11 @@ static int make_empty_dir(struct inode *inode,
if (IS_ERR(dentry_page))
return PTR_ERR(dentry_page);
- dentry_blk = kmap_atomic(dentry_page);
+ dentry_blk = page_address(dentry_page);
make_dentry_ptr_block(NULL, &d, dentry_blk);
do_make_empty_dir(inode, parent, &d);
- kunmap_atomic(dentry_blk);
-
set_page_dirty(dentry_page);
f2fs_put_page(dentry_page, 1);
return 0;
@@ -367,6 +361,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
struct page *dpage)
{
struct page *page;
+ int dummy_encrypt = DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(dir));
int err;
if (is_inode_flag_set(inode, FI_NEW_INODE)) {
@@ -393,7 +388,8 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
if (err)
goto put_error;
- if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) {
+ if ((f2fs_encrypted_inode(dir) || dummy_encrypt) &&
+ f2fs_may_encrypt(inode)) {
err = fscrypt_inherit_context(dir, inode, page, false);
if (err)
goto put_error;
@@ -402,8 +398,6 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
page = get_node_page(F2FS_I_SB(dir), inode->i_ino);
if (IS_ERR(page))
return page;
-
- set_cold_node(inode, page);
}
if (new_name) {
@@ -547,13 +541,12 @@ start:
if (IS_ERR(dentry_page))
return PTR_ERR(dentry_page);
- dentry_blk = kmap(dentry_page);
+ dentry_blk = page_address(dentry_page);
bit_pos = room_for_filename(&dentry_blk->dentry_bitmap,
slots, NR_DENTRY_IN_BLOCK);
if (bit_pos < NR_DENTRY_IN_BLOCK)
goto add_dentry;
- kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
}
@@ -588,7 +581,6 @@ fail:
if (inode)
up_write(&F2FS_I(inode)->i_sem);
- kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
return err;
@@ -642,7 +634,6 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
F2FS_I(dir)->task = NULL;
}
if (de) {
- f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
err = -EEXIST;
} else if (IS_ERR(page)) {
@@ -713,7 +704,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
- add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO);
+ if (F2FS_OPTION(F2FS_I_SB(dir)).fsync_mode == FSYNC_MODE_STRICT)
+ add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO);
if (f2fs_has_inline_dentry(dir))
return f2fs_delete_inline_entry(dentry, page, dir, inode);
@@ -730,7 +722,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
NR_DENTRY_IN_BLOCK,
0);
- kunmap(page); /* kunmap - pair of f2fs_find_entry */
set_page_dirty(page);
dir->i_ctime = dir->i_mtime = current_time(dir);
@@ -775,7 +766,7 @@ bool f2fs_empty_dir(struct inode *dir)
return false;
}
- dentry_blk = kmap_atomic(dentry_page);
+ dentry_blk = page_address(dentry_page);
if (bidx == 0)
bit_pos = 2;
else
@@ -783,7 +774,6 @@ bool f2fs_empty_dir(struct inode *dir)
bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
NR_DENTRY_IN_BLOCK,
bit_pos);
- kunmap_atomic(dentry_blk);
f2fs_put_page(dentry_page, 1);
@@ -901,19 +891,17 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
}
}
- dentry_blk = kmap(dentry_page);
+ dentry_blk = page_address(dentry_page);
make_dentry_ptr_block(inode, &d, dentry_blk);
err = f2fs_fill_dentries(ctx, &d,
n * NR_DENTRY_IN_BLOCK, &fstr);
if (err) {
- kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
break;
}
- kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
}
out_free:
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index ff2352a0ed15..d5a861bf2b42 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -460,7 +460,7 @@ static struct extent_node *__insert_extent_tree(struct inode *inode,
struct rb_node *insert_parent)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct rb_node **p = &et->root.rb_node;
+ struct rb_node **p;
struct rb_node *parent = NULL;
struct extent_node *en = NULL;
@@ -706,6 +706,9 @@ void f2fs_drop_extent_tree(struct inode *inode)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et = F2FS_I(inode)->extent_tree;
+ if (!f2fs_may_extent_tree(inode))
+ return;
+
set_inode_flag(inode, FI_NO_EXTENT);
write_lock(&et->lock);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 6300ac5bcbe4..1df7f10476d6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -98,9 +98,10 @@ extern char *fault_name[FAULT_MAX];
#define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000
#define F2FS_MOUNT_RESERVE_ROOT 0x01000000
-#define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option)
-#define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option)
-#define test_opt(sbi, option) ((sbi)->mount_opt.opt & F2FS_MOUNT_##option)
+#define F2FS_OPTION(sbi) ((sbi)->mount_opt)
+#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
+#define set_opt(sbi, option) (F2FS_OPTION(sbi).opt |= F2FS_MOUNT_##option)
+#define test_opt(sbi, option) (F2FS_OPTION(sbi).opt & F2FS_MOUNT_##option)
#define ver_after(a, b) (typecheck(unsigned long long, a) && \
typecheck(unsigned long long, b) && \
@@ -113,7 +114,26 @@ typedef u32 block_t; /*
typedef u32 nid_t;
struct f2fs_mount_info {
- unsigned int opt;
+ unsigned int opt;
+ int write_io_size_bits; /* Write IO size bits */
+ block_t root_reserved_blocks; /* root reserved blocks */
+ kuid_t s_resuid; /* reserved blocks for uid */
+ kgid_t s_resgid; /* reserved blocks for gid */
+ int active_logs; /* # of active logs */
+ int inline_xattr_size; /* inline xattr size */
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ struct f2fs_fault_info fault_info; /* For fault injection */
+#endif
+#ifdef CONFIG_QUOTA
+ /* Names of quota files with journalled quota */
+ char *s_qf_names[MAXQUOTAS];
+ int s_jquota_fmt; /* Format of quota to use */
+#endif
+ /* For which write hints are passed down to block layer */
+ int whint_mode;
+ int alloc_mode; /* segment allocation policy */
+ int fsync_mode; /* fsync policy */
+ bool test_dummy_encryption; /* test dummy encryption */
};
#define F2FS_FEATURE_ENCRYPT 0x0001
@@ -125,6 +145,8 @@ struct f2fs_mount_info {
#define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR 0x0040
#define F2FS_FEATURE_QUOTA_INO 0x0080
#define F2FS_FEATURE_INODE_CRTIME 0x0100
+#define F2FS_FEATURE_LOST_FOUND 0x0200
+#define F2FS_FEATURE_VERITY 0x0400 /* reserved */
#define F2FS_HAS_FEATURE(sb, mask) \
((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
@@ -450,7 +472,7 @@ static inline void make_dentry_ptr_block(struct inode *inode,
d->inode = inode;
d->max = NR_DENTRY_IN_BLOCK;
d->nr_bitmap = SIZE_OF_DENTRY_BITMAP;
- d->bitmap = &t->dentry_bitmap;
+ d->bitmap = t->dentry_bitmap;
d->dentry = t->dentry;
d->filename = t->filename;
}
@@ -576,6 +598,8 @@ enum {
#define FADVISE_ENCRYPT_BIT 0x04
#define FADVISE_ENC_NAME_BIT 0x08
#define FADVISE_KEEP_SIZE_BIT 0x10
+#define FADVISE_HOT_BIT 0x20
+#define FADVISE_VERITY_BIT 0x40 /* reserved */
#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT)
#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT)
@@ -590,6 +614,9 @@ enum {
#define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT)
#define file_keep_isize(inode) is_file(inode, FADVISE_KEEP_SIZE_BIT)
#define file_set_keep_isize(inode) set_file(inode, FADVISE_KEEP_SIZE_BIT)
+#define file_is_hot(inode) is_file(inode, FADVISE_HOT_BIT)
+#define file_set_hot(inode) set_file(inode, FADVISE_HOT_BIT)
+#define file_clear_hot(inode) clear_file(inode, FADVISE_HOT_BIT)
#define DEF_DIR_LEVEL 0
@@ -637,6 +664,7 @@ struct f2fs_inode_info {
kprojid_t i_projid; /* id for project quota */
int i_inline_xattr_size; /* inline xattr size */
struct timespec i_crtime; /* inode creation time */
+ struct timespec i_disk_time[4]; /* inode disk times */
};
static inline void get_extent_info(struct extent_info *ext,
@@ -743,7 +771,7 @@ struct f2fs_nm_info {
unsigned int nid_cnt[MAX_NID_STATE]; /* the number of free node id */
spinlock_t nid_list_lock; /* protect nid lists ops */
struct mutex build_lock; /* lock for build free nids */
- unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
+ unsigned char **free_nid_bitmap;
unsigned char *nat_block_bitmap;
unsigned short *free_nid_count; /* free nid count of NAT block */
@@ -976,6 +1004,7 @@ struct f2fs_io_info {
bool submitted; /* indicate IO submission */
int need_lock; /* indicate we need to lock cp_rwsem */
bool in_list; /* indicate fio is in io_list */
+ bool is_meta; /* indicate borrow meta inode mapping or not */
enum iostat_type io_type; /* io type */
struct writeback_control *io_wbc; /* writeback control */
};
@@ -1037,10 +1066,34 @@ enum {
MAX_TIME,
};
+enum {
+ WHINT_MODE_OFF, /* not pass down write hints */
+ WHINT_MODE_USER, /* try to pass down hints given by users */
+ WHINT_MODE_FS, /* pass down hints with F2FS policy */
+};
+
+enum {
+ ALLOC_MODE_DEFAULT, /* stay default */
+ ALLOC_MODE_REUSE, /* reuse segments as much as possible */
+};
+
+enum fsync_mode {
+ FSYNC_MODE_POSIX, /* fsync follows posix semantics */
+ FSYNC_MODE_STRICT, /* fsync behaves in line with ext4 */
+};
+
+#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#define DUMMY_ENCRYPTION_ENABLED(sbi) \
+ (unlikely(F2FS_OPTION(sbi).test_dummy_encryption))
+#else
+#define DUMMY_ENCRYPTION_ENABLED(sbi) (0)
+#endif
+
struct f2fs_sb_info {
struct super_block *sb; /* pointer to VFS super block */
struct proc_dir_entry *s_proc; /* proc entry */
struct f2fs_super_block *raw_super; /* raw super block pointer */
+ struct rw_semaphore sb_lock; /* lock for raw super block */
int valid_super_block; /* valid super block no */
unsigned long s_flag; /* flags for sbi */
@@ -1060,7 +1113,6 @@ struct f2fs_sb_info {
struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */
struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE];
/* bio ordering for NODE/DATA */
- int write_io_size_bits; /* Write IO size bits */
mempool_t *write_io_dummy; /* Dummy pages */
/* for checkpoint */
@@ -1110,9 +1162,7 @@ struct f2fs_sb_info {
unsigned int total_node_count; /* total node block count */
unsigned int total_valid_node_count; /* valid node block count */
loff_t max_file_blocks; /* max block index of file */
- int active_logs; /* # of active logs */
int dir_level; /* directory level */
- int inline_xattr_size; /* inline xattr size */
unsigned int trigger_ssr_threshold; /* threshold to trigger ssr */
int readdir_ra; /* readahead inode in readdir */
@@ -1122,9 +1172,6 @@ struct f2fs_sb_info {
block_t last_valid_block_count; /* for recovery */
block_t reserved_blocks; /* configurable reserved blocks */
block_t current_reserved_blocks; /* current reserved blocks */
- block_t root_reserved_blocks; /* root reserved blocks */
- kuid_t s_resuid; /* reserved blocks for uid */
- kgid_t s_resgid; /* reserved blocks for gid */
unsigned int nquota_files; /* # of quota sysfile */
@@ -1209,17 +1256,6 @@ struct f2fs_sb_info {
/* Precomputed FS UUID checksum for seeding other checksums */
__u32 s_chksum_seed;
-
- /* For fault injection */
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- struct f2fs_fault_info fault_info;
-#endif
-
-#ifdef CONFIG_QUOTA
- /* Names of quota files with journalled quota */
- char *s_qf_names[MAXQUOTAS];
- int s_jquota_fmt; /* Format of quota to use */
-#endif
};
#ifdef CONFIG_F2FS_FAULT_INJECTION
@@ -1229,7 +1265,7 @@ struct f2fs_sb_info {
__func__, __builtin_return_address(0))
static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
{
- struct f2fs_fault_info *ffi = &sbi->fault_info;
+ struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info;
if (!ffi->inject_rate)
return false;
@@ -1586,12 +1622,12 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi,
return false;
if (IS_NOQUOTA(inode))
return true;
- if (capable(CAP_SYS_RESOURCE))
+ if (uid_eq(F2FS_OPTION(sbi).s_resuid, current_fsuid()))
return true;
- if (uid_eq(sbi->s_resuid, current_fsuid()))
+ if (!gid_eq(F2FS_OPTION(sbi).s_resgid, GLOBAL_ROOT_GID) &&
+ in_group_p(F2FS_OPTION(sbi).s_resgid))
return true;
- if (!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) &&
- in_group_p(sbi->s_resgid))
+ if (capable(CAP_SYS_RESOURCE))
return true;
return false;
}
@@ -1627,7 +1663,7 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
sbi->current_reserved_blocks;
if (!__allow_reserved_blocks(sbi, inode))
- avail_user_block_count -= sbi->root_reserved_blocks;
+ avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
diff = sbi->total_valid_block_count - avail_user_block_count;
@@ -1762,6 +1798,12 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
int offset;
+ if (is_set_ckpt_flags(sbi, CP_LARGE_NAT_BITMAP_FLAG)) {
+ offset = (flag == SIT_BITMAP) ?
+ le32_to_cpu(ckpt->nat_ver_bitmap_bytesize) : 0;
+ return &ckpt->sit_nat_version_bitmap + offset;
+ }
+
if (__cp_payload(sbi) > 0) {
if (flag == NAT_BITMAP)
return &ckpt->sit_nat_version_bitmap;
@@ -1828,7 +1870,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
sbi->current_reserved_blocks + 1;
if (!__allow_reserved_blocks(sbi, inode))
- valid_block_count += sbi->root_reserved_blocks;
+ valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
if (unlikely(valid_block_count > sbi->user_block_count)) {
spin_unlock(&sbi->stat_lock);
@@ -2399,12 +2441,6 @@ static inline int f2fs_has_inline_dentry(struct inode *inode)
return is_inode_flag_set(inode, FI_INLINE_DENTRY);
}
-static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page)
-{
- if (!f2fs_has_inline_dentry(dir))
- kunmap(page);
-}
-
static inline int is_file(struct inode *inode, int type)
{
return F2FS_I(inode)->i_advise & type;
@@ -2436,7 +2472,17 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync)
}
if (!is_inode_flag_set(inode, FI_AUTO_RECOVER) ||
file_keep_isize(inode) ||
- i_size_read(inode) & PAGE_MASK)
+ i_size_read(inode) & ~PAGE_MASK)
+ return false;
+
+ if (!timespec_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime))
+ return false;
+ if (!timespec_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime))
+ return false;
+ if (!timespec_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime))
+ return false;
+ if (!timespec_equal(F2FS_I(inode)->i_disk_time + 3,
+ &F2FS_I(inode)->i_crtime))
return false;
down_read(&F2FS_I(inode)->i_sem);
@@ -2446,9 +2492,9 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync)
return ret;
}
-static inline int f2fs_readonly(struct super_block *sb)
+static inline bool f2fs_readonly(struct super_block *sb)
{
- return sb->s_flags & SB_RDONLY;
+ return sb_rdonly(sb);
}
static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi)
@@ -2596,6 +2642,8 @@ void handle_failed_inode(struct inode *inode);
/*
* namei.c
*/
+int update_extension_list(struct f2fs_sb_info *sbi, const char *name,
+ bool hot, bool set);
struct dentry *f2fs_get_parent(struct dentry *child);
/*
@@ -2768,6 +2816,8 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi);
int __init create_segment_manager_caches(void);
void destroy_segment_manager_caches(void);
int rw_hint_to_seg_type(enum rw_hint hint);
+enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, enum page_type type,
+ enum temp_type temp);
/*
* checkpoint.c
@@ -2850,6 +2900,7 @@ int f2fs_release_page(struct page *page, gfp_t wait);
int f2fs_migrate_page(struct address_space *mapping, struct page *newpage,
struct page *page, enum migrate_mode mode);
#endif
+bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
/*
* gc.c
@@ -3172,45 +3223,21 @@ static inline bool f2fs_bio_encrypted(struct bio *bio)
return bio->bi_private != NULL;
}
-static inline int f2fs_sb_has_crypto(struct super_block *sb)
-{
- return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_ENCRYPT);
-}
-
-static inline int f2fs_sb_mounted_blkzoned(struct super_block *sb)
-{
- return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_BLKZONED);
-}
-
-static inline int f2fs_sb_has_extra_attr(struct super_block *sb)
-{
- return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_EXTRA_ATTR);
-}
-
-static inline int f2fs_sb_has_project_quota(struct super_block *sb)
-{
- return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_PRJQUOTA);
-}
-
-static inline int f2fs_sb_has_inode_chksum(struct super_block *sb)
-{
- return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CHKSUM);
-}
-
-static inline int f2fs_sb_has_flexible_inline_xattr(struct super_block *sb)
-{
- return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_FLEXIBLE_INLINE_XATTR);
-}
-
-static inline int f2fs_sb_has_quota_ino(struct super_block *sb)
-{
- return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_QUOTA_INO);
+#define F2FS_FEATURE_FUNCS(name, flagname) \
+static inline int f2fs_sb_has_##name(struct super_block *sb) \
+{ \
+ return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_##flagname); \
}
-static inline int f2fs_sb_has_inode_crtime(struct super_block *sb)
-{
- return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CRTIME);
-}
+F2FS_FEATURE_FUNCS(encrypt, ENCRYPT);
+F2FS_FEATURE_FUNCS(blkzoned, BLKZONED);
+F2FS_FEATURE_FUNCS(extra_attr, EXTRA_ATTR);
+F2FS_FEATURE_FUNCS(project_quota, PRJQUOTA);
+F2FS_FEATURE_FUNCS(inode_chksum, INODE_CHKSUM);
+F2FS_FEATURE_FUNCS(flexible_inline_xattr, FLEXIBLE_INLINE_XATTR);
+F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO);
+F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME);
+F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND);
#ifdef CONFIG_BLK_DEV_ZONED
static inline int get_blkz_type(struct f2fs_sb_info *sbi,
@@ -3230,7 +3257,7 @@ static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi)
{
struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev);
- return blk_queue_discard(q) || f2fs_sb_mounted_blkzoned(sbi->sb);
+ return blk_queue_discard(q) || f2fs_sb_has_blkzoned(sbi->sb);
}
static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt)
@@ -3259,4 +3286,11 @@ static inline bool f2fs_may_encrypt(struct inode *inode)
#endif
}
+static inline bool f2fs_force_buffered_io(struct inode *inode, int rw)
+{
+ return (f2fs_encrypted_file(inode) ||
+ (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) ||
+ F2FS_I_SB(inode)->s_ndevs);
+}
+
#endif
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 672a542e5464..6b94f19b3fa8 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -163,9 +163,10 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
cp_reason = CP_NODE_NEED_CP;
else if (test_opt(sbi, FASTBOOT))
cp_reason = CP_FASTBOOT_MODE;
- else if (sbi->active_logs == 2)
+ else if (F2FS_OPTION(sbi).active_logs == 2)
cp_reason = CP_SPEC_LOG_NUM;
- else if (need_dentry_mark(sbi, inode->i_ino) &&
+ else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT &&
+ need_dentry_mark(sbi, inode->i_ino) &&
exist_written_data(sbi, F2FS_I(inode)->i_pino, TRANS_DIR_INO))
cp_reason = CP_RECOVER_DIR;
@@ -479,6 +480,9 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
if (err)
return err;
+
+ filp->f_mode |= FMODE_NOWAIT;
+
return dquot_file_open(inode, filp);
}
@@ -569,7 +573,6 @@ truncate_out:
int truncate_blocks(struct inode *inode, u64 from, bool lock)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- unsigned int blocksize = inode->i_sb->s_blocksize;
struct dnode_of_data dn;
pgoff_t free_from;
int count = 0, err = 0;
@@ -578,7 +581,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
trace_f2fs_truncate_blocks_enter(inode, from);
- free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1);
+ free_from = (pgoff_t)F2FS_BLK_ALIGN(from);
if (free_from >= sbi->max_file_blocks)
goto free_partial;
@@ -1348,8 +1351,12 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
}
out:
- if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size)
- f2fs_i_size_write(inode, new_size);
+ if (new_size > i_size_read(inode)) {
+ if (mode & FALLOC_FL_KEEP_SIZE)
+ file_set_keep_isize(inode);
+ else
+ f2fs_i_size_write(inode, new_size);
+ }
out_sem:
up_write(&F2FS_I(inode)->i_mmap_sem);
@@ -1711,6 +1718,8 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
inode_lock(inode);
+ down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
+
if (f2fs_is_volatile_file(inode))
goto err_out;
@@ -1729,6 +1738,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
}
err_out:
+ up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
@@ -1938,7 +1948,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
- if (!f2fs_sb_has_crypto(inode->i_sb))
+ if (!f2fs_sb_has_encrypt(inode->i_sb))
return -EOPNOTSUPP;
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
@@ -1948,7 +1958,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
{
- if (!f2fs_sb_has_crypto(file_inode(filp)->i_sb))
+ if (!f2fs_sb_has_encrypt(file_inode(filp)->i_sb))
return -EOPNOTSUPP;
return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
}
@@ -1959,16 +1969,18 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int err;
- if (!f2fs_sb_has_crypto(inode->i_sb))
+ if (!f2fs_sb_has_encrypt(inode->i_sb))
return -EOPNOTSUPP;
- if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt))
- goto got_it;
-
err = mnt_want_write_file(filp);
if (err)
return err;
+ down_write(&sbi->sb_lock);
+
+ if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt))
+ goto got_it;
+
/* update superblock with uuid */
generate_random_uuid(sbi->raw_super->encrypt_pw_salt);
@@ -1976,15 +1988,16 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
if (err) {
/* undo new data */
memset(sbi->raw_super->encrypt_pw_salt, 0, 16);
- mnt_drop_write_file(filp);
- return err;
+ goto out_err;
}
- mnt_drop_write_file(filp);
got_it:
if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt,
16))
- return -EFAULT;
- return 0;
+ err = -EFAULT;
+out_err:
+ up_write(&sbi->sb_lock);
+ mnt_drop_write_file(filp);
+ return err;
}
static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
@@ -2045,8 +2058,10 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
return ret;
end = range.start + range.len;
- if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi))
- return -EINVAL;
+ if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) {
+ ret = -EINVAL;
+ goto out;
+ }
do_more:
if (!range.sync) {
if (!mutex_trylock(&sbi->gc_mutex)) {
@@ -2885,25 +2900,54 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
return -EIO;
- inode_lock(inode);
+ if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
+ return -EINVAL;
+
+ if (!inode_trylock(inode)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ inode_lock(inode);
+ }
+
ret = generic_write_checks(iocb, from);
if (ret > 0) {
+ bool preallocated = false;
+ size_t target_size = 0;
int err;
if (iov_iter_fault_in_readable(from, iov_iter_count(from)))
set_inode_flag(inode, FI_NO_PREALLOC);
- err = f2fs_preallocate_blocks(iocb, from);
- if (err) {
- clear_inode_flag(inode, FI_NO_PREALLOC);
- inode_unlock(inode);
- return err;
+ if ((iocb->ki_flags & IOCB_NOWAIT) &&
+ (iocb->ki_flags & IOCB_DIRECT)) {
+ if (!f2fs_overwrite_io(inode, iocb->ki_pos,
+ iov_iter_count(from)) ||
+ f2fs_has_inline_data(inode) ||
+ f2fs_force_buffered_io(inode, WRITE)) {
+ inode_unlock(inode);
+ return -EAGAIN;
+ }
+
+ } else {
+ preallocated = true;
+ target_size = iocb->ki_pos + iov_iter_count(from);
+
+ err = f2fs_preallocate_blocks(iocb, from);
+ if (err) {
+ clear_inode_flag(inode, FI_NO_PREALLOC);
+ inode_unlock(inode);
+ return err;
+ }
}
blk_start_plug(&plug);
ret = __generic_file_write_iter(iocb, from);
blk_finish_plug(&plug);
clear_inode_flag(inode, FI_NO_PREALLOC);
+ /* if we couldn't write data, we should deallocate blocks. */
+ if (preallocated && i_size_read(inode) < target_size)
+ f2fs_truncate(inode);
+
if (ret > 0)
f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
}
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index aa720cc44509..bfb7a4a3a929 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -76,14 +76,15 @@ static int gc_thread_func(void *data)
* invalidated soon after by user update or deletion.
* So, I'd like to wait some time to collect dirty segments.
*/
- if (!mutex_trylock(&sbi->gc_mutex))
- goto next;
-
if (gc_th->gc_urgent) {
wait_ms = gc_th->urgent_sleep_time;
+ mutex_lock(&sbi->gc_mutex);
goto do_gc;
}
+ if (!mutex_trylock(&sbi->gc_mutex))
+ goto next;
+
if (!is_idle(sbi)) {
increase_sleep_time(gc_th, &wait_ms);
mutex_unlock(&sbi->gc_mutex);
@@ -161,12 +162,17 @@ static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
{
int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
- if (gc_th && gc_th->gc_idle) {
+ if (!gc_th)
+ return gc_mode;
+
+ if (gc_th->gc_idle) {
if (gc_th->gc_idle == 1)
gc_mode = GC_CB;
else if (gc_th->gc_idle == 2)
gc_mode = GC_GREEDY;
}
+ if (gc_th->gc_urgent)
+ gc_mode = GC_GREEDY;
return gc_mode;
}
@@ -188,11 +194,14 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
}
/* we need to check every dirty segments in the FG_GC case */
- if (gc_type != FG_GC && p->max_search > sbi->max_victim_search)
+ if (gc_type != FG_GC &&
+ (sbi->gc_thread && !sbi->gc_thread->gc_urgent) &&
+ p->max_search > sbi->max_victim_search)
p->max_search = sbi->max_victim_search;
- /* let's select beginning hot/small space first */
- if (type == CURSEG_HOT_DATA || IS_NODESEG(type))
+ /* let's select beginning hot/small space first in no_heap mode*/
+ if (test_opt(sbi, NOHEAP) &&
+ (type == CURSEG_HOT_DATA || IS_NODESEG(type)))
p->offset = 0;
else
p->offset = SIT_I(sbi)->last_victim[p->gc_mode];
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 90e38d8ea688..3b77d6421218 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -369,7 +369,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
f2fs_wait_on_page_writeback(page, DATA, true);
zero_user_segment(page, MAX_INLINE_DATA(dir), PAGE_SIZE);
- dentry_blk = kmap_atomic(page);
+ dentry_blk = page_address(page);
make_dentry_ptr_inline(dir, &src, inline_dentry);
make_dentry_ptr_block(dir, &dst, dentry_blk);
@@ -386,7 +386,6 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
memcpy(dst.dentry, src.dentry, SIZE_OF_DIR_ENTRY * src.max);
memcpy(dst.filename, src.filename, src.max * F2FS_SLOT_LEN);
- kunmap_atomic(dentry_blk);
if (!PageUptodate(page))
SetPageUptodate(page);
set_page_dirty(page);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 205add3d0f3a..e0d9e8f27ed2 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -284,6 +284,10 @@ static int do_read_inode(struct inode *inode)
fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec);
}
+ F2FS_I(inode)->i_disk_time[0] = inode->i_atime;
+ F2FS_I(inode)->i_disk_time[1] = inode->i_ctime;
+ F2FS_I(inode)->i_disk_time[2] = inode->i_mtime;
+ F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime;
f2fs_put_page(node_page, 1);
stat_inc_inline_xattr(inode);
@@ -328,7 +332,7 @@ make_now:
inode->i_op = &f2fs_dir_inode_operations;
inode->i_fop = &f2fs_dir_operations;
inode->i_mapping->a_ops = &f2fs_dblock_aops;
- mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO);
+ inode_nohighmem(inode);
} else if (S_ISLNK(inode->i_mode)) {
if (f2fs_encrypted_inode(inode))
inode->i_op = &f2fs_encrypted_symlink_inode_operations;
@@ -439,12 +443,15 @@ void update_inode(struct inode *inode, struct page *node_page)
}
__set_inode_rdev(inode, ri);
- set_cold_node(inode, node_page);
/* deleted inode */
if (inode->i_nlink == 0)
clear_inline_node(node_page);
+ F2FS_I(inode)->i_disk_time[0] = inode->i_atime;
+ F2FS_I(inode)->i_disk_time[1] = inode->i_ctime;
+ F2FS_I(inode)->i_disk_time[2] = inode->i_mtime;
+ F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime;
}
void update_inode_page(struct inode *inode)
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index b68e7b03959f..d5098efe577c 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -78,7 +78,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
set_inode_flag(inode, FI_NEW_INODE);
/* If the directory encrypted, then we should encrypt the inode. */
- if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode))
+ if ((f2fs_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) &&
+ f2fs_may_encrypt(inode))
f2fs_set_encrypted_inode(inode);
if (f2fs_sb_has_extra_attr(sbi->sb)) {
@@ -97,7 +98,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode));
if (f2fs_has_inline_xattr(inode))
- xattr_size = sbi->inline_xattr_size;
+ xattr_size = F2FS_OPTION(sbi).inline_xattr_size;
/* Otherwise, will be 0 */
} else if (f2fs_has_inline_xattr(inode) ||
f2fs_has_inline_dentry(inode)) {
@@ -142,7 +143,7 @@ fail_drop:
return ERR_PTR(err);
}
-static int is_multimedia_file(const unsigned char *s, const char *sub)
+static int is_extension_exist(const unsigned char *s, const char *sub)
{
size_t slen = strlen(s);
size_t sublen = strlen(sub);
@@ -168,19 +169,94 @@ static int is_multimedia_file(const unsigned char *s, const char *sub)
/*
* Set multimedia files as cold files for hot/cold data separation
*/
-static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode,
+static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode,
const unsigned char *name)
{
- int i;
- __u8 (*extlist)[8] = sbi->raw_super->extension_list;
+ __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list;
+ int i, cold_count, hot_count;
+
+ down_read(&sbi->sb_lock);
+
+ cold_count = le32_to_cpu(sbi->raw_super->extension_count);
+ hot_count = sbi->raw_super->hot_ext_count;
- int count = le32_to_cpu(sbi->raw_super->extension_count);
- for (i = 0; i < count; i++) {
- if (is_multimedia_file(name, extlist[i])) {
+ for (i = 0; i < cold_count + hot_count; i++) {
+ if (!is_extension_exist(name, extlist[i]))
+ continue;
+ if (i < cold_count)
file_set_cold(inode);
- break;
- }
+ else
+ file_set_hot(inode);
+ break;
}
+
+ up_read(&sbi->sb_lock);
+}
+
+int update_extension_list(struct f2fs_sb_info *sbi, const char *name,
+ bool hot, bool set)
+{
+ __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list;
+ int cold_count = le32_to_cpu(sbi->raw_super->extension_count);
+ int hot_count = sbi->raw_super->hot_ext_count;
+ int total_count = cold_count + hot_count;
+ int start, count;
+ int i;
+
+ if (set) {
+ if (total_count == F2FS_MAX_EXTENSION)
+ return -EINVAL;
+ } else {
+ if (!hot && !cold_count)
+ return -EINVAL;
+ if (hot && !hot_count)
+ return -EINVAL;
+ }
+
+ if (hot) {
+ start = cold_count;
+ count = total_count;
+ } else {
+ start = 0;
+ count = cold_count;
+ }
+
+ for (i = start; i < count; i++) {
+ if (strcmp(name, extlist[i]))
+ continue;
+
+ if (set)
+ return -EINVAL;
+
+ memcpy(extlist[i], extlist[i + 1],
+ F2FS_EXTENSION_LEN * (total_count - i - 1));
+ memset(extlist[total_count - 1], 0, F2FS_EXTENSION_LEN);
+ if (hot)
+ sbi->raw_super->hot_ext_count = hot_count - 1;
+ else
+ sbi->raw_super->extension_count =
+ cpu_to_le32(cold_count - 1);
+ return 0;
+ }
+
+ if (!set)
+ return -EINVAL;
+
+ if (hot) {
+ strncpy(extlist[count], name, strlen(name));
+ sbi->raw_super->hot_ext_count = hot_count + 1;
+ } else {
+ char buf[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN];
+
+ memcpy(buf, &extlist[cold_count],
+ F2FS_EXTENSION_LEN * hot_count);
+ memset(extlist[cold_count], 0, F2FS_EXTENSION_LEN);
+ strncpy(extlist[cold_count], name, strlen(name));
+ memcpy(&extlist[cold_count + 1], buf,
+ F2FS_EXTENSION_LEN * hot_count);
+ sbi->raw_super->extension_count = cpu_to_le32(cold_count + 1);
+ }
+ return 0;
}
static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
@@ -203,7 +279,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
return PTR_ERR(inode);
if (!test_opt(sbi, DISABLE_EXT_IDENTIFY))
- set_cold_files(sbi, inode, dentry->d_name.name);
+ set_file_temperature(sbi, inode, dentry->d_name.name);
inode->i_op = &f2fs_file_inode_operations;
inode->i_fop = &f2fs_file_operations;
@@ -317,7 +393,6 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino)
de = f2fs_find_entry(dir, &dot, &page);
if (de) {
- f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
} else if (IS_ERR(page)) {
err = PTR_ERR(page);
@@ -329,14 +404,12 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino)
}
de = f2fs_find_entry(dir, &dotdot, &page);
- if (de) {
- f2fs_dentry_kunmap(dir, page);
+ if (de)
f2fs_put_page(page, 0);
- } else if (IS_ERR(page)) {
+ else if (IS_ERR(page))
err = PTR_ERR(page);
- } else {
+ else
err = __f2fs_add_link(dir, &dotdot, NULL, pino, S_IFDIR);
- }
out:
if (!err)
clear_inode_flag(dir, FI_INLINE_DOTS);
@@ -377,7 +450,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
}
ino = le32_to_cpu(de->ino);
- f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
inode = f2fs_iget(dir->i_sb, ino);
@@ -452,7 +524,6 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
err = acquire_orphan_inode(sbi);
if (err) {
f2fs_unlock_op(sbi);
- f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
goto fail;
}
@@ -579,7 +650,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
inode->i_op = &f2fs_dir_inode_operations;
inode->i_fop = &f2fs_dir_operations;
inode->i_mapping->a_ops = &f2fs_dblock_aops;
- mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO);
+ inode_nohighmem(inode);
set_inode_flag(inode, FI_INC_LINK);
f2fs_lock_op(sbi);
@@ -717,10 +788,12 @@ out:
static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
{
- if (unlikely(f2fs_cp_error(F2FS_I_SB(dir))))
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+
+ if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
- if (f2fs_encrypted_inode(dir)) {
+ if (f2fs_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) {
int err = fscrypt_get_encryption_info(dir);
if (err)
return err;
@@ -893,16 +966,15 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
if (old_dir_entry) {
- if (old_dir != new_dir && !whiteout) {
+ if (old_dir != new_dir && !whiteout)
f2fs_set_link(old_inode, old_dir_entry,
old_dir_page, new_dir);
- } else {
- f2fs_dentry_kunmap(old_inode, old_dir_page);
+ else
f2fs_put_page(old_dir_page, 0);
- }
f2fs_i_links_write(old_dir, false);
}
- add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO);
+ if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
+ add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO);
f2fs_unlock_op(sbi);
@@ -912,20 +984,15 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
put_out_dir:
f2fs_unlock_op(sbi);
- if (new_page) {
- f2fs_dentry_kunmap(new_dir, new_page);
+ if (new_page)
f2fs_put_page(new_page, 0);
- }
out_whiteout:
if (whiteout)
iput(whiteout);
out_dir:
- if (old_dir_entry) {
- f2fs_dentry_kunmap(old_inode, old_dir_page);
+ if (old_dir_entry)
f2fs_put_page(old_dir_page, 0);
- }
out_old:
- f2fs_dentry_kunmap(old_dir, old_page);
f2fs_put_page(old_page, 0);
out:
return err;
@@ -1057,8 +1124,10 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
}
f2fs_mark_inode_dirty_sync(new_dir, false);
- add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO);
- add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO);
+ if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) {
+ add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO);
+ add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO);
+ }
f2fs_unlock_op(sbi);
@@ -1067,19 +1136,15 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
return 0;
out_new_dir:
if (new_dir_entry) {
- f2fs_dentry_kunmap(new_inode, new_dir_page);
f2fs_put_page(new_dir_page, 0);
}
out_old_dir:
if (old_dir_entry) {
- f2fs_dentry_kunmap(old_inode, old_dir_page);
f2fs_put_page(old_dir_page, 0);
}
out_new:
- f2fs_dentry_kunmap(new_dir, new_page);
f2fs_put_page(new_page, 0);
out_old:
- f2fs_dentry_kunmap(old_dir, old_page);
f2fs_put_page(old_page, 0);
out:
return err;
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 177c438e4a56..9a99243054ba 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -193,8 +193,8 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
__free_nat_entry(e);
}
-static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
- struct nat_entry *ne)
+static struct nat_entry_set *__grab_nat_entry_set(struct f2fs_nm_info *nm_i,
+ struct nat_entry *ne)
{
nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
struct nat_entry_set *head;
@@ -209,15 +209,36 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
head->entry_cnt = 0;
f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head);
}
+ return head;
+}
+
+static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
+ struct nat_entry *ne)
+{
+ struct nat_entry_set *head;
+ bool new_ne = nat_get_blkaddr(ne) == NEW_ADDR;
+
+ if (!new_ne)
+ head = __grab_nat_entry_set(nm_i, ne);
+
+ /*
+ * update entry_cnt in below condition:
+ * 1. update NEW_ADDR to valid block address;
+ * 2. update old block address to new one;
+ */
+ if (!new_ne && (get_nat_flag(ne, IS_PREALLOC) ||
+ !get_nat_flag(ne, IS_DIRTY)))
+ head->entry_cnt++;
+
+ set_nat_flag(ne, IS_PREALLOC, new_ne);
if (get_nat_flag(ne, IS_DIRTY))
goto refresh_list;
nm_i->dirty_nat_cnt++;
- head->entry_cnt++;
set_nat_flag(ne, IS_DIRTY, true);
refresh_list:
- if (nat_get_blkaddr(ne) == NEW_ADDR)
+ if (new_ne)
list_del_init(&ne->list);
else
list_move_tail(&ne->list, &head->entry_list);
@@ -1076,7 +1097,7 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs)
f2fs_wait_on_page_writeback(page, NODE, true);
fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
- set_cold_node(dn->inode, page);
+ set_cold_node(page, S_ISDIR(dn->inode->i_mode));
if (!PageUptodate(page))
SetPageUptodate(page);
if (set_page_dirty(page))
@@ -2291,6 +2312,7 @@ retry:
if (!PageUptodate(ipage))
SetPageUptodate(ipage);
fill_node_footer(ipage, ino, ino, 0, true);
+ set_cold_node(page, false);
src = F2FS_INODE(page);
dst = F2FS_INODE(ipage);
@@ -2580,8 +2602,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
if (!enabled_nat_bits(sbi, NULL))
return 0;
- nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
- F2FS_BLKSIZE - 1);
+ nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8);
nm_i->nat_bits = f2fs_kzalloc(sbi,
nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL);
if (!nm_i->nat_bits)
@@ -2707,12 +2728,20 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
static int init_free_nid_cache(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
+ int i;
- nm_i->free_nid_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks *
- NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL);
+ nm_i->free_nid_bitmap = f2fs_kzalloc(sbi, nm_i->nat_blocks *
+ sizeof(unsigned char *), GFP_KERNEL);
if (!nm_i->free_nid_bitmap)
return -ENOMEM;
+ for (i = 0; i < nm_i->nat_blocks; i++) {
+ nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi,
+ NAT_ENTRY_BITMAP_SIZE_ALIGNED, GFP_KERNEL);
+ if (!nm_i->free_nid_bitmap)
+ return -ENOMEM;
+ }
+
nm_i->nat_block_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks / 8,
GFP_KERNEL);
if (!nm_i->nat_block_bitmap)
@@ -2803,7 +2832,13 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
up_write(&nm_i->nat_tree_lock);
kvfree(nm_i->nat_block_bitmap);
- kvfree(nm_i->free_nid_bitmap);
+ if (nm_i->free_nid_bitmap) {
+ int i;
+
+ for (i = 0; i < nm_i->nat_blocks; i++)
+ kvfree(nm_i->free_nid_bitmap[i]);
+ kfree(nm_i->free_nid_bitmap);
+ }
kvfree(nm_i->free_nid_count);
kfree(nm_i->nat_bitmap);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 081ef0d672bf..b95e49e4a928 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -44,6 +44,7 @@ enum {
HAS_FSYNCED_INODE, /* is the inode fsynced before? */
HAS_LAST_FSYNC, /* has the latest node fsync mark? */
IS_DIRTY, /* this nat entry is dirty? */
+ IS_PREALLOC, /* nat entry is preallocated */
};
/*
@@ -422,12 +423,12 @@ static inline void clear_inline_node(struct page *page)
ClearPageChecked(page);
}
-static inline void set_cold_node(struct inode *inode, struct page *page)
+static inline void set_cold_node(struct page *page, bool is_dir)
{
struct f2fs_node *rn = F2FS_NODE(page);
unsigned int flag = le32_to_cpu(rn->footer.flag);
- if (S_ISDIR(inode->i_mode))
+ if (is_dir)
flag &= ~(0x1 << COLD_BIT_SHIFT);
else
flag |= (0x1 << COLD_BIT_SHIFT);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 337f3363f48f..1b23d3febe4c 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -144,7 +144,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage,
retry:
de = __f2fs_find_entry(dir, &fname, &page);
if (de && inode->i_ino == le32_to_cpu(de->ino))
- goto out_unmap_put;
+ goto out_put;
if (de) {
einode = f2fs_iget_retry(inode->i_sb, le32_to_cpu(de->ino));
@@ -153,19 +153,19 @@ retry:
err = PTR_ERR(einode);
if (err == -ENOENT)
err = -EEXIST;
- goto out_unmap_put;
+ goto out_put;
}
err = dquot_initialize(einode);
if (err) {
iput(einode);
- goto out_unmap_put;
+ goto out_put;
}
err = acquire_orphan_inode(F2FS_I_SB(inode));
if (err) {
iput(einode);
- goto out_unmap_put;
+ goto out_put;
}
f2fs_delete_entry(de, page, dir, einode);
iput(einode);
@@ -180,8 +180,7 @@ retry:
goto retry;
goto out;
-out_unmap_put:
- f2fs_dentry_kunmap(dir, page);
+out_put:
f2fs_put_page(page, 0);
out:
if (file_enc_name(inode))
@@ -243,6 +242,9 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
struct curseg_info *curseg;
struct page *page = NULL;
block_t blkaddr;
+ unsigned int loop_cnt = 0;
+ unsigned int free_blocks = sbi->user_block_count -
+ valid_user_blocks(sbi);
int err = 0;
/* get node pages in the current segment */
@@ -295,6 +297,17 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
if (IS_INODE(page) && is_dent_dnode(page))
entry->last_dentry = blkaddr;
next:
+ /* sanity check in order to detect looped node chain */
+ if (++loop_cnt >= free_blocks ||
+ blkaddr == next_blkaddr_of_node(page)) {
+ f2fs_msg(sbi->sb, KERN_NOTICE,
+ "%s: detect looped node chain, "
+ "blkaddr:%u, next:%u",
+ __func__, blkaddr, next_blkaddr_of_node(page));
+ err = -EINVAL;
+ break;
+ }
+
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
f2fs_put_page(page, 1);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index b16a8e6625aa..5854cc4e1d67 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1411,12 +1411,11 @@ static int issue_discard_thread(void *data)
if (kthread_should_stop())
return 0;
- if (dcc->discard_wake) {
+ if (dcc->discard_wake)
dcc->discard_wake = 0;
- if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
- init_discard_policy(&dpolicy,
- DPOLICY_FORCE, 1);
- }
+
+ if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
+ init_discard_policy(&dpolicy, DPOLICY_FORCE, 1);
sb_start_intwrite(sbi->sb);
@@ -1485,7 +1484,7 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t blkstart, block_t blklen)
{
#ifdef CONFIG_BLK_DEV_ZONED
- if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
+ if (f2fs_sb_has_blkzoned(sbi->sb) &&
bdev_zoned_model(bdev) != BLK_ZONED_NONE)
return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
#endif
@@ -1683,7 +1682,7 @@ find_next:
sbi->blocks_per_seg, cur_pos);
len = next_pos - cur_pos;
- if (f2fs_sb_mounted_blkzoned(sbi->sb) ||
+ if (f2fs_sb_has_blkzoned(sbi->sb) ||
(force && len < cpc->trim_minlen))
goto skip;
@@ -1727,7 +1726,7 @@ void init_discard_policy(struct discard_policy *dpolicy,
} else if (discard_type == DPOLICY_FORCE) {
dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
- dpolicy->io_aware = true;
+ dpolicy->io_aware = false;
} else if (discard_type == DPOLICY_FSTRIM) {
dpolicy->io_aware = false;
} else if (discard_type == DPOLICY_UMOUNT) {
@@ -1863,7 +1862,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
sbi->discard_blks--;
/* don't overwrite by SSR to keep node chain */
- if (se->type == CURSEG_WARM_NODE) {
+ if (IS_NODESEG(se->type)) {
if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
se->ckpt_valid_blocks++;
}
@@ -2164,11 +2163,17 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
if (sbi->segs_per_sec != 1)
return CURSEG_I(sbi, type)->segno;
- if (type == CURSEG_HOT_DATA || IS_NODESEG(type))
+ if (test_opt(sbi, NOHEAP) &&
+ (type == CURSEG_HOT_DATA || IS_NODESEG(type)))
return 0;
if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
return SIT_I(sbi)->last_victim[ALLOC_NEXT];
+
+ /* find segments from 0 to reuse freed segments */
+ if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
+ return 0;
+
return CURSEG_I(sbi, type)->segno;
}
@@ -2455,6 +2460,101 @@ int rw_hint_to_seg_type(enum rw_hint hint)
}
}
+/* This returns write hints for each segment type. This hints will be
+ * passed down to block layer. There are mapping tables which depend on
+ * the mount option 'whint_mode'.
+ *
+ * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
+ *
+ * 2) whint_mode=user-based. F2FS tries to pass down hints given by users.
+ *
+ * User F2FS Block
+ * ---- ---- -----
+ * META WRITE_LIFE_NOT_SET
+ * HOT_NODE "
+ * WARM_NODE "
+ * COLD_NODE "
+ * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
+ * extension list " "
+ *
+ * -- buffered io
+ * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
+ * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
+ * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
+ * WRITE_LIFE_NONE " "
+ * WRITE_LIFE_MEDIUM " "
+ * WRITE_LIFE_LONG " "
+ *
+ * -- direct io
+ * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
+ * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
+ * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
+ * WRITE_LIFE_NONE " WRITE_LIFE_NONE
+ * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
+ * WRITE_LIFE_LONG " WRITE_LIFE_LONG
+ *
+ * 3) whint_mode=fs-based. F2FS passes down hints with its policy.
+ *
+ * User F2FS Block
+ * ---- ---- -----
+ * META WRITE_LIFE_MEDIUM;
+ * HOT_NODE WRITE_LIFE_NOT_SET
+ * WARM_NODE "
+ * COLD_NODE WRITE_LIFE_NONE
+ * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
+ * extension list " "
+ *
+ * -- buffered io
+ * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
+ * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
+ * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG
+ * WRITE_LIFE_NONE " "
+ * WRITE_LIFE_MEDIUM " "
+ * WRITE_LIFE_LONG " "
+ *
+ * -- direct io
+ * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
+ * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
+ * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
+ * WRITE_LIFE_NONE " WRITE_LIFE_NONE
+ * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
+ * WRITE_LIFE_LONG " WRITE_LIFE_LONG
+ */
+
+enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi,
+ enum page_type type, enum temp_type temp)
+{
+ if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) {
+ if (type == DATA) {
+ if (temp == WARM)
+ return WRITE_LIFE_NOT_SET;
+ else if (temp == HOT)
+ return WRITE_LIFE_SHORT;
+ else if (temp == COLD)
+ return WRITE_LIFE_EXTREME;
+ } else {
+ return WRITE_LIFE_NOT_SET;
+ }
+ } else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) {
+ if (type == DATA) {
+ if (temp == WARM)
+ return WRITE_LIFE_LONG;
+ else if (temp == HOT)
+ return WRITE_LIFE_SHORT;
+ else if (temp == COLD)
+ return WRITE_LIFE_EXTREME;
+ } else if (type == NODE) {
+ if (temp == WARM || temp == HOT)
+ return WRITE_LIFE_NOT_SET;
+ else if (temp == COLD)
+ return WRITE_LIFE_NONE;
+ } else if (type == META) {
+ return WRITE_LIFE_MEDIUM;
+ }
+ }
+ return WRITE_LIFE_NOT_SET;
+}
+
static int __get_segment_type_2(struct f2fs_io_info *fio)
{
if (fio->type == DATA)
@@ -2487,7 +2587,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
if (is_cold_data(fio->page) || file_is_cold(inode))
return CURSEG_COLD_DATA;
- if (is_inode_flag_set(inode, FI_HOT_DATA))
+ if (file_is_hot(inode) ||
+ is_inode_flag_set(inode, FI_HOT_DATA))
return CURSEG_HOT_DATA;
return rw_hint_to_seg_type(inode->i_write_hint);
} else {
@@ -2502,7 +2603,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
{
int type = 0;
- switch (fio->sbi->active_logs) {
+ switch (F2FS_OPTION(fio->sbi).active_logs) {
case 2:
type = __get_segment_type_2(fio);
break;
@@ -2642,6 +2743,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
struct f2fs_io_info fio = {
.sbi = sbi,
.type = META,
+ .temp = HOT,
.op = REQ_OP_WRITE,
.op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
.old_blkaddr = page->index,
@@ -2688,8 +2790,15 @@ void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio)
int rewrite_data_page(struct f2fs_io_info *fio)
{
int err;
+ struct f2fs_sb_info *sbi = fio->sbi;
fio->new_blkaddr = fio->old_blkaddr;
+ /* i/o temperature is needed for passing down write hints */
+ __get_segment_type(fio);
+
+ f2fs_bug_on(sbi, !IS_DATASEG(get_seg_entry(sbi,
+ GET_SEGNO(sbi, fio->new_blkaddr))->type));
+
stat_inc_inplace_blocks(fio->sbi);
err = f2fs_submit_page_bio(fio);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index f11c4bc82c78..3325d0769723 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -53,13 +53,19 @@
((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
(sbi)->segs_per_sec)) \
-#define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr)
-#define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr)
+#define MAIN_BLKADDR(sbi) \
+ (SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->main_blkaddr))
+#define SEG0_BLKADDR(sbi) \
+ (SM_I(sbi) ? SM_I(sbi)->seg0_blkaddr : \
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment0_blkaddr))
#define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments)
#define MAIN_SECS(sbi) ((sbi)->total_sections)
-#define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count)
+#define TOTAL_SEGS(sbi) \
+ (SM_I(sbi) ? SM_I(sbi)->segment_count : \
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count))
#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg)
#define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi))
@@ -596,6 +602,8 @@ static inline int utilization(struct f2fs_sb_info *sbi)
#define DEF_MIN_FSYNC_BLOCKS 8
#define DEF_MIN_HOT_BLOCKS 16
+#define SMALL_VOLUME_SEGMENTS (16 * 512) /* 16GB */
+
enum {
F2FS_IPU_FORCE,
F2FS_IPU_SSR,
@@ -630,10 +638,17 @@ static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1);
}
-static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
+static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr)
{
- BUG_ON(blk_addr < SEG0_BLKADDR(sbi)
- || blk_addr >= MAX_BLKADDR(sbi));
+ struct f2fs_sb_info *sbi = fio->sbi;
+
+ if (PAGE_TYPE_OF_BIO(fio->type) == META &&
+ (!is_read_io(fio->op) || fio->is_meta))
+ BUG_ON(blk_addr < SEG0_BLKADDR(sbi) ||
+ blk_addr >= MAIN_BLKADDR(sbi));
+ else
+ BUG_ON(blk_addr < MAIN_BLKADDR(sbi) ||
+ blk_addr >= MAX_BLKADDR(sbi));
}
/*
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8173ae688814..42d564c5ccd0 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -60,7 +60,7 @@ char *fault_name[FAULT_MAX] = {
static void f2fs_build_fault_attr(struct f2fs_sb_info *sbi,
unsigned int rate)
{
- struct f2fs_fault_info *ffi = &sbi->fault_info;
+ struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info;
if (rate) {
atomic_set(&ffi->inject_ops, 0);
@@ -129,6 +129,10 @@ enum {
Opt_jqfmt_vfsold,
Opt_jqfmt_vfsv0,
Opt_jqfmt_vfsv1,
+ Opt_whint,
+ Opt_alloc,
+ Opt_fsync,
+ Opt_test_dummy_encryption,
Opt_err,
};
@@ -182,6 +186,10 @@ static match_table_t f2fs_tokens = {
{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
{Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
+ {Opt_whint, "whint_mode=%s"},
+ {Opt_alloc, "alloc_mode=%s"},
+ {Opt_fsync, "fsync_mode=%s"},
+ {Opt_test_dummy_encryption, "test_dummy_encryption"},
{Opt_err, NULL},
};
@@ -202,21 +210,24 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
block_t limit = (sbi->user_block_count << 1) / 1000;
/* limit is 0.2% */
- if (test_opt(sbi, RESERVE_ROOT) && sbi->root_reserved_blocks > limit) {
- sbi->root_reserved_blocks = limit;
+ if (test_opt(sbi, RESERVE_ROOT) &&
+ F2FS_OPTION(sbi).root_reserved_blocks > limit) {
+ F2FS_OPTION(sbi).root_reserved_blocks = limit;
f2fs_msg(sbi->sb, KERN_INFO,
"Reduce reserved blocks for root = %u",
- sbi->root_reserved_blocks);
+ F2FS_OPTION(sbi).root_reserved_blocks);
}
if (!test_opt(sbi, RESERVE_ROOT) &&
- (!uid_eq(sbi->s_resuid,
+ (!uid_eq(F2FS_OPTION(sbi).s_resuid,
make_kuid(&init_user_ns, F2FS_DEF_RESUID)) ||
- !gid_eq(sbi->s_resgid,
+ !gid_eq(F2FS_OPTION(sbi).s_resgid,
make_kgid(&init_user_ns, F2FS_DEF_RESGID))))
f2fs_msg(sbi->sb, KERN_INFO,
"Ignore s_resuid=%u, s_resgid=%u w/o reserve_root",
- from_kuid_munged(&init_user_ns, sbi->s_resuid),
- from_kgid_munged(&init_user_ns, sbi->s_resgid));
+ from_kuid_munged(&init_user_ns,
+ F2FS_OPTION(sbi).s_resuid),
+ from_kgid_munged(&init_user_ns,
+ F2FS_OPTION(sbi).s_resgid));
}
static void init_once(void *foo)
@@ -236,7 +247,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype,
char *qname;
int ret = -EINVAL;
- if (sb_any_quota_loaded(sb) && !sbi->s_qf_names[qtype]) {
+ if (sb_any_quota_loaded(sb) && !F2FS_OPTION(sbi).s_qf_names[qtype]) {
f2fs_msg(sb, KERN_ERR,
"Cannot change journaled "
"quota options when quota turned on");
@@ -254,8 +265,8 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype,
"Not enough memory for storing quotafile name");
return -EINVAL;
}
- if (sbi->s_qf_names[qtype]) {
- if (strcmp(sbi->s_qf_names[qtype], qname) == 0)
+ if (F2FS_OPTION(sbi).s_qf_names[qtype]) {
+ if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0)
ret = 0;
else
f2fs_msg(sb, KERN_ERR,
@@ -268,7 +279,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype,
"quotafile must be on filesystem root");
goto errout;
}
- sbi->s_qf_names[qtype] = qname;
+ F2FS_OPTION(sbi).s_qf_names[qtype] = qname;
set_opt(sbi, QUOTA);
return 0;
errout:
@@ -280,13 +291,13 @@ static int f2fs_clear_qf_name(struct super_block *sb, int qtype)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
- if (sb_any_quota_loaded(sb) && sbi->s_qf_names[qtype]) {
+ if (sb_any_quota_loaded(sb) && F2FS_OPTION(sbi).s_qf_names[qtype]) {
f2fs_msg(sb, KERN_ERR, "Cannot change journaled quota options"
" when quota turned on");
return -EINVAL;
}
- kfree(sbi->s_qf_names[qtype]);
- sbi->s_qf_names[qtype] = NULL;
+ kfree(F2FS_OPTION(sbi).s_qf_names[qtype]);
+ F2FS_OPTION(sbi).s_qf_names[qtype] = NULL;
return 0;
}
@@ -302,15 +313,19 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
"Cannot enable project quota enforcement.");
return -1;
}
- if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA] ||
- sbi->s_qf_names[PRJQUOTA]) {
- if (test_opt(sbi, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
+ if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
+ F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
+ F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) {
+ if (test_opt(sbi, USRQUOTA) &&
+ F2FS_OPTION(sbi).s_qf_names[USRQUOTA])
clear_opt(sbi, USRQUOTA);
- if (test_opt(sbi, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
+ if (test_opt(sbi, GRPQUOTA) &&
+ F2FS_OPTION(sbi).s_qf_names[GRPQUOTA])
clear_opt(sbi, GRPQUOTA);
- if (test_opt(sbi, PRJQUOTA) && sbi->s_qf_names[PRJQUOTA])
+ if (test_opt(sbi, PRJQUOTA) &&
+ F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
clear_opt(sbi, PRJQUOTA);
if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) ||
@@ -320,19 +335,19 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
return -1;
}
- if (!sbi->s_jquota_fmt) {
+ if (!F2FS_OPTION(sbi).s_jquota_fmt) {
f2fs_msg(sbi->sb, KERN_ERR, "journaled quota format "
"not specified");
return -1;
}
}
- if (f2fs_sb_has_quota_ino(sbi->sb) && sbi->s_jquota_fmt) {
+ if (f2fs_sb_has_quota_ino(sbi->sb) && F2FS_OPTION(sbi).s_jquota_fmt) {
f2fs_msg(sbi->sb, KERN_INFO,
"QUOTA feature is enabled, so ignore jquota_fmt");
- sbi->s_jquota_fmt = 0;
+ F2FS_OPTION(sbi).s_jquota_fmt = 0;
}
- if (f2fs_sb_has_quota_ino(sbi->sb) && sb_rdonly(sbi->sb)) {
+ if (f2fs_sb_has_quota_ino(sbi->sb) && f2fs_readonly(sbi->sb)) {
f2fs_msg(sbi->sb, KERN_INFO,
"Filesystem with quota feature cannot be mounted RDWR "
"without CONFIG_QUOTA");
@@ -403,14 +418,14 @@ static int parse_options(struct super_block *sb, char *options)
q = bdev_get_queue(sb->s_bdev);
if (blk_queue_discard(q)) {
set_opt(sbi, DISCARD);
- } else if (!f2fs_sb_mounted_blkzoned(sb)) {
+ } else if (!f2fs_sb_has_blkzoned(sb)) {
f2fs_msg(sb, KERN_WARNING,
"mounting with \"discard\" option, but "
"the device does not support discard");
}
break;
case Opt_nodiscard:
- if (f2fs_sb_mounted_blkzoned(sb)) {
+ if (f2fs_sb_has_blkzoned(sb)) {
f2fs_msg(sb, KERN_WARNING,
"discard is required for zoned block devices");
return -EINVAL;
@@ -440,7 +455,7 @@ static int parse_options(struct super_block *sb, char *options)
if (args->from && match_int(args, &arg))
return -EINVAL;
set_opt(sbi, INLINE_XATTR_SIZE);
- sbi->inline_xattr_size = arg;
+ F2FS_OPTION(sbi).inline_xattr_size = arg;
break;
#else
case Opt_user_xattr:
@@ -480,7 +495,7 @@ static int parse_options(struct super_block *sb, char *options)
return -EINVAL;
if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
return -EINVAL;
- sbi->active_logs = arg;
+ F2FS_OPTION(sbi).active_logs = arg;
break;
case Opt_disable_ext_identify:
set_opt(sbi, DISABLE_EXT_IDENTIFY);
@@ -524,9 +539,9 @@ static int parse_options(struct super_block *sb, char *options)
if (test_opt(sbi, RESERVE_ROOT)) {
f2fs_msg(sb, KERN_INFO,
"Preserve previous reserve_root=%u",
- sbi->root_reserved_blocks);
+ F2FS_OPTION(sbi).root_reserved_blocks);
} else {
- sbi->root_reserved_blocks = arg;
+ F2FS_OPTION(sbi).root_reserved_blocks = arg;
set_opt(sbi, RESERVE_ROOT);
}
break;
@@ -539,7 +554,7 @@ static int parse_options(struct super_block *sb, char *options)
"Invalid uid value %d", arg);
return -EINVAL;
}
- sbi->s_resuid = uid;
+ F2FS_OPTION(sbi).s_resuid = uid;
break;
case Opt_resgid:
if (args->from && match_int(args, &arg))
@@ -550,7 +565,7 @@ static int parse_options(struct super_block *sb, char *options)
"Invalid gid value %d", arg);
return -EINVAL;
}
- sbi->s_resgid = gid;
+ F2FS_OPTION(sbi).s_resgid = gid;
break;
case Opt_mode:
name = match_strdup(&args[0]);
@@ -559,7 +574,7 @@ static int parse_options(struct super_block *sb, char *options)
return -ENOMEM;
if (strlen(name) == 8 &&
!strncmp(name, "adaptive", 8)) {
- if (f2fs_sb_mounted_blkzoned(sb)) {
+ if (f2fs_sb_has_blkzoned(sb)) {
f2fs_msg(sb, KERN_WARNING,
"adaptive mode is not allowed with "
"zoned block device feature");
@@ -585,7 +600,7 @@ static int parse_options(struct super_block *sb, char *options)
1 << arg, BIO_MAX_PAGES);
return -EINVAL;
}
- sbi->write_io_size_bits = arg;
+ F2FS_OPTION(sbi).write_io_size_bits = arg;
break;
case Opt_fault_injection:
if (args->from && match_int(args, &arg))
@@ -646,13 +661,13 @@ static int parse_options(struct super_block *sb, char *options)
return ret;
break;
case Opt_jqfmt_vfsold:
- sbi->s_jquota_fmt = QFMT_VFS_OLD;
+ F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_OLD;
break;
case Opt_jqfmt_vfsv0:
- sbi->s_jquota_fmt = QFMT_VFS_V0;
+ F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V0;
break;
case Opt_jqfmt_vfsv1:
- sbi->s_jquota_fmt = QFMT_VFS_V1;
+ F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V1;
break;
case Opt_noquota:
clear_opt(sbi, QUOTA);
@@ -679,6 +694,73 @@ static int parse_options(struct super_block *sb, char *options)
"quota operations not supported");
break;
#endif
+ case Opt_whint:
+ name = match_strdup(&args[0]);
+ if (!name)
+ return -ENOMEM;
+ if (strlen(name) == 10 &&
+ !strncmp(name, "user-based", 10)) {
+ F2FS_OPTION(sbi).whint_mode = WHINT_MODE_USER;
+ } else if (strlen(name) == 3 &&
+ !strncmp(name, "off", 3)) {
+ F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
+ } else if (strlen(name) == 8 &&
+ !strncmp(name, "fs-based", 8)) {
+ F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS;
+ } else {
+ kfree(name);
+ return -EINVAL;
+ }
+ kfree(name);
+ break;
+ case Opt_alloc:
+ name = match_strdup(&args[0]);
+ if (!name)
+ return -ENOMEM;
+
+ if (strlen(name) == 7 &&
+ !strncmp(name, "default", 7)) {
+ F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
+ } else if (strlen(name) == 5 &&
+ !strncmp(name, "reuse", 5)) {
+ F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
+ } else {
+ kfree(name);
+ return -EINVAL;
+ }
+ kfree(name);
+ break;
+ case Opt_fsync:
+ name = match_strdup(&args[0]);
+ if (!name)
+ return -ENOMEM;
+ if (strlen(name) == 5 &&
+ !strncmp(name, "posix", 5)) {
+ F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX;
+ } else if (strlen(name) == 6 &&
+ !strncmp(name, "strict", 6)) {
+ F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_STRICT;
+ } else {
+ kfree(name);
+ return -EINVAL;
+ }
+ kfree(name);
+ break;
+ case Opt_test_dummy_encryption:
+#ifdef CONFIG_F2FS_FS_ENCRYPTION
+ if (!f2fs_sb_has_encrypt(sb)) {
+ f2fs_msg(sb, KERN_ERR, "Encrypt feature is off");
+ return -EINVAL;
+ }
+
+ F2FS_OPTION(sbi).test_dummy_encryption = true;
+ f2fs_msg(sb, KERN_INFO,
+ "Test dummy encryption mode enabled");
+#else
+ f2fs_msg(sb, KERN_INFO,
+ "Test dummy encryption mount option ignored");
+#endif
+ break;
default:
f2fs_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" or missing value",
@@ -699,14 +781,22 @@ static int parse_options(struct super_block *sb, char *options)
}
if (test_opt(sbi, INLINE_XATTR_SIZE)) {
+ if (!f2fs_sb_has_extra_attr(sb) ||
+ !f2fs_sb_has_flexible_inline_xattr(sb)) {
+ f2fs_msg(sb, KERN_ERR,
+ "extra_attr or flexible_inline_xattr "
+ "feature is off");
+ return -EINVAL;
+ }
if (!test_opt(sbi, INLINE_XATTR)) {
f2fs_msg(sb, KERN_ERR,
"inline_xattr_size option should be "
"set with inline_xattr option");
return -EINVAL;
}
- if (!sbi->inline_xattr_size ||
- sbi->inline_xattr_size >= DEF_ADDRS_PER_INODE -
+ if (!F2FS_OPTION(sbi).inline_xattr_size ||
+ F2FS_OPTION(sbi).inline_xattr_size >=
+ DEF_ADDRS_PER_INODE -
F2FS_TOTAL_EXTRA_ATTR_SIZE -
DEF_INLINE_RESERVED_SIZE -
DEF_MIN_INLINE_SIZE) {
@@ -715,6 +805,12 @@ static int parse_options(struct super_block *sb, char *options)
return -EINVAL;
}
}
+
+ /* Not pass down write hints if the number of active logs is lesser
+ * than NR_CURSEG_TYPE.
+ */
+ if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE)
+ F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
return 0;
}
@@ -731,7 +827,6 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
/* Initialize f2fs-specific inode info */
atomic_set(&fi->dirty_pages, 0);
fi->i_current_depth = 1;
- fi->i_advise = 0;
init_rwsem(&fi->i_sem);
INIT_LIST_HEAD(&fi->dirty_list);
INIT_LIST_HEAD(&fi->gdirty_list);
@@ -743,10 +838,6 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
init_rwsem(&fi->i_mmap_sem);
init_rwsem(&fi->i_xattr_sem);
-#ifdef CONFIG_QUOTA
- memset(&fi->i_dquot, 0, sizeof(fi->i_dquot));
- fi->i_reserved_quota = 0;
-#endif
/* Will be used by directory only */
fi->i_dir_level = F2FS_SB(sb)->dir_level;
@@ -956,7 +1047,7 @@ static void f2fs_put_super(struct super_block *sb)
mempool_destroy(sbi->write_io_dummy);
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++)
- kfree(sbi->s_qf_names[i]);
+ kfree(F2FS_OPTION(sbi).s_qf_names[i]);
#endif
destroy_percpu_info(sbi);
for (i = 0; i < NR_PAGE_TYPE; i++)
@@ -1070,8 +1161,9 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_blocks = total_count - start_count;
buf->f_bfree = user_block_count - valid_user_blocks(sbi) -
sbi->current_reserved_blocks;
- if (buf->f_bfree > sbi->root_reserved_blocks)
- buf->f_bavail = buf->f_bfree - sbi->root_reserved_blocks;
+ if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks)
+ buf->f_bavail = buf->f_bfree -
+ F2FS_OPTION(sbi).root_reserved_blocks;
else
buf->f_bavail = 0;
@@ -1106,10 +1198,10 @@ static inline void f2fs_show_quota_options(struct seq_file *seq,
#ifdef CONFIG_QUOTA
struct f2fs_sb_info *sbi = F2FS_SB(sb);
- if (sbi->s_jquota_fmt) {
+ if (F2FS_OPTION(sbi).s_jquota_fmt) {
char *fmtname = "";
- switch (sbi->s_jquota_fmt) {
+ switch (F2FS_OPTION(sbi).s_jquota_fmt) {
case QFMT_VFS_OLD:
fmtname = "vfsold";
break;
@@ -1123,14 +1215,17 @@ static inline void f2fs_show_quota_options(struct seq_file *seq,
seq_printf(seq, ",jqfmt=%s", fmtname);
}
- if (sbi->s_qf_names[USRQUOTA])
- seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]);
+ if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA])
+ seq_show_option(seq, "usrjquota",
+ F2FS_OPTION(sbi).s_qf_names[USRQUOTA]);
- if (sbi->s_qf_names[GRPQUOTA])
- seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]);
+ if (F2FS_OPTION(sbi).s_qf_names[GRPQUOTA])
+ seq_show_option(seq, "grpjquota",
+ F2FS_OPTION(sbi).s_qf_names[GRPQUOTA]);
- if (sbi->s_qf_names[PRJQUOTA])
- seq_show_option(seq, "prjjquota", sbi->s_qf_names[PRJQUOTA]);
+ if (F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
+ seq_show_option(seq, "prjjquota",
+ F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]);
#endif
}
@@ -1165,7 +1260,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",noinline_xattr");
if (test_opt(sbi, INLINE_XATTR_SIZE))
seq_printf(seq, ",inline_xattr_size=%u",
- sbi->inline_xattr_size);
+ F2FS_OPTION(sbi).inline_xattr_size);
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
if (test_opt(sbi, POSIX_ACL))
@@ -1201,18 +1296,20 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, "adaptive");
else if (test_opt(sbi, LFS))
seq_puts(seq, "lfs");
- seq_printf(seq, ",active_logs=%u", sbi->active_logs);
+ seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs);
if (test_opt(sbi, RESERVE_ROOT))
seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u",
- sbi->root_reserved_blocks,
- from_kuid_munged(&init_user_ns, sbi->s_resuid),
- from_kgid_munged(&init_user_ns, sbi->s_resgid));
+ F2FS_OPTION(sbi).root_reserved_blocks,
+ from_kuid_munged(&init_user_ns,
+ F2FS_OPTION(sbi).s_resuid),
+ from_kgid_munged(&init_user_ns,
+ F2FS_OPTION(sbi).s_resgid));
if (F2FS_IO_SIZE_BITS(sbi))
seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (test_opt(sbi, FAULT_INJECTION))
seq_printf(seq, ",fault_injection=%u",
- sbi->fault_info.inject_rate);
+ F2FS_OPTION(sbi).fault_info.inject_rate);
#endif
#ifdef CONFIG_QUOTA
if (test_opt(sbi, QUOTA))
@@ -1225,15 +1322,37 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",prjquota");
#endif
f2fs_show_quota_options(seq, sbi->sb);
+ if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER)
+ seq_printf(seq, ",whint_mode=%s", "user-based");
+ else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS)
+ seq_printf(seq, ",whint_mode=%s", "fs-based");
+#ifdef CONFIG_F2FS_FS_ENCRYPTION
+ if (F2FS_OPTION(sbi).test_dummy_encryption)
+ seq_puts(seq, ",test_dummy_encryption");
+#endif
+
+ if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_DEFAULT)
+ seq_printf(seq, ",alloc_mode=%s", "default");
+ else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
+ seq_printf(seq, ",alloc_mode=%s", "reuse");
+ if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX)
+ seq_printf(seq, ",fsync_mode=%s", "posix");
+ else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
+ seq_printf(seq, ",fsync_mode=%s", "strict");
return 0;
}
static void default_options(struct f2fs_sb_info *sbi)
{
/* init some FS parameters */
- sbi->active_logs = NR_CURSEG_TYPE;
- sbi->inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
+ F2FS_OPTION(sbi).active_logs = NR_CURSEG_TYPE;
+ F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
+ F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
+ F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
+ F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX;
+ F2FS_OPTION(sbi).test_dummy_encryption = false;
+ sbi->readdir_ra = 1;
set_opt(sbi, BG_GC);
set_opt(sbi, INLINE_XATTR);
@@ -1243,7 +1362,7 @@ static void default_options(struct f2fs_sb_info *sbi)
set_opt(sbi, NOHEAP);
sbi->sb->s_flags |= SB_LAZYTIME;
set_opt(sbi, FLUSH_MERGE);
- if (f2fs_sb_mounted_blkzoned(sbi->sb)) {
+ if (f2fs_sb_has_blkzoned(sbi->sb)) {
set_opt_mode(sbi, F2FS_MOUNT_LFS);
set_opt(sbi, DISCARD);
} else {
@@ -1270,16 +1389,11 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct f2fs_mount_info org_mount_opt;
unsigned long old_sb_flags;
- int err, active_logs;
+ int err;
bool need_restart_gc = false;
bool need_stop_gc = false;
bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- struct f2fs_fault_info ffi = sbi->fault_info;
-#endif
#ifdef CONFIG_QUOTA
- int s_jquota_fmt;
- char *s_qf_names[MAXQUOTAS];
int i, j;
#endif
@@ -1289,21 +1403,21 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
*/
org_mount_opt = sbi->mount_opt;
old_sb_flags = sb->s_flags;
- active_logs = sbi->active_logs;
#ifdef CONFIG_QUOTA
- s_jquota_fmt = sbi->s_jquota_fmt;
+ org_mount_opt.s_jquota_fmt = F2FS_OPTION(sbi).s_jquota_fmt;
for (i = 0; i < MAXQUOTAS; i++) {
- if (sbi->s_qf_names[i]) {
- s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
- GFP_KERNEL);
- if (!s_qf_names[i]) {
+ if (F2FS_OPTION(sbi).s_qf_names[i]) {
+ org_mount_opt.s_qf_names[i] =
+ kstrdup(F2FS_OPTION(sbi).s_qf_names[i],
+ GFP_KERNEL);
+ if (!org_mount_opt.s_qf_names[i]) {
for (j = 0; j < i; j++)
- kfree(s_qf_names[j]);
+ kfree(org_mount_opt.s_qf_names[j]);
return -ENOMEM;
}
} else {
- s_qf_names[i] = NULL;
+ org_mount_opt.s_qf_names[i] = NULL;
}
}
#endif
@@ -1373,7 +1487,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
need_stop_gc = true;
}
- if (*flags & SB_RDONLY) {
+ if (*flags & SB_RDONLY ||
+ F2FS_OPTION(sbi).whint_mode != org_mount_opt.whint_mode) {
writeback_inodes_sb(sb, WB_REASON_SYNC);
sync_inodes_sb(sb);
@@ -1399,7 +1514,7 @@ skip:
#ifdef CONFIG_QUOTA
/* Release old quota file names */
for (i = 0; i < MAXQUOTAS; i++)
- kfree(s_qf_names[i]);
+ kfree(org_mount_opt.s_qf_names[i]);
#endif
/* Update the POSIXACL Flag */
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
@@ -1417,18 +1532,14 @@ restore_gc:
}
restore_opts:
#ifdef CONFIG_QUOTA
- sbi->s_jquota_fmt = s_jquota_fmt;
+ F2FS_OPTION(sbi).s_jquota_fmt = org_mount_opt.s_jquota_fmt;
for (i = 0; i < MAXQUOTAS; i++) {
- kfree(sbi->s_qf_names[i]);
- sbi->s_qf_names[i] = s_qf_names[i];
+ kfree(F2FS_OPTION(sbi).s_qf_names[i]);
+ F2FS_OPTION(sbi).s_qf_names[i] = org_mount_opt.s_qf_names[i];
}
#endif
sbi->mount_opt = org_mount_opt;
- sbi->active_logs = active_logs;
sb->s_flags = old_sb_flags;
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- sbi->fault_info = ffi;
-#endif
return err;
}
@@ -1456,7 +1567,7 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data,
while (toread > 0) {
tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread);
repeat:
- page = read_mapping_page(mapping, blkidx, NULL);
+ page = read_cache_page_gfp(mapping, blkidx, GFP_NOFS);
if (IS_ERR(page)) {
if (PTR_ERR(page) == -ENOMEM) {
congestion_wait(BLK_RW_ASYNC, HZ/50);
@@ -1550,8 +1661,8 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode)
static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
{
- return dquot_quota_on_mount(sbi->sb, sbi->s_qf_names[type],
- sbi->s_jquota_fmt, type);
+ return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type],
+ F2FS_OPTION(sbi).s_jquota_fmt, type);
}
int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly)
@@ -1570,7 +1681,7 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly)
}
for (i = 0; i < MAXQUOTAS; i++) {
- if (sbi->s_qf_names[i]) {
+ if (F2FS_OPTION(sbi).s_qf_names[i]) {
err = f2fs_quota_on_mount(sbi, i);
if (!err) {
enabled = 1;
@@ -1797,11 +1908,28 @@ static int f2fs_get_context(struct inode *inode, void *ctx, size_t len)
static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len,
void *fs_data)
{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+ /*
+ * Encrypting the root directory is not allowed because fsck
+ * expects lost+found directory to exist and remain unencrypted
+ * if LOST_FOUND feature is enabled.
+ *
+ */
+ if (f2fs_sb_has_lost_found(sbi->sb) &&
+ inode->i_ino == F2FS_ROOT_INO(sbi))
+ return -EPERM;
+
return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
ctx, len, fs_data, XATTR_CREATE);
}
+static bool f2fs_dummy_context(struct inode *inode)
+{
+ return DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(inode));
+}
+
static unsigned f2fs_max_namelen(struct inode *inode)
{
return S_ISLNK(inode->i_mode) ?
@@ -1812,6 +1940,7 @@ static const struct fscrypt_operations f2fs_cryptops = {
.key_prefix = "f2fs:",
.get_context = f2fs_get_context,
.set_context = f2fs_set_context,
+ .dummy_context = f2fs_dummy_context,
.empty_dir = f2fs_empty_dir,
.max_namelen = f2fs_max_namelen,
};
@@ -1894,7 +2023,6 @@ static int __f2fs_commit_super(struct buffer_head *bh,
lock_buffer(bh);
if (super)
memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super));
- set_buffer_uptodate(bh);
set_buffer_dirty(bh);
unlock_buffer(bh);
@@ -2181,6 +2309,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->dirty_device = 0;
spin_lock_init(&sbi->dev_lock);
+
+ init_rwsem(&sbi->sb_lock);
}
static int init_percpu_info(struct f2fs_sb_info *sbi)
@@ -2206,7 +2336,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
unsigned int n = 0;
int err = -EIO;
- if (!f2fs_sb_mounted_blkzoned(sbi->sb))
+ if (!f2fs_sb_has_blkzoned(sbi->sb))
return 0;
if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
@@ -2334,7 +2464,7 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
}
/* write back-up superblock first */
- bh = sb_getblk(sbi->sb, sbi->valid_super_block ? 0: 1);
+ bh = sb_bread(sbi->sb, sbi->valid_super_block ? 0 : 1);
if (!bh)
return -EIO;
err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi));
@@ -2345,7 +2475,7 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
return err;
/* write current valid superblock */
- bh = sb_getblk(sbi->sb, sbi->valid_super_block);
+ bh = sb_bread(sbi->sb, sbi->valid_super_block);
if (!bh)
return -EIO;
err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi));
@@ -2413,7 +2543,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
#ifdef CONFIG_BLK_DEV_ZONED
if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
- !f2fs_sb_mounted_blkzoned(sbi->sb)) {
+ !f2fs_sb_has_blkzoned(sbi->sb)) {
f2fs_msg(sbi->sb, KERN_ERR,
"Zoned block device feature not enabled\n");
return -EINVAL;
@@ -2447,6 +2577,18 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
return 0;
}
+static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_sm_info *sm_i = SM_I(sbi);
+
+ /* adjust parameters according to the volume size */
+ if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) {
+ F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
+ sm_i->dcc_info->discard_granularity = 1;
+ sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
+ }
+}
+
static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
{
struct f2fs_sb_info *sbi;
@@ -2494,8 +2636,8 @@ try_onemore:
sb->s_fs_info = sbi;
sbi->raw_super = raw_super;
- sbi->s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
- sbi->s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID);
+ F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
+ F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID);
/* precompute checksum seed for metadata */
if (f2fs_sb_has_inode_chksum(sb))
@@ -2508,7 +2650,7 @@ try_onemore:
* devices, but mandatory for host-managed zoned block devices.
*/
#ifndef CONFIG_BLK_DEV_ZONED
- if (f2fs_sb_mounted_blkzoned(sb)) {
+ if (f2fs_sb_has_blkzoned(sb)) {
f2fs_msg(sb, KERN_ERR,
"Zoned block device support is not enabled\n");
err = -EOPNOTSUPP;
@@ -2724,7 +2866,7 @@ try_onemore:
* Turn on quotas which were not enabled for read-only mounts if
* filesystem has quota feature, so that they are updated correctly.
*/
- if (f2fs_sb_has_quota_ino(sb) && !sb_rdonly(sb)) {
+ if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) {
err = f2fs_enable_quotas(sb);
if (err) {
f2fs_msg(sb, KERN_ERR,
@@ -2799,6 +2941,8 @@ skip_recovery:
f2fs_join_shrinker(sbi);
+ f2fs_tuning_parameters(sbi);
+
f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx",
cur_cp_version(F2FS_CKPT(sbi)));
f2fs_update_time(sbi, CP_TIME);
@@ -2807,7 +2951,7 @@ skip_recovery:
free_meta:
#ifdef CONFIG_QUOTA
- if (f2fs_sb_has_quota_ino(sb) && !sb_rdonly(sb))
+ if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb))
f2fs_quota_off_umount(sbi->sb);
#endif
f2fs_sync_inode_meta(sbi);
@@ -2851,7 +2995,7 @@ free_bio_info:
free_options:
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++)
- kfree(sbi->s_qf_names[i]);
+ kfree(F2FS_OPTION(sbi).s_qf_names[i]);
#endif
kfree(options);
free_sb_buf:
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index d978c7b6ea04..f33a56d6e6dd 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -58,7 +58,7 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
#ifdef CONFIG_F2FS_FAULT_INJECTION
else if (struct_type == FAULT_INFO_RATE ||
struct_type == FAULT_INFO_TYPE)
- return (unsigned char *)&sbi->fault_info;
+ return (unsigned char *)&F2FS_OPTION(sbi).fault_info;
#endif
return NULL;
}
@@ -92,10 +92,10 @@ static ssize_t features_show(struct f2fs_attr *a,
if (!sb->s_bdev->bd_part)
return snprintf(buf, PAGE_SIZE, "0\n");
- if (f2fs_sb_has_crypto(sb))
+ if (f2fs_sb_has_encrypt(sb))
len += snprintf(buf, PAGE_SIZE - len, "%s",
"encryption");
- if (f2fs_sb_mounted_blkzoned(sb))
+ if (f2fs_sb_has_blkzoned(sb))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "blkzoned");
if (f2fs_sb_has_extra_attr(sb))
@@ -116,6 +116,9 @@ static ssize_t features_show(struct f2fs_attr *a,
if (f2fs_sb_has_inode_crtime(sb))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "inode_crtime");
+ if (f2fs_sb_has_lost_found(sb))
+ len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "lost_found");
len += snprintf(buf + len, PAGE_SIZE - len, "\n");
return len;
}
@@ -136,6 +139,27 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
if (!ptr)
return -EINVAL;
+ if (!strcmp(a->attr.name, "extension_list")) {
+ __u8 (*extlist)[F2FS_EXTENSION_LEN] =
+ sbi->raw_super->extension_list;
+ int cold_count = le32_to_cpu(sbi->raw_super->extension_count);
+ int hot_count = sbi->raw_super->hot_ext_count;
+ int len = 0, i;
+
+ len += snprintf(buf + len, PAGE_SIZE - len,
+ "cold file extenstion:\n");
+ for (i = 0; i < cold_count; i++)
+ len += snprintf(buf + len, PAGE_SIZE - len, "%s\n",
+ extlist[i]);
+
+ len += snprintf(buf + len, PAGE_SIZE - len,
+ "hot file extenstion:\n");
+ for (i = cold_count; i < cold_count + hot_count; i++)
+ len += snprintf(buf + len, PAGE_SIZE - len, "%s\n",
+ extlist[i]);
+ return len;
+ }
+
ui = (unsigned int *)(ptr + a->offset);
return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
@@ -154,6 +178,41 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
if (!ptr)
return -EINVAL;
+ if (!strcmp(a->attr.name, "extension_list")) {
+ const char *name = strim((char *)buf);
+ bool set = true, hot;
+
+ if (!strncmp(name, "[h]", 3))
+ hot = true;
+ else if (!strncmp(name, "[c]", 3))
+ hot = false;
+ else
+ return -EINVAL;
+
+ name += 3;
+
+ if (*name == '!') {
+ name++;
+ set = false;
+ }
+
+ if (strlen(name) >= F2FS_EXTENSION_LEN)
+ return -EINVAL;
+
+ down_write(&sbi->sb_lock);
+
+ ret = update_extension_list(sbi, name, hot, set);
+ if (ret)
+ goto out;
+
+ ret = f2fs_commit_super(sbi, false);
+ if (ret)
+ update_extension_list(sbi, name, hot, !set);
+out:
+ up_write(&sbi->sb_lock);
+ return ret ? ret : count;
+ }
+
ui = (unsigned int *)(ptr + a->offset);
ret = kstrtoul(skip_spaces(buf), 0, &t);
@@ -166,7 +225,7 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
if (a->struct_type == RESERVED_BLOCKS) {
spin_lock(&sbi->stat_lock);
if (t > (unsigned long)(sbi->user_block_count -
- sbi->root_reserved_blocks)) {
+ F2FS_OPTION(sbi).root_reserved_blocks)) {
spin_unlock(&sbi->stat_lock);
return -EINVAL;
}
@@ -236,6 +295,7 @@ enum feat_id {
FEAT_FLEXIBLE_INLINE_XATTR,
FEAT_QUOTA_INO,
FEAT_INODE_CRTIME,
+ FEAT_LOST_FOUND,
};
static ssize_t f2fs_feature_show(struct f2fs_attr *a,
@@ -251,6 +311,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a,
case FEAT_FLEXIBLE_INLINE_XATTR:
case FEAT_QUOTA_INO:
case FEAT_INODE_CRTIME:
+ case FEAT_LOST_FOUND:
return snprintf(buf, PAGE_SIZE, "supported\n");
}
return 0;
@@ -307,6 +368,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list);
#ifdef CONFIG_F2FS_FAULT_INJECTION
F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
@@ -329,6 +391,7 @@ F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM);
F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR);
F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO);
F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME);
+F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
@@ -357,6 +420,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(iostat_enable),
ATTR_LIST(readdir_ra),
ATTR_LIST(gc_pin_file_thresh),
+ ATTR_LIST(extension_list),
#ifdef CONFIG_F2FS_FAULT_INJECTION
ATTR_LIST(inject_rate),
ATTR_LIST(inject_type),
@@ -383,6 +447,7 @@ static struct attribute *f2fs_feat_attrs[] = {
ATTR_LIST(flexible_inline_xattr),
ATTR_LIST(quota_ino),
ATTR_LIST(inode_crtime),
+ ATTR_LIST(lost_found),
NULL,
};
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d4d04fee568a..1280f915079b 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1343,7 +1343,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
dirty = inode->i_state & I_DIRTY;
if (inode->i_state & I_DIRTY_TIME) {
- if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
+ if ((dirty & I_DIRTY_INODE) ||
wbc->sync_mode == WB_SYNC_ALL ||
unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) ||
unlikely(time_after(jiffies,
@@ -2112,7 +2112,6 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
*/
void __mark_inode_dirty(struct inode *inode, int flags)
{
-#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
struct super_block *sb = inode->i_sb;
int dirtytime;
@@ -2122,7 +2121,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
* Don't do this for I_DIRTY_PAGES - that doesn't actually
* dirty the inode itself
*/
- if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_TIME)) {
+ if (flags & (I_DIRTY_INODE | I_DIRTY_TIME)) {
trace_writeback_dirty_inode_start(inode, flags);
if (sb->s_op->dirty_inode)
@@ -2197,7 +2196,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
if (dirtytime)
inode->dirtied_time_when = jiffies;
- if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES))
+ if (inode->i_state & I_DIRTY)
dirty_list = &wb->b_dirty;
else
dirty_list = &wb->b_dirty_time;
@@ -2221,8 +2220,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
}
out_unlock_inode:
spin_unlock(&inode->i_lock);
-
-#undef I_DIRTY_INODE
}
EXPORT_SYMBOL(__mark_inode_dirty);
diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c
index 56cce7fdd39e..c184c5a356ff 100644
--- a/fs/fscache/cache.c
+++ b/fs/fscache/cache.c
@@ -125,7 +125,7 @@ struct fscache_cache *fscache_select_cache_for_object(
}
/* the parent is unbacked */
- if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) {
+ if (cookie->type != FSCACHE_COOKIE_TYPE_INDEX) {
/* cookie not an index and is unbacked */
spin_unlock(&cookie->lock);
_leave(" = NULL [cookie ub,ni]");
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index d705125665f0..7dc55b93a830 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -21,12 +21,54 @@ struct kmem_cache *fscache_cookie_jar;
static atomic_t fscache_object_debug_id = ATOMIC_INIT(0);
-static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie);
+#define fscache_cookie_hash_shift 15
+static struct hlist_bl_head fscache_cookie_hash[1 << fscache_cookie_hash_shift];
+
+static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie,
+ loff_t object_size);
static int fscache_alloc_object(struct fscache_cache *cache,
struct fscache_cookie *cookie);
static int fscache_attach_object(struct fscache_cookie *cookie,
struct fscache_object *object);
+static void fscache_print_cookie(struct fscache_cookie *cookie, char prefix)
+{
+ struct hlist_node *object;
+ const u8 *k;
+ unsigned loop;
+
+ pr_err("%c-cookie c=%p [p=%p fl=%lx nc=%u na=%u]\n",
+ prefix, cookie, cookie->parent, cookie->flags,
+ atomic_read(&cookie->n_children),
+ atomic_read(&cookie->n_active));
+ pr_err("%c-cookie d=%p n=%p\n",
+ prefix, cookie->def, cookie->netfs_data);
+
+ object = READ_ONCE(cookie->backing_objects.first);
+ if (object)
+ pr_err("%c-cookie o=%p\n",
+ prefix, hlist_entry(object, struct fscache_object, cookie_link));
+
+ pr_err("%c-key=[%u] '", prefix, cookie->key_len);
+ k = (cookie->key_len <= sizeof(cookie->inline_key)) ?
+ cookie->inline_key : cookie->key;
+ for (loop = 0; loop < cookie->key_len; loop++)
+ pr_cont("%02x", k[loop]);
+ pr_cont("'\n");
+}
+
+void fscache_free_cookie(struct fscache_cookie *cookie)
+{
+ if (cookie) {
+ BUG_ON(!hlist_empty(&cookie->backing_objects));
+ if (cookie->aux_len > sizeof(cookie->inline_aux))
+ kfree(cookie->aux);
+ if (cookie->key_len > sizeof(cookie->inline_key))
+ kfree(cookie->key);
+ kmem_cache_free(fscache_cookie_jar, cookie);
+ }
+}
+
/*
* initialise an cookie jar slab element prior to any use
*/
@@ -41,6 +83,170 @@ void fscache_cookie_init_once(void *_cookie)
}
/*
+ * Set the index key in a cookie. The cookie struct has space for a 12-byte
+ * key plus length and hash, but if that's not big enough, it's instead a
+ * pointer to a buffer containing 3 bytes of hash, 1 byte of length and then
+ * the key data.
+ */
+static int fscache_set_key(struct fscache_cookie *cookie,
+ const void *index_key, size_t index_key_len)
+{
+ unsigned long long h;
+ u32 *buf;
+ int i;
+
+ cookie->key_len = index_key_len;
+
+ if (index_key_len > sizeof(cookie->inline_key)) {
+ buf = kzalloc(index_key_len, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ cookie->key = buf;
+ } else {
+ buf = (u32 *)cookie->inline_key;
+ buf[0] = 0;
+ buf[1] = 0;
+ buf[2] = 0;
+ }
+
+ memcpy(buf, index_key, index_key_len);
+
+ /* Calculate a hash and combine this with the length in the first word
+ * or first half word
+ */
+ h = (unsigned long)cookie->parent;
+ h += index_key_len + cookie->type;
+ for (i = 0; i < (index_key_len + sizeof(u32) - 1) / sizeof(u32); i++)
+ h += buf[i];
+
+ cookie->key_hash = h ^ (h >> 32);
+ return 0;
+}
+
+static long fscache_compare_cookie(const struct fscache_cookie *a,
+ const struct fscache_cookie *b)
+{
+ const void *ka, *kb;
+
+ if (a->key_hash != b->key_hash)
+ return (long)a->key_hash - (long)b->key_hash;
+ if (a->parent != b->parent)
+ return (long)a->parent - (long)b->parent;
+ if (a->key_len != b->key_len)
+ return (long)a->key_len - (long)b->key_len;
+ if (a->type != b->type)
+ return (long)a->type - (long)b->type;
+
+ if (a->key_len <= sizeof(a->inline_key)) {
+ ka = &a->inline_key;
+ kb = &b->inline_key;
+ } else {
+ ka = a->key;
+ kb = b->key;
+ }
+ return memcmp(ka, kb, a->key_len);
+}
+
+/*
+ * Allocate a cookie.
+ */
+struct fscache_cookie *fscache_alloc_cookie(
+ struct fscache_cookie *parent,
+ const struct fscache_cookie_def *def,
+ const void *index_key, size_t index_key_len,
+ const void *aux_data, size_t aux_data_len,
+ void *netfs_data,
+ loff_t object_size)
+{
+ struct fscache_cookie *cookie;
+
+ /* allocate and initialise a cookie */
+ cookie = kmem_cache_alloc(fscache_cookie_jar, GFP_KERNEL);
+ if (!cookie)
+ return NULL;
+
+ cookie->key_len = index_key_len;
+ cookie->aux_len = aux_data_len;
+
+ if (fscache_set_key(cookie, index_key, index_key_len) < 0)
+ goto nomem;
+
+ if (cookie->aux_len <= sizeof(cookie->inline_aux)) {
+ memcpy(cookie->inline_aux, aux_data, cookie->aux_len);
+ } else {
+ cookie->aux = kmemdup(aux_data, cookie->aux_len, GFP_KERNEL);
+ if (!cookie->aux)
+ goto nomem;
+ }
+
+ atomic_set(&cookie->usage, 1);
+ atomic_set(&cookie->n_children, 0);
+
+ /* We keep the active count elevated until relinquishment to prevent an
+ * attempt to wake up every time the object operations queue quiesces.
+ */
+ atomic_set(&cookie->n_active, 1);
+
+ cookie->def = def;
+ cookie->parent = parent;
+ cookie->netfs_data = netfs_data;
+ cookie->flags = (1 << FSCACHE_COOKIE_NO_DATA_YET);
+ cookie->type = def->type;
+
+ /* radix tree insertion won't use the preallocation pool unless it's
+ * told it may not wait */
+ INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+ return cookie;
+
+nomem:
+ fscache_free_cookie(cookie);
+ return NULL;
+}
+
+/*
+ * Attempt to insert the new cookie into the hash. If there's a collision, we
+ * return the old cookie if it's not in use and an error otherwise.
+ */
+struct fscache_cookie *fscache_hash_cookie(struct fscache_cookie *candidate)
+{
+ struct fscache_cookie *cursor;
+ struct hlist_bl_head *h;
+ struct hlist_bl_node *p;
+ unsigned int bucket;
+
+ bucket = candidate->key_hash & (ARRAY_SIZE(fscache_cookie_hash) - 1);
+ h = &fscache_cookie_hash[bucket];
+
+ hlist_bl_lock(h);
+ hlist_bl_for_each_entry(cursor, p, h, hash_link) {
+ if (fscache_compare_cookie(candidate, cursor) == 0)
+ goto collision;
+ }
+
+ __set_bit(FSCACHE_COOKIE_ACQUIRED, &candidate->flags);
+ fscache_cookie_get(candidate->parent, fscache_cookie_get_acquire_parent);
+ atomic_inc(&candidate->parent->n_children);
+ hlist_bl_add_head(&candidate->hash_link, h);
+ hlist_bl_unlock(h);
+ return candidate;
+
+collision:
+ if (test_and_set_bit(FSCACHE_COOKIE_ACQUIRED, &cursor->flags)) {
+ trace_fscache_cookie(cursor, fscache_cookie_collision,
+ atomic_read(&cursor->usage));
+ pr_err("Duplicate cookie detected\n");
+ fscache_print_cookie(cursor, 'O');
+ fscache_print_cookie(candidate, 'N');
+ hlist_bl_unlock(h);
+ return NULL;
+ }
+
+ fscache_cookie_get(cursor, fscache_cookie_get_reacquire);
+ hlist_bl_unlock(h);
+ return cursor;
+}
+
+/*
* request a cookie to represent an object (index, datafile, xattr, etc)
* - parent specifies the parent object
* - the top level index cookie for each netfs is stored in the fscache_netfs
@@ -58,10 +264,13 @@ void fscache_cookie_init_once(void *_cookie)
struct fscache_cookie *__fscache_acquire_cookie(
struct fscache_cookie *parent,
const struct fscache_cookie_def *def,
+ const void *index_key, size_t index_key_len,
+ const void *aux_data, size_t aux_data_len,
void *netfs_data,
+ loff_t object_size,
bool enable)
{
- struct fscache_cookie *cookie;
+ struct fscache_cookie *candidate, *cookie;
BUG_ON(!def);
@@ -69,6 +278,13 @@ struct fscache_cookie *__fscache_acquire_cookie(
parent ? (char *) parent->def->name : "<no-parent>",
def->name, netfs_data, enable);
+ if (!index_key || !index_key_len || index_key_len > 255 || aux_data_len > 255)
+ return NULL;
+ if (!aux_data || !aux_data_len) {
+ aux_data = NULL;
+ aux_data_len = 0;
+ }
+
fscache_stat(&fscache_n_acquires);
/* if there's no parent cookie, then we don't create one here either */
@@ -79,41 +295,31 @@ struct fscache_cookie *__fscache_acquire_cookie(
}
/* validate the definition */
- BUG_ON(!def->get_key);
BUG_ON(!def->name[0]);
BUG_ON(def->type == FSCACHE_COOKIE_TYPE_INDEX &&
- parent->def->type != FSCACHE_COOKIE_TYPE_INDEX);
+ parent->type != FSCACHE_COOKIE_TYPE_INDEX);
- /* allocate and initialise a cookie */
- cookie = kmem_cache_alloc(fscache_cookie_jar, GFP_KERNEL);
- if (!cookie) {
+ candidate = fscache_alloc_cookie(parent, def,
+ index_key, index_key_len,
+ aux_data, aux_data_len,
+ netfs_data, object_size);
+ if (!candidate) {
fscache_stat(&fscache_n_acquires_oom);
_leave(" [ENOMEM]");
return NULL;
}
- atomic_set(&cookie->usage, 1);
- atomic_set(&cookie->n_children, 0);
-
- /* We keep the active count elevated until relinquishment to prevent an
- * attempt to wake up every time the object operations queue quiesces.
- */
- atomic_set(&cookie->n_active, 1);
-
- atomic_inc(&parent->usage);
- atomic_inc(&parent->n_children);
+ cookie = fscache_hash_cookie(candidate);
+ if (!cookie) {
+ trace_fscache_cookie(candidate, fscache_cookie_discard, 1);
+ goto out;
+ }
- cookie->def = def;
- cookie->parent = parent;
- cookie->netfs_data = netfs_data;
- cookie->flags = (1 << FSCACHE_COOKIE_NO_DATA_YET);
+ if (cookie == candidate)
+ candidate = NULL;
- /* radix tree insertion won't use the preallocation pool unless it's
- * told it may not wait */
- INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
-
- switch (cookie->def->type) {
+ switch (cookie->type) {
case FSCACHE_COOKIE_TYPE_INDEX:
fscache_stat(&fscache_n_cookie_index);
break;
@@ -125,16 +331,19 @@ struct fscache_cookie *__fscache_acquire_cookie(
break;
}
+ trace_fscache_acquire(cookie);
+
if (enable) {
/* if the object is an index then we need do nothing more here
* - we create indices on disk when we need them as an index
* may exist in multiple caches */
- if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) {
- if (fscache_acquire_non_index_cookie(cookie) == 0) {
+ if (cookie->type != FSCACHE_COOKIE_TYPE_INDEX) {
+ if (fscache_acquire_non_index_cookie(cookie, object_size) == 0) {
set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags);
} else {
atomic_dec(&parent->n_children);
- __fscache_cookie_put(cookie);
+ fscache_cookie_put(cookie,
+ fscache_cookie_put_acquire_nobufs);
fscache_stat(&fscache_n_acquires_nobufs);
_leave(" = NULL");
return NULL;
@@ -145,7 +354,9 @@ struct fscache_cookie *__fscache_acquire_cookie(
}
fscache_stat(&fscache_n_acquires_ok);
- _leave(" = %p", cookie);
+
+out:
+ fscache_free_cookie(candidate);
return cookie;
}
EXPORT_SYMBOL(__fscache_acquire_cookie);
@@ -154,24 +365,30 @@ EXPORT_SYMBOL(__fscache_acquire_cookie);
* Enable a cookie to permit it to accept new operations.
*/
void __fscache_enable_cookie(struct fscache_cookie *cookie,
+ const void *aux_data,
+ loff_t object_size,
bool (*can_enable)(void *data),
void *data)
{
_enter("%p", cookie);
+ trace_fscache_enable(cookie);
+
wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK,
TASK_UNINTERRUPTIBLE);
+ fscache_update_aux(cookie, aux_data);
+
if (test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags))
goto out_unlock;
if (can_enable && !can_enable(data)) {
/* The netfs decided it didn't want to enable after all */
- } else if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) {
+ } else if (cookie->type != FSCACHE_COOKIE_TYPE_INDEX) {
/* Wait for outstanding disablement to complete */
__fscache_wait_on_invalidate(cookie);
- if (fscache_acquire_non_index_cookie(cookie) == 0)
+ if (fscache_acquire_non_index_cookie(cookie, object_size) == 0)
set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags);
} else {
set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags);
@@ -188,11 +405,11 @@ EXPORT_SYMBOL(__fscache_enable_cookie);
* - this must make sure the index chain is instantiated and instantiate the
* object representation too
*/
-static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie)
+static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie,
+ loff_t object_size)
{
struct fscache_object *object;
struct fscache_cache *cache;
- uint64_t i_size;
int ret;
_enter("");
@@ -231,9 +448,6 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie)
return ret;
}
- /* pass on how big the object we're caching is supposed to be */
- cookie->def->get_attr(cookie->netfs_data, &i_size);
-
spin_lock(&cookie->lock);
if (hlist_empty(&cookie->backing_objects)) {
spin_unlock(&cookie->lock);
@@ -243,7 +457,7 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie)
object = hlist_entry(cookie->backing_objects.first,
struct fscache_object, cookie_link);
- fscache_set_store_limit(object, i_size);
+ fscache_set_store_limit(object, object_size);
/* initiate the process of looking up all the objects in the chain
* (done by fscache_initialise_object()) */
@@ -318,7 +532,7 @@ static int fscache_alloc_object(struct fscache_cache *cache,
* attached to the cookie */
if (fscache_attach_object(cookie, object) < 0) {
fscache_stat(&fscache_n_cop_put_object);
- cache->ops->put_object(object);
+ cache->ops->put_object(object, fscache_obj_put_attach_fail);
fscache_stat_d(&fscache_n_cop_put_object);
}
@@ -338,7 +552,7 @@ object_already_extant:
error_put:
fscache_stat(&fscache_n_cop_put_object);
- cache->ops->put_object(object);
+ cache->ops->put_object(object, fscache_obj_put_alloc_fail);
fscache_stat_d(&fscache_n_cop_put_object);
error:
_leave(" = %d", ret);
@@ -398,7 +612,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
/* attach to the cookie */
object->cookie = cookie;
- atomic_inc(&cookie->usage);
+ fscache_cookie_get(cookie, fscache_cookie_get_attach_object);
hlist_add_head(&object->cookie_link, &cookie->backing_objects);
fscache_objlist_add(object);
@@ -426,10 +640,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie)
* there, and if it's doing that, it may as well just retire the
* cookie.
*/
- ASSERTCMP(cookie->def->type, ==, FSCACHE_COOKIE_TYPE_DATAFILE);
-
- /* We will be updating the cookie too. */
- BUG_ON(!cookie->def->get_aux);
+ ASSERTCMP(cookie->type, ==, FSCACHE_COOKIE_TYPE_DATAFILE);
/* If there's an object, we tell the object state machine to handle the
* invalidation on our behalf, otherwise there's nothing to do.
@@ -473,7 +684,7 @@ EXPORT_SYMBOL(__fscache_wait_on_invalidate);
/*
* update the index entries backing a cookie
*/
-void __fscache_update_cookie(struct fscache_cookie *cookie)
+void __fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data)
{
struct fscache_object *object;
@@ -487,10 +698,10 @@ void __fscache_update_cookie(struct fscache_cookie *cookie)
_enter("{%s}", cookie->def->name);
- BUG_ON(!cookie->def->get_aux);
-
spin_lock(&cookie->lock);
+ fscache_update_aux(cookie, aux_data);
+
if (fscache_cookie_enabled(cookie)) {
/* update the index entry on disk in each cache backing this
* cookie.
@@ -509,13 +720,17 @@ EXPORT_SYMBOL(__fscache_update_cookie);
/*
* Disable a cookie to stop it from accepting new requests from the netfs.
*/
-void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
+void __fscache_disable_cookie(struct fscache_cookie *cookie,
+ const void *aux_data,
+ bool invalidate)
{
struct fscache_object *object;
bool awaken = false;
_enter("%p,%u", cookie, invalidate);
+ trace_fscache_disable(cookie);
+
ASSERTCMP(atomic_read(&cookie->n_active), >, 0);
if (atomic_read(&cookie->n_children) != 0) {
@@ -526,6 +741,9 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK,
TASK_UNINTERRUPTIBLE);
+
+ fscache_update_aux(cookie, aux_data);
+
if (!test_and_clear_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags))
goto out_unlock_enable;
@@ -563,7 +781,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
}
/* Make sure any pending writes are cancelled. */
- if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX)
+ if (cookie->type != FSCACHE_COOKIE_TYPE_INDEX)
fscache_invalidate_writes(cookie);
/* Reset the cookie state if it wasn't relinquished */
@@ -585,7 +803,9 @@ EXPORT_SYMBOL(__fscache_disable_cookie);
* - all dependents of this cookie must have already been unregistered
* (indices/files/pages)
*/
-void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire)
+void __fscache_relinquish_cookie(struct fscache_cookie *cookie,
+ const void *aux_data,
+ bool retire)
{
fscache_stat(&fscache_n_relinquishes);
if (retire)
@@ -601,10 +821,13 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire)
cookie, cookie->def->name, cookie->netfs_data,
atomic_read(&cookie->n_active), retire);
+ trace_fscache_relinquish(cookie, retire);
+
/* No further netfs-accessing operations on this cookie permitted */
- set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags);
+ if (test_and_set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags))
+ BUG();
- __fscache_disable_cookie(cookie, retire);
+ __fscache_disable_cookie(cookie, aux_data, retire);
/* Clear pointers back to the netfs */
cookie->netfs_data = NULL;
@@ -619,35 +842,54 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire)
/* Dispose of the netfs's link to the cookie */
ASSERTCMP(atomic_read(&cookie->usage), >, 0);
- fscache_cookie_put(cookie);
+ fscache_cookie_put(cookie, fscache_cookie_put_relinquish);
_leave("");
}
EXPORT_SYMBOL(__fscache_relinquish_cookie);
/*
- * destroy a cookie
+ * Remove a cookie from the hash table.
*/
-void __fscache_cookie_put(struct fscache_cookie *cookie)
+static void fscache_unhash_cookie(struct fscache_cookie *cookie)
+{
+ struct hlist_bl_head *h;
+ unsigned int bucket;
+
+ bucket = cookie->key_hash & (ARRAY_SIZE(fscache_cookie_hash) - 1);
+ h = &fscache_cookie_hash[bucket];
+
+ hlist_bl_lock(h);
+ hlist_bl_del(&cookie->hash_link);
+ hlist_bl_unlock(h);
+}
+
+/*
+ * Drop a reference to a cookie.
+ */
+void fscache_cookie_put(struct fscache_cookie *cookie,
+ enum fscache_cookie_trace where)
{
struct fscache_cookie *parent;
+ int usage;
_enter("%p", cookie);
- for (;;) {
- _debug("FREE COOKIE %p", cookie);
- parent = cookie->parent;
- BUG_ON(!hlist_empty(&cookie->backing_objects));
- kmem_cache_free(fscache_cookie_jar, cookie);
+ do {
+ usage = atomic_dec_return(&cookie->usage);
+ trace_fscache_cookie(cookie, where, usage);
- if (!parent)
- break;
+ if (usage > 0)
+ return;
+ BUG_ON(usage < 0);
+
+ parent = cookie->parent;
+ fscache_unhash_cookie(cookie);
+ fscache_free_cookie(cookie);
cookie = parent;
- BUG_ON(atomic_read(&cookie->usage) <= 0);
- if (!atomic_dec_and_test(&cookie->usage))
- break;
- }
+ where = fscache_cookie_put_parent;
+ } while (cookie);
_leave("");
}
@@ -657,7 +899,8 @@ void __fscache_cookie_put(struct fscache_cookie *cookie)
*
* NOTE: it only serves no-index type
*/
-int __fscache_check_consistency(struct fscache_cookie *cookie)
+int __fscache_check_consistency(struct fscache_cookie *cookie,
+ const void *aux_data)
{
struct fscache_operation *op;
struct fscache_object *object;
@@ -666,7 +909,7 @@ int __fscache_check_consistency(struct fscache_cookie *cookie)
_enter("%p,", cookie);
- ASSERTCMP(cookie->def->type, ==, FSCACHE_COOKIE_TYPE_DATAFILE);
+ ASSERTCMP(cookie->type, ==, FSCACHE_COOKIE_TYPE_DATAFILE);
if (fscache_wait_for_deferred_lookup(cookie) < 0)
return -ERESTARTSYS;
@@ -678,13 +921,16 @@ int __fscache_check_consistency(struct fscache_cookie *cookie)
if (!op)
return -ENOMEM;
- fscache_operation_init(op, NULL, NULL, NULL);
+ fscache_operation_init(cookie, op, NULL, NULL, NULL);
op->flags = FSCACHE_OP_MYTHREAD |
(1 << FSCACHE_OP_WAITING) |
(1 << FSCACHE_OP_UNUSE_COOKIE);
+ trace_fscache_page_op(cookie, NULL, op, fscache_page_op_check_consistency);
spin_lock(&cookie->lock);
+ fscache_update_aux(cookie, aux_data);
+
if (!fscache_cookie_enabled(cookie) ||
hlist_empty(&cookie->backing_objects))
goto inconsistent;
diff --git a/fs/fscache/fsdef.c b/fs/fscache/fsdef.c
index 5a117df2a9ef..aa46e48d8c75 100644
--- a/fs/fscache/fsdef.c
+++ b/fs/fscache/fsdef.c
@@ -13,16 +13,11 @@
#include <linux/module.h>
#include "internal.h"
-static uint16_t fscache_fsdef_netfs_get_key(const void *cookie_netfs_data,
- void *buffer, uint16_t bufmax);
-
-static uint16_t fscache_fsdef_netfs_get_aux(const void *cookie_netfs_data,
- void *buffer, uint16_t bufmax);
-
static
enum fscache_checkaux fscache_fsdef_netfs_check_aux(void *cookie_netfs_data,
const void *data,
- uint16_t datalen);
+ uint16_t datalen,
+ loff_t object_size);
/*
* The root index is owned by FS-Cache itself.
@@ -60,6 +55,7 @@ struct fscache_cookie fscache_fsdef_index = {
.backing_objects = HLIST_HEAD_INIT,
.def = &fscache_fsdef_index_def,
.flags = 1 << FSCACHE_COOKIE_ENABLED,
+ .type = FSCACHE_COOKIE_TYPE_INDEX,
};
EXPORT_SYMBOL(fscache_fsdef_index);
@@ -71,59 +67,18 @@ EXPORT_SYMBOL(fscache_fsdef_index);
struct fscache_cookie_def fscache_fsdef_netfs_def = {
.name = "FSDEF.netfs",
.type = FSCACHE_COOKIE_TYPE_INDEX,
- .get_key = fscache_fsdef_netfs_get_key,
- .get_aux = fscache_fsdef_netfs_get_aux,
.check_aux = fscache_fsdef_netfs_check_aux,
};
/*
- * get the key data for an FSDEF index record - this is the name of the netfs
- * for which this entry is created
- */
-static uint16_t fscache_fsdef_netfs_get_key(const void *cookie_netfs_data,
- void *buffer, uint16_t bufmax)
-{
- const struct fscache_netfs *netfs = cookie_netfs_data;
- unsigned klen;
-
- _enter("{%s.%u},", netfs->name, netfs->version);
-
- klen = strlen(netfs->name);
- if (klen > bufmax)
- return 0;
-
- memcpy(buffer, netfs->name, klen);
- return klen;
-}
-
-/*
- * get the auxiliary data for an FSDEF index record - this is the index
- * structure version number of the netfs for which this version is created
- */
-static uint16_t fscache_fsdef_netfs_get_aux(const void *cookie_netfs_data,
- void *buffer, uint16_t bufmax)
-{
- const struct fscache_netfs *netfs = cookie_netfs_data;
- unsigned dlen;
-
- _enter("{%s.%u},", netfs->name, netfs->version);
-
- dlen = sizeof(uint32_t);
- if (dlen > bufmax)
- return 0;
-
- memcpy(buffer, &netfs->version, dlen);
- return dlen;
-}
-
-/*
* check that the index structure version number stored in the auxiliary data
* matches the one the netfs gave us
*/
static enum fscache_checkaux fscache_fsdef_netfs_check_aux(
void *cookie_netfs_data,
const void *data,
- uint16_t datalen)
+ uint16_t datalen,
+ loff_t object_size)
{
struct fscache_netfs *netfs = cookie_netfs_data;
uint32_t version;
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index 0ff4b49a0037..500650f938fe 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -29,6 +29,7 @@
#define pr_fmt(fmt) "FS-Cache: " fmt
#include <linux/fscache-cache.h>
+#include <trace/events/fscache.h>
#include <linux/sched.h>
#define FSCACHE_MIN_THREADS 4
@@ -48,8 +49,16 @@ extern struct fscache_cache *fscache_select_cache_for_object(
*/
extern struct kmem_cache *fscache_cookie_jar;
+extern void fscache_free_cookie(struct fscache_cookie *);
extern void fscache_cookie_init_once(void *);
-extern void __fscache_cookie_put(struct fscache_cookie *);
+extern struct fscache_cookie *fscache_alloc_cookie(struct fscache_cookie *,
+ const struct fscache_cookie_def *,
+ const void *, size_t,
+ const void *, size_t,
+ void *, loff_t);
+extern struct fscache_cookie *fscache_hash_cookie(struct fscache_cookie *);
+extern void fscache_cookie_put(struct fscache_cookie *,
+ enum fscache_cookie_trace);
/*
* fsdef.c
@@ -311,14 +320,12 @@ static inline void fscache_raise_event(struct fscache_object *object,
fscache_enqueue_object(object);
}
-/*
- * drop a reference to a cookie
- */
-static inline void fscache_cookie_put(struct fscache_cookie *cookie)
+static inline void fscache_cookie_get(struct fscache_cookie *cookie,
+ enum fscache_cookie_trace where)
{
- BUG_ON(atomic_read(&cookie->usage) <= 0);
- if (atomic_dec_and_test(&cookie->usage))
- __fscache_cookie_put(cookie);
+ int usage = atomic_inc_return(&cookie->usage);
+
+ trace_fscache_cookie(cookie, where, usage);
}
/*
@@ -342,6 +349,27 @@ void fscache_put_context(struct fscache_cookie *cookie, void *context)
cookie->def->put_context(cookie->netfs_data, context);
}
+/*
+ * Update the auxiliary data on a cookie.
+ */
+static inline
+void fscache_update_aux(struct fscache_cookie *cookie, const void *aux_data)
+{
+ void *p;
+
+ if (!aux_data)
+ return;
+ if (cookie->aux_len <= sizeof(cookie->inline_aux))
+ p = cookie->inline_aux;
+ else
+ p = cookie->aux;
+
+ if (memcmp(p, aux_data, cookie->aux_len) != 0) {
+ memcpy(p, aux_data, cookie->aux_len);
+ set_bit(FSCACHE_COOKIE_AUX_UPDATED, &cookie->flags);
+ }
+}
+
/*****************************************************************************/
/*
* debug tracing
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index 249968dcbf5c..7dce110bf17d 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -16,6 +16,7 @@
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
+#define CREATE_TRACE_POINTS
#include "internal.h"
MODULE_DESCRIPTION("FS Cache Manager");
diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c
index a8aa00be4444..c2f605483cc5 100644
--- a/fs/fscache/netfs.c
+++ b/fs/fscache/netfs.c
@@ -14,69 +14,51 @@
#include <linux/slab.h>
#include "internal.h"
-static LIST_HEAD(fscache_netfs_list);
-
/*
* register a network filesystem for caching
*/
int __fscache_register_netfs(struct fscache_netfs *netfs)
{
- struct fscache_netfs *ptr;
- struct fscache_cookie *cookie;
- int ret;
+ struct fscache_cookie *candidate, *cookie;
_enter("{%s}", netfs->name);
- INIT_LIST_HEAD(&netfs->link);
-
/* allocate a cookie for the primary index */
- cookie = kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL);
-
- if (!cookie) {
+ candidate = fscache_alloc_cookie(&fscache_fsdef_index,
+ &fscache_fsdef_netfs_def,
+ netfs->name, strlen(netfs->name),
+ &netfs->version, sizeof(netfs->version),
+ netfs, 0);
+ if (!candidate) {
_leave(" = -ENOMEM");
return -ENOMEM;
}
- /* initialise the primary index cookie */
- atomic_set(&cookie->usage, 1);
- atomic_set(&cookie->n_children, 0);
- atomic_set(&cookie->n_active, 1);
-
- cookie->def = &fscache_fsdef_netfs_def;
- cookie->parent = &fscache_fsdef_index;
- cookie->netfs_data = netfs;
- cookie->flags = 1 << FSCACHE_COOKIE_ENABLED;
-
- spin_lock_init(&cookie->lock);
- spin_lock_init(&cookie->stores_lock);
- INIT_HLIST_HEAD(&cookie->backing_objects);
+ candidate->flags = 1 << FSCACHE_COOKIE_ENABLED;
/* check the netfs type is not already present */
- down_write(&fscache_addremove_sem);
-
- ret = -EEXIST;
- list_for_each_entry(ptr, &fscache_netfs_list, link) {
- if (strcmp(ptr->name, netfs->name) == 0)
- goto already_registered;
+ cookie = fscache_hash_cookie(candidate);
+ if (!cookie)
+ goto already_registered;
+ if (cookie != candidate) {
+ trace_fscache_cookie(candidate, fscache_cookie_discard, 1);
+ fscache_free_cookie(candidate);
}
- atomic_inc(&cookie->parent->usage);
+ fscache_cookie_get(cookie->parent, fscache_cookie_get_register_netfs);
atomic_inc(&cookie->parent->n_children);
netfs->primary_index = cookie;
- list_add(&netfs->link, &fscache_netfs_list);
- ret = 0;
pr_notice("Netfs '%s' registered for caching\n", netfs->name);
+ trace_fscache_netfs(netfs);
+ _leave(" = 0");
+ return 0;
already_registered:
- up_write(&fscache_addremove_sem);
-
- if (ret < 0)
- kmem_cache_free(fscache_cookie_jar, cookie);
-
- _leave(" = %d", ret);
- return ret;
+ fscache_cookie_put(candidate, fscache_cookie_put_dup_netfs);
+ _leave(" = -EEXIST");
+ return -EEXIST;
}
EXPORT_SYMBOL(__fscache_register_netfs);
@@ -88,15 +70,8 @@ void __fscache_unregister_netfs(struct fscache_netfs *netfs)
{
_enter("{%s.%u}", netfs->name, netfs->version);
- down_write(&fscache_addremove_sem);
-
- list_del(&netfs->link);
- fscache_relinquish_cookie(netfs->primary_index, 0);
-
- up_write(&fscache_addremove_sem);
-
- pr_notice("Netfs '%s' unregistered from caching\n",
- netfs->name);
+ fscache_relinquish_cookie(netfs->primary_index, NULL, false);
+ pr_notice("Netfs '%s' unregistered from caching\n", netfs->name);
_leave("");
}
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
index 0438d4cd91ef..43e6e28c164f 100644
--- a/fs/fscache/object-list.c
+++ b/fs/fscache/object-list.c
@@ -36,8 +36,6 @@ struct fscache_objlist_data {
#define FSCACHE_OBJLIST_CONFIG_NOEVENTS 0x00000800 /* show objects without no events */
#define FSCACHE_OBJLIST_CONFIG_WORK 0x00001000 /* show objects with work */
#define FSCACHE_OBJLIST_CONFIG_NOWORK 0x00002000 /* show objects without work */
-
- u8 buf[512]; /* key and aux data buffer */
};
/*
@@ -170,7 +168,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v)
struct fscache_cookie *cookie;
unsigned long config = data->config;
char _type[3], *type;
- u8 *buf = data->buf, *p;
+ u8 *p;
if ((unsigned long) v == 1) {
seq_puts(m, "OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS"
@@ -254,7 +252,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v)
if (fscache_use_cookie(obj)) {
uint16_t keylen = 0, auxlen = 0;
- switch (cookie->def->type) {
+ switch (cookie->type) {
case 0:
type = "IX";
break;
@@ -263,7 +261,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v)
break;
default:
snprintf(_type, sizeof(_type), "%02u",
- cookie->def->type);
+ cookie->type);
type = _type;
break;
}
@@ -274,30 +272,30 @@ static int fscache_objlist_show(struct seq_file *m, void *v)
cookie->flags,
cookie->netfs_data);
- if (cookie->def->get_key &&
- config & FSCACHE_OBJLIST_CONFIG_KEY)
- keylen = cookie->def->get_key(cookie->netfs_data,
- buf, 400);
+ if (config & FSCACHE_OBJLIST_CONFIG_KEY)
+ keylen = cookie->key_len;
- if (cookie->def->get_aux &&
- config & FSCACHE_OBJLIST_CONFIG_AUX)
- auxlen = cookie->def->get_aux(cookie->netfs_data,
- buf + keylen, 512 - keylen);
- fscache_unuse_cookie(obj);
+ if (config & FSCACHE_OBJLIST_CONFIG_AUX)
+ auxlen = cookie->aux_len;
if (keylen > 0 || auxlen > 0) {
seq_puts(m, " ");
- for (p = buf; keylen > 0; keylen--)
+ p = keylen <= sizeof(cookie->inline_key) ?
+ cookie->inline_key : cookie->key;
+ for (; keylen > 0; keylen--)
seq_printf(m, "%02x", *p++);
if (auxlen > 0) {
if (config & FSCACHE_OBJLIST_CONFIG_KEY)
seq_puts(m, ", ");
+ p = auxlen <= sizeof(cookie->inline_aux) ?
+ cookie->inline_aux : cookie->aux;
for (; auxlen > 0; auxlen--)
seq_printf(m, "%02x", *p++);
}
}
seq_puts(m, "\n");
+ fscache_unuse_cookie(obj);
} else {
seq_puts(m, "<no_netfs>\n");
}
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 7a182c87f378..1085ca12e25c 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -138,10 +138,13 @@ static const struct fscache_transition fscache_osm_run_oob[] = {
{ 0, NULL }
};
-static int fscache_get_object(struct fscache_object *);
-static void fscache_put_object(struct fscache_object *);
+static int fscache_get_object(struct fscache_object *,
+ enum fscache_obj_ref_trace);
+static void fscache_put_object(struct fscache_object *,
+ enum fscache_obj_ref_trace);
static bool fscache_enqueue_dependents(struct fscache_object *, int);
static void fscache_dequeue_object(struct fscache_object *);
+static void fscache_update_aux_data(struct fscache_object *);
/*
* we need to notify the parent when an op completes that we had outstanding
@@ -170,6 +173,7 @@ static void fscache_object_sm_dispatcher(struct fscache_object *object)
const struct fscache_transition *t;
const struct fscache_state *state, *new_state;
unsigned long events, event_mask;
+ bool oob;
int event = -1;
ASSERT(object != NULL);
@@ -188,6 +192,7 @@ restart_masked:
if (events & object->oob_event_mask) {
_debug("{OBJ%x} oob %lx",
object->debug_id, events & object->oob_event_mask);
+ oob = true;
for (t = object->oob_table; t->events; t++) {
if (events & t->events) {
state = t->transit_to;
@@ -199,6 +204,7 @@ restart_masked:
}
}
}
+ oob = false;
/* Wait states are just transition tables */
if (!state->work) {
@@ -207,6 +213,8 @@ restart_masked:
if (events & t->events) {
new_state = t->transit_to;
event = fls(events & t->events) - 1;
+ trace_fscache_osm(object, state,
+ true, false, event);
clear_bit(event, &object->events);
_debug("{OBJ%x} ev %d: %s -> %s",
object->debug_id, event,
@@ -226,6 +234,7 @@ restart_masked:
execute_work_state:
_debug("{OBJ%x} exec %s", object->debug_id, state->name);
+ trace_fscache_osm(object, state, false, oob, event);
new_state = state->work(object, event);
event = -1;
if (new_state == NO_TRANSIT) {
@@ -279,7 +288,7 @@ static void fscache_object_work_func(struct work_struct *work)
start = jiffies;
fscache_object_sm_dispatcher(object);
fscache_hist(fscache_objs_histogram, start);
- fscache_put_object(object);
+ fscache_put_object(object, fscache_obj_put_work);
}
/**
@@ -397,7 +406,7 @@ static const struct fscache_state *fscache_initialise_object(struct fscache_obje
fscache_stat(&fscache_n_cop_grab_object);
success = false;
if (fscache_object_is_live(parent) &&
- object->cache->ops->grab_object(object)) {
+ object->cache->ops->grab_object(object, fscache_obj_get_add_to_deps)) {
list_add(&object->dep_link, &parent->dependents);
success = true;
}
@@ -703,6 +712,11 @@ static const struct fscache_state *fscache_drop_object(struct fscache_object *ob
ASSERT(cookie != NULL);
ASSERT(!hlist_unhashed(&object->cookie_link));
+ if (test_bit(FSCACHE_COOKIE_AUX_UPDATED, &cookie->flags)) {
+ _debug("final update");
+ fscache_update_aux_data(object);
+ }
+
/* Make sure the cookie no longer points here and that the netfs isn't
* waiting for us.
*/
@@ -745,7 +759,7 @@ static const struct fscache_state *fscache_drop_object(struct fscache_object *ob
}
/* this just shifts the object release to the work processor */
- fscache_put_object(object);
+ fscache_put_object(object, fscache_obj_put_drop_obj);
fscache_stat(&fscache_n_object_dead);
_leave("");
@@ -755,12 +769,13 @@ static const struct fscache_state *fscache_drop_object(struct fscache_object *ob
/*
* get a ref on an object
*/
-static int fscache_get_object(struct fscache_object *object)
+static int fscache_get_object(struct fscache_object *object,
+ enum fscache_obj_ref_trace why)
{
int ret;
fscache_stat(&fscache_n_cop_grab_object);
- ret = object->cache->ops->grab_object(object) ? 0 : -EAGAIN;
+ ret = object->cache->ops->grab_object(object, why) ? 0 : -EAGAIN;
fscache_stat_d(&fscache_n_cop_grab_object);
return ret;
}
@@ -768,10 +783,11 @@ static int fscache_get_object(struct fscache_object *object)
/*
* Discard a ref on an object
*/
-static void fscache_put_object(struct fscache_object *object)
+static void fscache_put_object(struct fscache_object *object,
+ enum fscache_obj_ref_trace why)
{
fscache_stat(&fscache_n_cop_put_object);
- object->cache->ops->put_object(object);
+ object->cache->ops->put_object(object, why);
fscache_stat_d(&fscache_n_cop_put_object);
}
@@ -786,7 +802,7 @@ void fscache_object_destroy(struct fscache_object *object)
fscache_objlist_remove(object);
/* We can get rid of the cookie now */
- fscache_cookie_put(object->cookie);
+ fscache_cookie_put(object->cookie, fscache_cookie_put_object);
object->cookie = NULL;
}
EXPORT_SYMBOL(fscache_object_destroy);
@@ -798,7 +814,7 @@ void fscache_enqueue_object(struct fscache_object *object)
{
_enter("{OBJ%x}", object->debug_id);
- if (fscache_get_object(object) >= 0) {
+ if (fscache_get_object(object, fscache_obj_get_queue) >= 0) {
wait_queue_head_t *cong_wq =
&get_cpu_var(fscache_object_cong_wait);
@@ -806,7 +822,7 @@ void fscache_enqueue_object(struct fscache_object *object)
if (fscache_object_congested())
wake_up(cong_wq);
} else
- fscache_put_object(object);
+ fscache_put_object(object, fscache_obj_put_queue);
put_cpu_var(fscache_object_cong_wait);
}
@@ -866,7 +882,7 @@ static bool fscache_enqueue_dependents(struct fscache_object *object, int event)
list_del_init(&dep->dep_link);
fscache_raise_event(dep, event);
- fscache_put_object(dep);
+ fscache_put_object(dep, fscache_obj_put_enq_dep);
if (!list_empty(&object->dependents) && need_resched()) {
ret = false;
@@ -906,7 +922,8 @@ static void fscache_dequeue_object(struct fscache_object *object)
* and creation).
*/
enum fscache_checkaux fscache_check_aux(struct fscache_object *object,
- const void *data, uint16_t datalen)
+ const void *data, uint16_t datalen,
+ loff_t object_size)
{
enum fscache_checkaux result;
@@ -916,7 +933,7 @@ enum fscache_checkaux fscache_check_aux(struct fscache_object *object,
}
result = object->cookie->def->check_aux(object->cookie->netfs_data,
- data, datalen);
+ data, datalen, object_size);
switch (result) {
/* entry okay as is */
case FSCACHE_CHECKAUX_OKAY:
@@ -972,11 +989,12 @@ static const struct fscache_state *_fscache_invalidate_object(struct fscache_obj
if (!op)
goto nomem;
- fscache_operation_init(op, object->cache->ops->invalidate_object,
+ fscache_operation_init(cookie, op, object->cache->ops->invalidate_object,
NULL, NULL);
op->flags = FSCACHE_OP_ASYNC |
(1 << FSCACHE_OP_EXCLUSIVE) |
(1 << FSCACHE_OP_UNUSE_COOKIE);
+ trace_fscache_page_op(cookie, NULL, op, fscache_page_op_invalidate);
spin_lock(&cookie->lock);
if (fscache_submit_exclusive_op(object, op) < 0)
@@ -1026,6 +1044,17 @@ static const struct fscache_state *fscache_invalidate_object(struct fscache_obje
}
/*
+ * Update auxiliary data.
+ */
+static void fscache_update_aux_data(struct fscache_object *object)
+{
+ fscache_stat(&fscache_n_updates_run);
+ fscache_stat(&fscache_n_cop_update_object);
+ object->cache->ops->update_object(object);
+ fscache_stat_d(&fscache_n_cop_update_object);
+}
+
+/*
* Asynchronously update an object.
*/
static const struct fscache_state *fscache_update_object(struct fscache_object *object,
@@ -1033,10 +1062,7 @@ static const struct fscache_state *fscache_update_object(struct fscache_object *
{
_enter("{OBJ%x},%d", object->debug_id, event);
- fscache_stat(&fscache_n_updates_run);
- fscache_stat(&fscache_n_cop_update_object);
- object->cache->ops->update_object(object);
- fscache_stat_d(&fscache_n_cop_update_object);
+ fscache_update_aux_data(object);
_leave("");
return transit_to(WAIT_FOR_CMD);
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index de67745e1cd7..e30c5975ea58 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -32,7 +32,8 @@ static void fscache_operation_dummy_cancel(struct fscache_operation *op)
* Do basic initialisation of an operation. The caller must still set flags,
* object and processor if needed.
*/
-void fscache_operation_init(struct fscache_operation *op,
+void fscache_operation_init(struct fscache_cookie *cookie,
+ struct fscache_operation *op,
fscache_operation_processor_t processor,
fscache_operation_cancel_t cancel,
fscache_operation_release_t release)
@@ -46,6 +47,7 @@ void fscache_operation_init(struct fscache_operation *op,
op->release = release;
INIT_LIST_HEAD(&op->pend_link);
fscache_stat(&fscache_n_op_initialised);
+ trace_fscache_op(cookie, op, fscache_op_init);
}
EXPORT_SYMBOL(fscache_operation_init);
@@ -59,6 +61,8 @@ EXPORT_SYMBOL(fscache_operation_init);
*/
void fscache_enqueue_operation(struct fscache_operation *op)
{
+ struct fscache_cookie *cookie = op->object->cookie;
+
_enter("{OBJ%x OP%x,%u}",
op->object->debug_id, op->debug_id, atomic_read(&op->usage));
@@ -71,12 +75,14 @@ void fscache_enqueue_operation(struct fscache_operation *op)
fscache_stat(&fscache_n_op_enqueue);
switch (op->flags & FSCACHE_OP_TYPE) {
case FSCACHE_OP_ASYNC:
+ trace_fscache_op(cookie, op, fscache_op_enqueue_async);
_debug("queue async");
atomic_inc(&op->usage);
if (!queue_work(fscache_op_wq, &op->work))
fscache_put_operation(op);
break;
case FSCACHE_OP_MYTHREAD:
+ trace_fscache_op(cookie, op, fscache_op_enqueue_mythread);
_debug("queue for caller's attention");
break;
default:
@@ -101,6 +107,8 @@ static void fscache_run_op(struct fscache_object *object,
wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
if (op->processor)
fscache_enqueue_operation(op);
+ else
+ trace_fscache_op(object->cookie, op, fscache_op_run);
fscache_stat(&fscache_n_op_run);
}
@@ -155,6 +163,8 @@ int fscache_submit_exclusive_op(struct fscache_object *object,
_enter("{OBJ%x OP%x},", object->debug_id, op->debug_id);
+ trace_fscache_op(object->cookie, op, fscache_op_submit_ex);
+
ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED);
ASSERTCMP(atomic_read(&op->usage), >, 0);
@@ -240,6 +250,8 @@ int fscache_submit_op(struct fscache_object *object,
_enter("{OBJ%x OP%x},{%u}",
object->debug_id, op->debug_id, atomic_read(&op->usage));
+ trace_fscache_op(object->cookie, op, fscache_op_submit);
+
ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED);
ASSERTCMP(atomic_read(&op->usage), >, 0);
@@ -357,6 +369,8 @@ int fscache_cancel_op(struct fscache_operation *op,
_enter("OBJ%x OP%x}", op->object->debug_id, op->debug_id);
+ trace_fscache_op(object->cookie, op, fscache_op_cancel);
+
ASSERTCMP(op->state, >=, FSCACHE_OP_ST_PENDING);
ASSERTCMP(op->state, !=, FSCACHE_OP_ST_CANCELLED);
ASSERTCMP(atomic_read(&op->usage), >, 0);
@@ -419,6 +433,8 @@ void fscache_cancel_all_ops(struct fscache_object *object)
fscache_stat(&fscache_n_op_cancelled);
list_del_init(&op->pend_link);
+ trace_fscache_op(object->cookie, op, fscache_op_cancel_all);
+
ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING);
op->cancel(op);
op->state = FSCACHE_OP_ST_CANCELLED;
@@ -454,9 +470,11 @@ void fscache_op_complete(struct fscache_operation *op, bool cancelled)
spin_lock(&object->lock);
if (!cancelled) {
+ trace_fscache_op(object->cookie, op, fscache_op_completed);
op->state = FSCACHE_OP_ST_COMPLETE;
} else {
op->cancel(op);
+ trace_fscache_op(object->cookie, op, fscache_op_cancelled);
op->state = FSCACHE_OP_ST_CANCELLED;
}
@@ -488,6 +506,8 @@ void fscache_put_operation(struct fscache_operation *op)
if (!atomic_dec_and_test(&op->usage))
return;
+ trace_fscache_op(op->object ? op->object->cookie : NULL, op, fscache_op_put);
+
_debug("PUT OP");
ASSERTIFCMP(op->state != FSCACHE_OP_ST_INITIALISED &&
op->state != FSCACHE_OP_ST_COMPLETE,
@@ -563,6 +583,8 @@ void fscache_operation_gc(struct work_struct *work)
spin_unlock(&cache->op_gc_list_lock);
object = op->object;
+ trace_fscache_op(object->cookie, op, fscache_op_gc);
+
spin_lock(&object->lock);
_debug("GC DEFERRED REL OBJ%x OP%x",
@@ -601,6 +623,8 @@ void fscache_op_work_func(struct work_struct *work)
_enter("{OBJ%x OP%x,%d}",
op->object->debug_id, op->debug_id, atomic_read(&op->usage));
+ trace_fscache_op(op->object->cookie, op, fscache_op_work);
+
ASSERT(op->processor != NULL);
start = jiffies;
op->processor(op);
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 961029e04027..111349f67d98 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -27,6 +27,7 @@ bool __fscache_check_page_write(struct fscache_cookie *cookie, struct page *page
rcu_read_lock();
val = radix_tree_lookup(&cookie->stores, page->index);
rcu_read_unlock();
+ trace_fscache_check_page(cookie, page, val, 0);
return val != NULL;
}
@@ -39,6 +40,8 @@ void __fscache_wait_on_page_write(struct fscache_cookie *cookie, struct page *pa
{
wait_queue_head_t *wq = bit_waitqueue(&cookie->flags, 0);
+ trace_fscache_page(cookie, page, fscache_page_write_wait);
+
wait_event(*wq, !__fscache_check_page_write(cookie, page));
}
EXPORT_SYMBOL(__fscache_wait_on_page_write);
@@ -69,6 +72,8 @@ bool __fscache_maybe_release_page(struct fscache_cookie *cookie,
_enter("%p,%p,%x", cookie, page, gfp);
+ trace_fscache_page(cookie, page, fscache_page_maybe_release);
+
try_again:
rcu_read_lock();
val = radix_tree_lookup(&cookie->stores, page->index);
@@ -101,6 +106,7 @@ try_again:
}
xpage = radix_tree_delete(&cookie->stores, page->index);
+ trace_fscache_page(cookie, page, fscache_page_radix_delete);
spin_unlock(&cookie->stores_lock);
if (xpage) {
@@ -112,6 +118,7 @@ try_again:
}
wake_up_bit(&cookie->flags, 0);
+ trace_fscache_wake_cookie(cookie);
if (xpage)
put_page(xpage);
__fscache_uncache_page(cookie, page);
@@ -144,7 +151,7 @@ static void fscache_end_page_write(struct fscache_object *object,
struct page *page)
{
struct fscache_cookie *cookie;
- struct page *xpage = NULL;
+ struct page *xpage = NULL, *val;
spin_lock(&object->lock);
cookie = object->cookie;
@@ -154,13 +161,24 @@ static void fscache_end_page_write(struct fscache_object *object,
spin_lock(&cookie->stores_lock);
radix_tree_tag_clear(&cookie->stores, page->index,
FSCACHE_COOKIE_STORING_TAG);
+ trace_fscache_page(cookie, page, fscache_page_radix_clear_store);
if (!radix_tree_tag_get(&cookie->stores, page->index,
FSCACHE_COOKIE_PENDING_TAG)) {
fscache_stat(&fscache_n_store_radix_deletes);
xpage = radix_tree_delete(&cookie->stores, page->index);
+ trace_fscache_page(cookie, page, fscache_page_radix_delete);
+ trace_fscache_page(cookie, page, fscache_page_write_end);
+
+ val = radix_tree_lookup(&cookie->stores, page->index);
+ trace_fscache_check_page(cookie, page, val, 1);
+ } else {
+ trace_fscache_page(cookie, page, fscache_page_write_end_pend);
}
spin_unlock(&cookie->stores_lock);
wake_up_bit(&cookie->flags, 0);
+ trace_fscache_wake_cookie(cookie);
+ } else {
+ trace_fscache_page(cookie, page, fscache_page_write_end_noc);
}
spin_unlock(&object->lock);
if (xpage)
@@ -185,9 +203,11 @@ static void fscache_attr_changed_op(struct fscache_operation *op)
fscache_stat_d(&fscache_n_cop_attr_changed);
if (ret < 0)
fscache_abort_object(object);
+ fscache_op_complete(op, ret < 0);
+ } else {
+ fscache_op_complete(op, true);
}
- fscache_op_complete(op, true);
_leave("");
}
@@ -213,7 +233,8 @@ int __fscache_attr_changed(struct fscache_cookie *cookie)
return -ENOMEM;
}
- fscache_operation_init(op, fscache_attr_changed_op, NULL, NULL);
+ fscache_operation_init(cookie, op, fscache_attr_changed_op, NULL, NULL);
+ trace_fscache_page_op(cookie, NULL, op, fscache_page_op_attr_changed);
op->flags = FSCACHE_OP_ASYNC |
(1 << FSCACHE_OP_EXCLUSIVE) |
(1 << FSCACHE_OP_UNUSE_COOKIE);
@@ -297,7 +318,7 @@ static struct fscache_retrieval *fscache_alloc_retrieval(
return NULL;
}
- fscache_operation_init(&op->op, NULL,
+ fscache_operation_init(cookie, &op->op, NULL,
fscache_do_cancel_retrieval,
fscache_release_retrieval_op);
op->op.flags = FSCACHE_OP_MYTHREAD |
@@ -368,6 +389,7 @@ int fscache_wait_for_operation_activation(struct fscache_object *object,
fscache_stat(stat_op_waits);
if (wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
TASK_INTERRUPTIBLE) != 0) {
+ trace_fscache_op(object->cookie, op, fscache_op_signal);
ret = fscache_cancel_op(op, false);
if (ret == 0)
return -ERESTARTSYS;
@@ -389,6 +411,7 @@ check_if_dead:
if (unlikely(fscache_object_is_dying(object) ||
fscache_cache_is_broken(object))) {
enum fscache_operation_state state = op->state;
+ trace_fscache_op(object->cookie, op, fscache_op_signal);
fscache_cancel_op(op, true);
if (stat_object_dead)
fscache_stat(stat_object_dead);
@@ -443,6 +466,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
return -ENOMEM;
}
atomic_set(&op->n_pages, 1);
+ trace_fscache_page_op(cookie, page, &op->op, fscache_page_op_retr_one);
spin_lock(&cookie->lock);
@@ -571,6 +595,7 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
if (!op)
return -ENOMEM;
atomic_set(&op->n_pages, *nr_pages);
+ trace_fscache_page_op(cookie, NULL, &op->op, fscache_page_op_retr_multi);
spin_lock(&cookie->lock);
@@ -682,6 +707,7 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
if (!op)
return -ENOMEM;
atomic_set(&op->n_pages, 1);
+ trace_fscache_page_op(cookie, page, &op->op, fscache_page_op_alloc_one);
spin_lock(&cookie->lock);
@@ -776,15 +802,17 @@ static void fscache_write_op(struct fscache_operation *_op)
_enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage));
+again:
spin_lock(&object->lock);
cookie = object->cookie;
if (!fscache_object_is_active(object)) {
- /* If we get here, then the on-disk cache object likely longer
- * exists, so we should just cancel this write operation.
+ /* If we get here, then the on-disk cache object likely no
+ * longer exists, so we should just cancel this write
+ * operation.
*/
spin_unlock(&object->lock);
- fscache_op_complete(&op->op, false);
+ fscache_op_complete(&op->op, true);
_leave(" [inactive]");
return;
}
@@ -797,7 +825,7 @@ static void fscache_write_op(struct fscache_operation *_op)
* cancel this write operation.
*/
spin_unlock(&object->lock);
- fscache_op_complete(&op->op, false);
+ fscache_op_complete(&op->op, true);
_leave(" [cancel] op{f=%lx s=%u} obj{s=%s f=%lx}",
_op->flags, _op->state, object->state->short_name,
object->flags);
@@ -809,30 +837,33 @@ static void fscache_write_op(struct fscache_operation *_op)
fscache_stat(&fscache_n_store_calls);
/* find a page to store */
+ results[0] = NULL;
page = NULL;
n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0, 1,
FSCACHE_COOKIE_PENDING_TAG);
+ trace_fscache_gang_lookup(cookie, &op->op, results, n, op->store_limit);
if (n != 1)
goto superseded;
page = results[0];
_debug("gang %d [%lx]", n, page->index);
- if (page->index >= op->store_limit) {
- fscache_stat(&fscache_n_store_pages_over_limit);
- goto superseded;
- }
radix_tree_tag_set(&cookie->stores, page->index,
FSCACHE_COOKIE_STORING_TAG);
radix_tree_tag_clear(&cookie->stores, page->index,
FSCACHE_COOKIE_PENDING_TAG);
+ trace_fscache_page(cookie, page, fscache_page_radix_pend2store);
spin_unlock(&cookie->stores_lock);
spin_unlock(&object->lock);
+ if (page->index >= op->store_limit)
+ goto discard_page;
+
fscache_stat(&fscache_n_store_pages);
fscache_stat(&fscache_n_cop_write_page);
ret = object->cache->ops->write_page(op, page);
fscache_stat_d(&fscache_n_cop_write_page);
+ trace_fscache_wrote_page(cookie, page, &op->op, ret);
fscache_end_page_write(object, page);
if (ret < 0) {
fscache_abort_object(object);
@@ -844,6 +875,12 @@ static void fscache_write_op(struct fscache_operation *_op)
_leave("");
return;
+discard_page:
+ fscache_stat(&fscache_n_store_pages_over_limit);
+ trace_fscache_wrote_page(cookie, page, &op->op, -ENOBUFS);
+ fscache_end_page_write(object, page);
+ goto again;
+
superseded:
/* this writer is going away and there aren't any more things to
* write */
@@ -851,7 +888,7 @@ superseded:
spin_unlock(&cookie->stores_lock);
clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
spin_unlock(&object->lock);
- fscache_op_complete(&op->op, true);
+ fscache_op_complete(&op->op, false);
_leave("");
}
@@ -879,6 +916,8 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie)
for (i = n - 1; i >= 0; i--) {
page = results[i];
radix_tree_delete(&cookie->stores, page->index);
+ trace_fscache_page(cookie, page, fscache_page_radix_delete);
+ trace_fscache_page(cookie, page, fscache_page_inval);
}
spin_unlock(&cookie->stores_lock);
@@ -888,6 +927,7 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie)
}
wake_up_bit(&cookie->flags, 0);
+ trace_fscache_wake_cookie(cookie);
_leave("");
}
@@ -923,6 +963,7 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie)
*/
int __fscache_write_page(struct fscache_cookie *cookie,
struct page *page,
+ loff_t object_size,
gfp_t gfp)
{
struct fscache_storage *op;
@@ -946,7 +987,7 @@ int __fscache_write_page(struct fscache_cookie *cookie,
if (!op)
goto nomem;
- fscache_operation_init(&op->op, fscache_write_op, NULL,
+ fscache_operation_init(cookie, &op->op, fscache_write_op, NULL,
fscache_release_write_op);
op->op.flags = FSCACHE_OP_ASYNC |
(1 << FSCACHE_OP_WAITING) |
@@ -956,6 +997,8 @@ int __fscache_write_page(struct fscache_cookie *cookie,
if (ret < 0)
goto nomem_free;
+ trace_fscache_page_op(cookie, page, &op->op, fscache_page_op_write_one);
+
ret = -ENOBUFS;
spin_lock(&cookie->lock);
@@ -967,9 +1010,15 @@ int __fscache_write_page(struct fscache_cookie *cookie,
if (test_bit(FSCACHE_IOERROR, &object->cache->flags))
goto nobufs;
+ trace_fscache_page(cookie, page, fscache_page_write);
+
/* add the page to the pending-storage radix tree on the backing
* object */
spin_lock(&object->lock);
+
+ if (object->store_limit_l != object_size)
+ fscache_set_store_limit(object, object_size);
+
spin_lock(&cookie->stores_lock);
_debug("store limit %llx", (unsigned long long) object->store_limit);
@@ -982,8 +1031,10 @@ int __fscache_write_page(struct fscache_cookie *cookie,
goto nobufs_unlock_obj;
}
+ trace_fscache_page(cookie, page, fscache_page_radix_insert);
radix_tree_tag_set(&cookie->stores, page->index,
FSCACHE_COOKIE_PENDING_TAG);
+ trace_fscache_page(cookie, page, fscache_page_radix_set_pend);
get_page(page);
/* we only want one writer at a time, but we do need to queue new
@@ -1026,6 +1077,7 @@ already_pending:
submit_failed:
spin_lock(&cookie->stores_lock);
radix_tree_delete(&cookie->stores, page->index);
+ trace_fscache_page(cookie, page, fscache_page_radix_delete);
spin_unlock(&cookie->stores_lock);
wake_cookie = __fscache_unuse_cookie(cookie);
put_page(page);
@@ -1072,6 +1124,8 @@ void __fscache_uncache_page(struct fscache_cookie *cookie, struct page *page)
if (!PageFsCache(page))
goto done;
+ trace_fscache_page(cookie, page, fscache_page_uncache);
+
/* get the object */
spin_lock(&cookie->lock);
@@ -1120,6 +1174,8 @@ void fscache_mark_page_cached(struct fscache_retrieval *op, struct page *page)
atomic_inc(&fscache_n_marks);
#endif
+ trace_fscache_page(cookie, page, fscache_page_cached);
+
_debug("- mark %p{%lx}", page, page->index);
if (TestSetPageFsCache(page)) {
static bool once_only;
diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c
index 7ac6e839b065..fcc8c2f2690e 100644
--- a/fs/fscache/stats.c
+++ b/fs/fscache/stats.c
@@ -21,7 +21,6 @@
atomic_t fscache_n_op_pend;
atomic_t fscache_n_op_run;
atomic_t fscache_n_op_enqueue;
-atomic_t fscache_n_op_requeue;
atomic_t fscache_n_op_deferred_release;
atomic_t fscache_n_op_initialised;
atomic_t fscache_n_op_release;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 624f18bbfd2b..ef309958e060 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1080,6 +1080,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_time_gran = 1;
sb->s_export_op = &fuse_export_operations;
+ sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
+ if (sb->s_user_ns != &init_user_ns)
+ sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
file = fget(d.fd);
err = -EINVAL;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 620be0521866..cf5c7f3080d2 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -800,7 +800,7 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
int need_endtrans = 0;
int ret;
- if (!(flags & (I_DIRTY_DATASYNC|I_DIRTY_SYNC)))
+ if (!(flags & I_DIRTY_INODE))
return;
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
return;
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 05de20954659..f2bce1e0f6fb 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -308,7 +308,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
}
ip->i_inode.i_ctime = current_time(&ip->i_inode);
- __mark_inode_dirty(&ip->i_inode, I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+ __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
gfs2_trans_end(sdp);
@@ -768,7 +768,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
goto out_end_trans;
ip->i_inode.i_ctime = current_time(&ip->i_inode);
- __mark_inode_dirty(&ip->i_inode, I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+ __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
out_end_trans:
gfs2_trans_end(GFS2_SB(&ip->i_inode));
@@ -896,7 +896,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
ea_set_remove_stuffed(ip, es->es_el);
ip->i_inode.i_ctime = current_time(&ip->i_inode);
- __mark_inode_dirty(&ip->i_inode, I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+ __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
gfs2_trans_end(GFS2_SB(&ip->i_inode));
return error;
@@ -1114,7 +1114,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
}
ip->i_inode.i_ctime = current_time(&ip->i_inode);
- __mark_inode_dirty(&ip->i_inode, I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+ __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
gfs2_trans_end(GFS2_SB(&ip->i_inode));
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index b9a254dcc0e7..d508c7844681 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -138,10 +138,14 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
/*
* page based offset in vm_pgoff could be sufficiently large to
- * overflow a (l)off_t when converted to byte offset.
+ * overflow a loff_t when converted to byte offset. This can
+ * only happen on architectures where sizeof(loff_t) ==
+ * sizeof(unsigned long). So, only check in those instances.
*/
- if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
- return -EINVAL;
+ if (sizeof(unsigned long) == sizeof(loff_t)) {
+ if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
+ return -EINVAL;
+ }
/* must be huge page aligned */
if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
diff --git a/fs/internal.h b/fs/internal.h
index 980d005b21b4..e08972db0303 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -125,7 +125,6 @@ int do_fchmodat(int dfd, const char __user *filename, umode_t mode);
int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
int flag);
-extern int open_check_o_direct(struct file *f);
extern int vfs_open(const struct path *, struct file *, const struct cred *);
extern struct file *filp_clone_open(struct file *);
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index 4a6cf289be24..83b8f06b4a64 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -21,14 +21,6 @@
#include <linux/pagemap.h>
#include "nodelist.h"
-struct erase_priv_struct {
- struct jffs2_eraseblock *jeb;
- struct jffs2_sb_info *c;
-};
-
-#ifndef __ECOS
-static void jffs2_erase_callback(struct erase_info *);
-#endif
static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t bad_offset);
static void jffs2_erase_succeeded(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb);
static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb);
@@ -51,7 +43,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
jffs2_dbg(1, "%s(): erase block %#08x (range %#08x-%#08x)\n",
__func__,
jeb->offset, jeb->offset, jeb->offset + c->sector_size);
- instr = kmalloc(sizeof(struct erase_info) + sizeof(struct erase_priv_struct), GFP_KERNEL);
+ instr = kmalloc(sizeof(struct erase_info), GFP_KERNEL);
if (!instr) {
pr_warn("kmalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n");
mutex_lock(&c->erase_free_sem);
@@ -67,18 +59,15 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
memset(instr, 0, sizeof(*instr));
- instr->mtd = c->mtd;
instr->addr = jeb->offset;
instr->len = c->sector_size;
- instr->callback = jffs2_erase_callback;
- instr->priv = (unsigned long)(&instr[1]);
-
- ((struct erase_priv_struct *)instr->priv)->jeb = jeb;
- ((struct erase_priv_struct *)instr->priv)->c = c;
ret = mtd_erase(c->mtd, instr);
- if (!ret)
+ if (!ret) {
+ jffs2_erase_succeeded(c, jeb);
+ kfree(instr);
return;
+ }
bad_offset = instr->fail_addr;
kfree(instr);
@@ -214,22 +203,6 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock
wake_up(&c->erase_wait);
}
-#ifndef __ECOS
-static void jffs2_erase_callback(struct erase_info *instr)
-{
- struct erase_priv_struct *priv = (void *)instr->priv;
-
- if(instr->state != MTD_ERASE_DONE) {
- pr_warn("Erase at 0x%08llx finished, but state != MTD_ERASE_DONE. State is 0x%x instead.\n",
- (unsigned long long)instr->addr, instr->state);
- jffs2_erase_failed(priv->c, priv->jeb, instr->fail_addr);
- } else {
- jffs2_erase_succeeded(priv->c, priv->jeb);
- }
- kfree(instr);
-}
-#endif /* !__ECOS */
-
/* Hmmm. Maybe we should accept the extra space it takes and make
this a standard doubly-linked list? */
static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 9c36d614bf89..346ed161756d 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -57,8 +57,8 @@ static struct task_struct *nlmsvc_task;
static struct svc_rqst *nlmsvc_rqst;
unsigned long nlmsvc_timeout;
-atomic_t nlm_ntf_refcnt = ATOMIC_INIT(0);
-DECLARE_WAIT_QUEUE_HEAD(nlm_ntf_wq);
+static atomic_t nlm_ntf_refcnt = ATOMIC_INIT(0);
+static DECLARE_WAIT_QUEUE_HEAD(nlm_ntf_wq);
unsigned int lockd_net_id;
diff --git a/fs/locks.c b/fs/locks.c
index d6ff4beb70ce..62bbe8b31f26 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -559,7 +559,7 @@ static const struct lock_manager_operations lease_manager_ops = {
* Initialize a lease, use the default lock manager operations
*/
static int lease_init(struct file *filp, long type, struct file_lock *fl)
- {
+{
if (assign_type(fl, type) != 0)
return -EINVAL;
diff --git a/fs/namei.c b/fs/namei.c
index a09419379f5d..186bd2464fd5 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -39,6 +39,7 @@
#include <linux/bitops.h>
#include <linux/init_task.h>
#include <linux/uaccess.h>
+#include <linux/build_bug.h>
#include "internal.h"
#include "mount.h"
@@ -130,6 +131,7 @@ getname_flags(const char __user *filename, int flags, int *empty)
struct filename *result;
char *kname;
int len;
+ BUILD_BUG_ON(offsetof(struct filename, iname) % sizeof(long) != 0);
result = audit_reusename(filename);
if (result)
@@ -222,9 +224,10 @@ getname_kernel(const char * filename)
if (len <= EMBEDDED_NAME_MAX) {
result->name = (char *)result->iname;
} else if (len <= PATH_MAX) {
+ const size_t size = offsetof(struct filename, iname[1]);
struct filename *tmp;
- tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
+ tmp = kmalloc(size, GFP_KERNEL);
if (unlikely(!tmp)) {
__putname(result);
return ERR_PTR(-ENOMEM);
@@ -927,7 +930,8 @@ static inline int may_follow_link(struct nameidata *nd)
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
- audit_log_link_denied("follow_link", &nd->stack[0].link);
+ audit_inode(nd->name, nd->stack[0].link.dentry, 0);
+ audit_log_link_denied("follow_link");
return -EACCES;
}
@@ -993,7 +997,7 @@ static int may_linkat(struct path *link)
if (safe_hardlink_source(inode) || inode_owner_or_capable(inode))
return 0;
- audit_log_link_denied("linkat", link);
+ audit_log_link_denied("linkat");
return -EPERM;
}
@@ -1594,22 +1598,21 @@ static int lookup_fast(struct nameidata *nd,
}
/* Fast lookup failed, do it the slow way */
-static struct dentry *lookup_slow(const struct qstr *name,
- struct dentry *dir,
- unsigned int flags)
+static struct dentry *__lookup_slow(const struct qstr *name,
+ struct dentry *dir,
+ unsigned int flags)
{
- struct dentry *dentry = ERR_PTR(-ENOENT), *old;
+ struct dentry *dentry, *old;
struct inode *inode = dir->d_inode;
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
- inode_lock_shared(inode);
/* Don't go there if it's already dead */
if (unlikely(IS_DEADDIR(inode)))
- goto out;
+ return ERR_PTR(-ENOENT);
again:
dentry = d_alloc_parallel(dir, name, &wq);
if (IS_ERR(dentry))
- goto out;
+ return dentry;
if (unlikely(!d_in_lookup(dentry))) {
if (!(flags & LOOKUP_NO_REVAL)) {
int error = d_revalidate(dentry, flags);
@@ -1631,11 +1634,21 @@ again:
dentry = old;
}
}
-out:
- inode_unlock_shared(inode);
return dentry;
}
+static struct dentry *lookup_slow(const struct qstr *name,
+ struct dentry *dir,
+ unsigned int flags)
+{
+ struct inode *inode = dir->d_inode;
+ struct dentry *res;
+ inode_lock_shared(inode);
+ res = __lookup_slow(name, dir, flags);
+ inode_unlock_shared(inode);
+ return res;
+}
+
static inline int may_lookup(struct nameidata *nd)
{
if (nd->flags & LOOKUP_RCU) {
@@ -2418,56 +2431,63 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
}
EXPORT_SYMBOL(vfs_path_lookup);
-/**
- * lookup_one_len - filesystem helper to lookup single pathname component
- * @name: pathname component to lookup
- * @base: base directory to lookup from
- * @len: maximum length @len should be interpreted to
- *
- * Note that this routine is purely a helper for filesystem usage and should
- * not be called by generic code.
- *
- * The caller must hold base->i_mutex.
- */
-struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
+static int lookup_one_len_common(const char *name, struct dentry *base,
+ int len, struct qstr *this)
{
- struct qstr this;
- unsigned int c;
- int err;
-
- WARN_ON_ONCE(!inode_is_locked(base->d_inode));
-
- this.name = name;
- this.len = len;
- this.hash = full_name_hash(base, name, len);
+ this->name = name;
+ this->len = len;
+ this->hash = full_name_hash(base, name, len);
if (!len)
- return ERR_PTR(-EACCES);
+ return -EACCES;
if (unlikely(name[0] == '.')) {
if (len < 2 || (len == 2 && name[1] == '.'))
- return ERR_PTR(-EACCES);
+ return -EACCES;
}
while (len--) {
- c = *(const unsigned char *)name++;
+ unsigned int c = *(const unsigned char *)name++;
if (c == '/' || c == '\0')
- return ERR_PTR(-EACCES);
+ return -EACCES;
}
/*
* See if the low-level filesystem might want
* to use its own hash..
*/
if (base->d_flags & DCACHE_OP_HASH) {
- int err = base->d_op->d_hash(base, &this);
+ int err = base->d_op->d_hash(base, this);
if (err < 0)
- return ERR_PTR(err);
+ return err;
}
- err = inode_permission(base->d_inode, MAY_EXEC);
+ return inode_permission(base->d_inode, MAY_EXEC);
+}
+
+/**
+ * lookup_one_len - filesystem helper to lookup single pathname component
+ * @name: pathname component to lookup
+ * @base: base directory to lookup from
+ * @len: maximum length @len should be interpreted to
+ *
+ * Note that this routine is purely a helper for filesystem usage and should
+ * not be called by generic code.
+ *
+ * The caller must hold base->i_mutex.
+ */
+struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
+{
+ struct dentry *dentry;
+ struct qstr this;
+ int err;
+
+ WARN_ON_ONCE(!inode_is_locked(base->d_inode));
+
+ err = lookup_one_len_common(name, base, len, &this);
if (err)
return ERR_PTR(err);
- return __lookup_hash(&this, base, 0);
+ dentry = lookup_dcache(&this, base, 0);
+ return dentry ? dentry : __lookup_slow(&this, base, 0);
}
EXPORT_SYMBOL(lookup_one_len);
@@ -2487,37 +2507,10 @@ struct dentry *lookup_one_len_unlocked(const char *name,
struct dentry *base, int len)
{
struct qstr this;
- unsigned int c;
int err;
struct dentry *ret;
- this.name = name;
- this.len = len;
- this.hash = full_name_hash(base, name, len);
- if (!len)
- return ERR_PTR(-EACCES);
-
- if (unlikely(name[0] == '.')) {
- if (len < 2 || (len == 2 && name[1] == '.'))
- return ERR_PTR(-EACCES);
- }
-
- while (len--) {
- c = *(const unsigned char *)name++;
- if (c == '/' || c == '\0')
- return ERR_PTR(-EACCES);
- }
- /*
- * See if the low-level filesystem might want
- * to use its own hash..
- */
- if (base->d_flags & DCACHE_OP_HASH) {
- int err = base->d_op->d_hash(base, &this);
- if (err < 0)
- return ERR_PTR(err);
- }
-
- err = inode_permission(base->d_inode, MAY_EXEC);
+ err = lookup_one_len_common(name, base, len, &this);
if (err)
return ERR_PTR(err);
@@ -3374,9 +3367,7 @@ finish_open_created:
goto out;
*opened |= FILE_OPENED;
opened:
- error = open_check_o_direct(file);
- if (!error)
- error = ima_file_check(file, op->acc_mode, *opened);
+ error = ima_file_check(file, op->acc_mode, *opened);
if (!error && will_truncate)
error = handle_truncate(file);
out:
@@ -3456,9 +3447,6 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
error = finish_open(file, child, NULL, opened);
if (error)
goto out2;
- error = open_check_o_direct(file);
- if (error)
- fput(file);
out2:
mnt_drop_write(path.mnt);
out:
diff --git a/fs/nfs/fscache-index.c b/fs/nfs/fscache-index.c
index 0ee4b93d36ea..1c5d8d31fc0a 100644
--- a/fs/nfs/fscache-index.c
+++ b/fs/nfs/fscache-index.c
@@ -50,59 +50,6 @@ void nfs_fscache_unregister(void)
}
/*
- * Layout of the key for an NFS server cache object.
- */
-struct nfs_server_key {
- uint16_t nfsversion; /* NFS protocol version */
- uint16_t family; /* address family */
- uint16_t port; /* IP port */
- union {
- struct in_addr ipv4_addr; /* IPv4 address */
- struct in6_addr ipv6_addr; /* IPv6 address */
- } addr[0];
-};
-
-/*
- * Generate a key to describe a server in the main NFS index
- * - We return the length of the key, or 0 if we can't generate one
- */
-static uint16_t nfs_server_get_key(const void *cookie_netfs_data,
- void *buffer, uint16_t bufmax)
-{
- const struct nfs_client *clp = cookie_netfs_data;
- const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) &clp->cl_addr;
- const struct sockaddr_in *sin = (struct sockaddr_in *) &clp->cl_addr;
- struct nfs_server_key *key = buffer;
- uint16_t len = sizeof(struct nfs_server_key);
-
- memset(key, 0, len);
- key->nfsversion = clp->rpc_ops->version;
- key->family = clp->cl_addr.ss_family;
-
- switch (clp->cl_addr.ss_family) {
- case AF_INET:
- key->port = sin->sin_port;
- key->addr[0].ipv4_addr = sin->sin_addr;
- len += sizeof(key->addr[0].ipv4_addr);
- break;
-
- case AF_INET6:
- key->port = sin6->sin6_port;
- key->addr[0].ipv6_addr = sin6->sin6_addr;
- len += sizeof(key->addr[0].ipv6_addr);
- break;
-
- default:
- printk(KERN_WARNING "NFS: Unknown network family '%d'\n",
- clp->cl_addr.ss_family);
- len = 0;
- break;
- }
-
- return len;
-}
-
-/*
* Define the server object for FS-Cache. This is used to describe a server
* object to fscache_acquire_cookie(). It is keyed by the NFS protocol and
* server address parameters.
@@ -110,33 +57,9 @@ static uint16_t nfs_server_get_key(const void *cookie_netfs_data,
const struct fscache_cookie_def nfs_fscache_server_index_def = {
.name = "NFS.server",
.type = FSCACHE_COOKIE_TYPE_INDEX,
- .get_key = nfs_server_get_key,
};
/*
- * Generate a key to describe a superblock key in the main NFS index
- */
-static uint16_t nfs_super_get_key(const void *cookie_netfs_data,
- void *buffer, uint16_t bufmax)
-{
- const struct nfs_fscache_key *key;
- const struct nfs_server *nfss = cookie_netfs_data;
- uint16_t len;
-
- key = nfss->fscache_key;
- len = sizeof(key->key) + key->key.uniq_len;
- if (len > bufmax) {
- len = 0;
- } else {
- memcpy(buffer, &key->key, sizeof(key->key));
- memcpy(buffer + sizeof(key->key),
- key->key.uniquifier, key->key.uniq_len);
- }
-
- return len;
-}
-
-/*
* Define the superblock object for FS-Cache. This is used to describe a
* superblock object to fscache_acquire_cookie(). It is keyed by all the NFS
* parameters that might cause a separate superblock.
@@ -144,84 +67,9 @@ static uint16_t nfs_super_get_key(const void *cookie_netfs_data,
const struct fscache_cookie_def nfs_fscache_super_index_def = {
.name = "NFS.super",
.type = FSCACHE_COOKIE_TYPE_INDEX,
- .get_key = nfs_super_get_key,
};
/*
- * Definition of the auxiliary data attached to NFS inode storage objects
- * within the cache.
- *
- * The contents of this struct are recorded in the on-disk local cache in the
- * auxiliary data attached to the data storage object backing an inode. This
- * permits coherency to be managed when a new inode binds to an already extant
- * cache object.
- */
-struct nfs_fscache_inode_auxdata {
- struct timespec mtime;
- struct timespec ctime;
- loff_t size;
- u64 change_attr;
-};
-
-/*
- * Generate a key to describe an NFS inode in an NFS server's index
- */
-static uint16_t nfs_fscache_inode_get_key(const void *cookie_netfs_data,
- void *buffer, uint16_t bufmax)
-{
- const struct nfs_inode *nfsi = cookie_netfs_data;
- uint16_t nsize;
-
- /* use the inode's NFS filehandle as the key */
- nsize = nfsi->fh.size;
- memcpy(buffer, nfsi->fh.data, nsize);
- return nsize;
-}
-
-/*
- * Get certain file attributes from the netfs data
- * - This function can be absent for an index
- * - Not permitted to return an error
- * - The netfs data from the cookie being used as the source is presented
- */
-static void nfs_fscache_inode_get_attr(const void *cookie_netfs_data,
- uint64_t *size)
-{
- const struct nfs_inode *nfsi = cookie_netfs_data;
-
- *size = nfsi->vfs_inode.i_size;
-}
-
-/*
- * Get the auxiliary data from netfs data
- * - This function can be absent if the index carries no state data
- * - Should store the auxiliary data in the buffer
- * - Should return the amount of amount stored
- * - Not permitted to return an error
- * - The netfs data from the cookie being used as the source is presented
- */
-static uint16_t nfs_fscache_inode_get_aux(const void *cookie_netfs_data,
- void *buffer, uint16_t bufmax)
-{
- struct nfs_fscache_inode_auxdata auxdata;
- const struct nfs_inode *nfsi = cookie_netfs_data;
-
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.size = nfsi->vfs_inode.i_size;
- auxdata.mtime = nfsi->vfs_inode.i_mtime;
- auxdata.ctime = nfsi->vfs_inode.i_ctime;
-
- if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4)
- auxdata.change_attr = inode_peek_iversion_raw(&nfsi->vfs_inode);
-
- if (bufmax > sizeof(auxdata))
- bufmax = sizeof(auxdata);
-
- memcpy(buffer, &auxdata, bufmax);
- return bufmax;
-}
-
-/*
* Consult the netfs about the state of an object
* - This function can be absent if the index carries no state data
* - The netfs data from the cookie being used as the target is
@@ -230,7 +78,8 @@ static uint16_t nfs_fscache_inode_get_aux(const void *cookie_netfs_data,
static
enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data,
const void *data,
- uint16_t datalen)
+ uint16_t datalen,
+ loff_t object_size)
{
struct nfs_fscache_inode_auxdata auxdata;
struct nfs_inode *nfsi = cookie_netfs_data;
@@ -239,7 +88,6 @@ enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data,
return FSCACHE_CHECKAUX_OBSOLETE;
memset(&auxdata, 0, sizeof(auxdata));
- auxdata.size = nfsi->vfs_inode.i_size;
auxdata.mtime = nfsi->vfs_inode.i_mtime;
auxdata.ctime = nfsi->vfs_inode.i_ctime;
@@ -288,9 +136,6 @@ static void nfs_fh_put_context(void *cookie_netfs_data, void *context)
const struct fscache_cookie_def nfs_fscache_inode_object_def = {
.name = "NFS.fh",
.type = FSCACHE_COOKIE_TYPE_DATAFILE,
- .get_key = nfs_fscache_inode_get_key,
- .get_attr = nfs_fscache_inode_get_attr,
- .get_aux = nfs_fscache_inode_get_aux,
.check_aux = nfs_fscache_inode_check_aux,
.get_context = nfs_fh_get_context,
.put_context = nfs_fh_put_context,
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index d63bea8bbfbb..b55fc7920c3b 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -18,6 +18,7 @@
#include <linux/in6.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
+#include <linux/iversion.h>
#include "internal.h"
#include "iostat.h"
@@ -29,6 +30,21 @@ static struct rb_root nfs_fscache_keys = RB_ROOT;
static DEFINE_SPINLOCK(nfs_fscache_keys_lock);
/*
+ * Layout of the key for an NFS server cache object.
+ */
+struct nfs_server_key {
+ struct {
+ uint16_t nfsversion; /* NFS protocol version */
+ uint16_t family; /* address family */
+ __be16 port; /* IP port */
+ } hdr;
+ union {
+ struct in_addr ipv4_addr; /* IPv4 address */
+ struct in6_addr ipv6_addr; /* IPv6 address */
+ };
+} __packed;
+
+/*
* Get the per-client index cookie for an NFS client if the appropriate mount
* flag was set
* - We always try and get an index cookie for the client, but get filehandle
@@ -36,10 +52,41 @@ static DEFINE_SPINLOCK(nfs_fscache_keys_lock);
*/
void nfs_fscache_get_client_cookie(struct nfs_client *clp)
{
+ const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) &clp->cl_addr;
+ const struct sockaddr_in *sin = (struct sockaddr_in *) &clp->cl_addr;
+ struct nfs_server_key key;
+ uint16_t len = sizeof(key.hdr);
+
+ memset(&key, 0, sizeof(key));
+ key.hdr.nfsversion = clp->rpc_ops->version;
+ key.hdr.family = clp->cl_addr.ss_family;
+
+ switch (clp->cl_addr.ss_family) {
+ case AF_INET:
+ key.hdr.port = sin->sin_port;
+ key.ipv4_addr = sin->sin_addr;
+ len += sizeof(key.ipv4_addr);
+ break;
+
+ case AF_INET6:
+ key.hdr.port = sin6->sin6_port;
+ key.ipv6_addr = sin6->sin6_addr;
+ len += sizeof(key.ipv6_addr);
+ break;
+
+ default:
+ printk(KERN_WARNING "NFS: Unknown network family '%d'\n",
+ clp->cl_addr.ss_family);
+ clp->fscache = NULL;
+ return;
+ }
+
/* create a cache index for looking up filehandles */
clp->fscache = fscache_acquire_cookie(nfs_fscache_netfs.primary_index,
&nfs_fscache_server_index_def,
- clp, true);
+ &key, len,
+ NULL, 0,
+ clp, 0, true);
dfprintk(FSCACHE, "NFS: get client cookie (0x%p/0x%p)\n",
clp, clp->fscache);
}
@@ -52,7 +99,7 @@ void nfs_fscache_release_client_cookie(struct nfs_client *clp)
dfprintk(FSCACHE, "NFS: releasing client cookie (0x%p/0x%p)\n",
clp, clp->fscache);
- fscache_relinquish_cookie(clp->fscache, 0);
+ fscache_relinquish_cookie(clp->fscache, NULL, false);
clp->fscache = NULL;
}
@@ -139,7 +186,9 @@ void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int
/* create a cache index for looking up filehandles */
nfss->fscache = fscache_acquire_cookie(nfss->nfs_client->fscache,
&nfs_fscache_super_index_def,
- nfss, true);
+ key, sizeof(*key) + ulen,
+ NULL, 0,
+ nfss, 0, true);
dfprintk(FSCACHE, "NFS: get superblock cookie (0x%p/0x%p)\n",
nfss, nfss->fscache);
return;
@@ -163,7 +212,7 @@ void nfs_fscache_release_super_cookie(struct super_block *sb)
dfprintk(FSCACHE, "NFS: releasing superblock cookie (0x%p/0x%p)\n",
nfss, nfss->fscache);
- fscache_relinquish_cookie(nfss->fscache, 0);
+ fscache_relinquish_cookie(nfss->fscache, NULL, false);
nfss->fscache = NULL;
if (nfss->fscache_key) {
@@ -180,14 +229,25 @@ void nfs_fscache_release_super_cookie(struct super_block *sb)
*/
void nfs_fscache_init_inode(struct inode *inode)
{
+ struct nfs_fscache_inode_auxdata auxdata;
struct nfs_inode *nfsi = NFS_I(inode);
nfsi->fscache = NULL;
if (!S_ISREG(inode->i_mode))
return;
+
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.mtime = nfsi->vfs_inode.i_mtime;
+ auxdata.ctime = nfsi->vfs_inode.i_ctime;
+
+ if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4)
+ auxdata.change_attr = inode_peek_iversion_raw(&nfsi->vfs_inode);
+
nfsi->fscache = fscache_acquire_cookie(NFS_SB(inode->i_sb)->fscache,
&nfs_fscache_inode_object_def,
- nfsi, false);
+ nfsi->fh.data, nfsi->fh.size,
+ &auxdata, sizeof(auxdata),
+ nfsi, nfsi->vfs_inode.i_size, false);
}
/*
@@ -195,12 +255,16 @@ void nfs_fscache_init_inode(struct inode *inode)
*/
void nfs_fscache_clear_inode(struct inode *inode)
{
+ struct nfs_fscache_inode_auxdata auxdata;
struct nfs_inode *nfsi = NFS_I(inode);
struct fscache_cookie *cookie = nfs_i_fscache(inode);
dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", nfsi, cookie);
- fscache_relinquish_cookie(cookie, false);
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.mtime = nfsi->vfs_inode.i_mtime;
+ auxdata.ctime = nfsi->vfs_inode.i_ctime;
+ fscache_relinquish_cookie(cookie, &auxdata, false);
nfsi->fscache = NULL;
}
@@ -232,20 +296,26 @@ static bool nfs_fscache_can_enable(void *data)
*/
void nfs_fscache_open_file(struct inode *inode, struct file *filp)
{
+ struct nfs_fscache_inode_auxdata auxdata;
struct nfs_inode *nfsi = NFS_I(inode);
struct fscache_cookie *cookie = nfs_i_fscache(inode);
if (!fscache_cookie_valid(cookie))
return;
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.mtime = nfsi->vfs_inode.i_mtime;
+ auxdata.ctime = nfsi->vfs_inode.i_ctime;
+
if (inode_is_open_for_write(inode)) {
dfprintk(FSCACHE, "NFS: nfsi 0x%p disabling cache\n", nfsi);
clear_bit(NFS_INO_FSCACHE, &nfsi->flags);
- fscache_disable_cookie(cookie, true);
+ fscache_disable_cookie(cookie, &auxdata, true);
fscache_uncache_all_inode_pages(cookie, inode);
} else {
dfprintk(FSCACHE, "NFS: nfsi 0x%p enabling cache\n", nfsi);
- fscache_enable_cookie(cookie, nfs_fscache_can_enable, inode);
+ fscache_enable_cookie(cookie, &auxdata, nfsi->vfs_inode.i_size,
+ nfs_fscache_can_enable, inode);
if (fscache_cookie_enabled(cookie))
set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags);
}
@@ -422,7 +492,8 @@ void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync)
"NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n",
nfs_i_fscache(inode), page, page->index, page->flags, sync);
- ret = fscache_write_page(nfs_i_fscache(inode), page, GFP_KERNEL);
+ ret = fscache_write_page(nfs_i_fscache(inode), page,
+ inode->i_size, GFP_KERNEL);
dfprintk(FSCACHE,
"NFS: readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n",
page, page->index, page->flags, ret);
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index d7fe3e799f2f..161ba2edb9d0 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -57,6 +57,21 @@ struct nfs_fscache_key {
};
/*
+ * Definition of the auxiliary data attached to NFS inode storage objects
+ * within the cache.
+ *
+ * The contents of this struct are recorded in the on-disk local cache in the
+ * auxiliary data attached to the data storage object backing an inode. This
+ * permits coherency to be managed when a new inode binds to an already extant
+ * cache object.
+ */
+struct nfs_fscache_inode_auxdata {
+ struct timespec mtime;
+ struct timespec ctime;
+ u64 change_attr;
+};
+
+/*
* fscache-index.c
*/
extern struct fscache_netfs nfs_fscache_netfs;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 65c9c4175145..b993ad282de2 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -52,7 +52,6 @@
#include <linux/nfs.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
-#include <linux/fs_struct.h>
#include "nfs4_fs.h"
#include "internal.h"
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 1d0ce3c57d93..6259a4b8579f 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -192,6 +192,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
struct nfsd3_writeres *resp = rqstp->rq_resp;
__be32 nfserr;
unsigned long cnt = argp->len;
+ unsigned int nvecs;
dprintk("nfsd: WRITE(3) %s %d bytes at %Lu%s\n",
SVCFH_fmt(&argp->fh),
@@ -201,9 +202,12 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
fh_copy(&resp->fh, &argp->fh);
resp->committed = argp->stable;
+ nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt);
+ if (!nvecs)
+ RETURN_STATUS(nfserr_io);
nfserr = nfsd_write(rqstp, &resp->fh, argp->offset,
- rqstp->rq_vec, argp->vlen,
- &cnt, resp->committed);
+ rqstp->rq_vec, nvecs, &cnt,
+ resp->committed);
resp->count = cnt;
RETURN_STATUS(nfserr);
}
@@ -279,6 +283,16 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp)
struct nfsd3_diropres *resp = rqstp->rq_resp;
__be32 nfserr;
+ if (argp->tlen == 0)
+ RETURN_STATUS(nfserr_inval);
+ if (argp->tlen > NFS3_MAXPATHLEN)
+ RETURN_STATUS(nfserr_nametoolong);
+
+ argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
+ argp->tlen);
+ if (IS_ERR(argp->tname))
+ RETURN_STATUS(nfserrno(PTR_ERR(argp->tname)));
+
dprintk("nfsd: SYMLINK(3) %s %.*s -> %.*s\n",
SVCFH_fmt(&argp->ffh),
argp->flen, argp->fname,
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 1a70581e1cb2..3192b544a441 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -391,7 +391,7 @@ int
nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
{
struct nfsd3_writeargs *args = rqstp->rq_argp;
- unsigned int len, v, hdr, dlen;
+ unsigned int len, hdr, dlen;
u32 max_blocksize = svc_max_payload(rqstp);
struct kvec *head = rqstp->rq_arg.head;
struct kvec *tail = rqstp->rq_arg.tail;
@@ -433,17 +433,9 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
args->count = max_blocksize;
len = args->len = max_blocksize;
}
- rqstp->rq_vec[0].iov_base = (void*)p;
- rqstp->rq_vec[0].iov_len = head->iov_len - hdr;
- v = 0;
- while (len > rqstp->rq_vec[v].iov_len) {
- len -= rqstp->rq_vec[v].iov_len;
- v++;
- rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]);
- rqstp->rq_vec[v].iov_len = PAGE_SIZE;
- }
- rqstp->rq_vec[v].iov_len = len;
- args->vlen = v + 1;
+
+ args->first.iov_base = (void *)p;
+ args->first.iov_len = head->iov_len - hdr;
return 1;
}
@@ -489,51 +481,24 @@ int
nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
{
struct nfsd3_symlinkargs *args = rqstp->rq_argp;
- unsigned int len, avail;
- char *old, *new;
- struct kvec *vec;
+ char *base = (char *)p;
+ size_t dlen;
if (!(p = decode_fh(p, &args->ffh)) ||
- !(p = decode_filename(p, &args->fname, &args->flen))
- )
+ !(p = decode_filename(p, &args->fname, &args->flen)))
return 0;
p = decode_sattr3(p, &args->attrs);
- /* now decode the pathname, which might be larger than the first page.
- * As we have to check for nul's anyway, we copy it into a new page
- * This page appears in the rq_res.pages list, but as pages_len is always
- * 0, it won't get in the way
- */
- len = ntohl(*p++);
- if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE)
- return 0;
- args->tname = new = page_address(*(rqstp->rq_next_page++));
- args->tlen = len;
- /* first copy and check from the first page */
- old = (char*)p;
- vec = &rqstp->rq_arg.head[0];
- if ((void *)old > vec->iov_base + vec->iov_len)
- return 0;
- avail = vec->iov_len - (old - (char*)vec->iov_base);
- while (len && avail && *old) {
- *new++ = *old++;
- len--;
- avail--;
- }
- /* now copy next page if there is one */
- if (len && !avail && rqstp->rq_arg.page_len) {
- avail = min_t(unsigned int, rqstp->rq_arg.page_len, PAGE_SIZE);
- old = page_address(rqstp->rq_arg.pages[0]);
- }
- while (len && avail && *old) {
- *new++ = *old++;
- len--;
- avail--;
- }
- *new = '\0';
- if (len)
- return 0;
+ args->tlen = ntohl(*p++);
+
+ args->first.iov_base = p;
+ args->first.iov_len = rqstp->rq_arg.head[0].iov_len;
+ args->first.iov_len -= (char *)p - base;
+ dlen = args->first.iov_len + rqstp->rq_arg.page_len +
+ rqstp->rq_arg.tail[0].iov_len;
+ if (dlen < XDR_QUADLEN(args->tlen) << 2)
+ return 0;
return 1;
}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 49b0a9e7ff18..1f04d2a70d25 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -223,8 +223,8 @@ static int nfs_cb_stat_to_errno(int status)
return -status;
}
-static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected,
- int *status)
+static int decode_cb_op_status(struct xdr_stream *xdr,
+ enum nfs_cb_opnum4 expected, int *status)
{
__be32 *p;
u32 op;
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 7d888369f85a..228faf00a594 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -165,7 +165,7 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
struct nfs4_client *clp = ls->ls_stid.sc_client;
struct nfs4_file *fp = ls->ls_stid.sc_file;
- trace_layoutstate_free(&ls->ls_stid.sc_stateid);
+ trace_nfsd_layoutstate_free(&ls->ls_stid.sc_stateid);
spin_lock(&clp->cl_lock);
list_del_init(&ls->ls_perclnt);
@@ -264,7 +264,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
list_add(&ls->ls_perfile, &fp->fi_lo_states);
spin_unlock(&fp->fi_lock);
- trace_layoutstate_alloc(&ls->ls_stid.sc_stateid);
+ trace_nfsd_layoutstate_alloc(&ls->ls_stid.sc_stateid);
return ls;
}
@@ -334,7 +334,7 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls)
if (list_empty(&ls->ls_layouts))
goto out_unlock;
- trace_layout_recall(&ls->ls_stid.sc_stateid);
+ trace_nfsd_layout_recall(&ls->ls_stid.sc_stateid);
refcount_inc(&ls->ls_stid.sc_count);
nfsd4_run_cb(&ls->ls_recall);
@@ -507,7 +507,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp,
false, lrp->lr_layout_type,
&ls);
if (nfserr) {
- trace_layout_return_lookup_fail(&lrp->lr_sid);
+ trace_nfsd_layout_return_lookup_fail(&lrp->lr_sid);
return nfserr;
}
@@ -523,7 +523,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp,
nfs4_inc_and_copy_stateid(&lrp->lr_sid, &ls->ls_stid);
lrp->lrs_present = 1;
} else {
- trace_layoutstate_unhash(&ls->ls_stid.sc_stateid);
+ trace_nfsd_layoutstate_unhash(&ls->ls_stid.sc_stateid);
nfs4_unhash_stid(&ls->ls_stid);
lrp->lrs_present = 0;
}
@@ -694,7 +694,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
/*
* Unknown error or non-responding client, we'll need to fence.
*/
- trace_layout_recall_fail(&ls->ls_stid.sc_stateid);
+ trace_nfsd_layout_recall_fail(&ls->ls_stid.sc_stateid);
ops = nfsd4_layout_ops[ls->ls_layout_type];
if (ops->fence_client)
@@ -703,7 +703,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
nfsd4_cb_layout_fail(ls);
return -1;
case -NFS4ERR_NOMATCHING_LAYOUT:
- trace_layout_recall_done(&ls->ls_stid.sc_stateid);
+ trace_nfsd_layout_recall_done(&ls->ls_stid.sc_stateid);
task->tk_status = 0;
return 1;
}
@@ -716,7 +716,7 @@ nfsd4_cb_layout_release(struct nfsd4_callback *cb)
container_of(cb, struct nfs4_layout_stateid, ls_recall);
LIST_HEAD(reaplist);
- trace_layout_recall_release(&ls->ls_stid.sc_stateid);
+ trace_nfsd_layout_recall_release(&ls->ls_stid.sc_stateid);
nfsd4_return_all_layouts(ls, &reaplist);
nfsd4_free_layouts(&reaplist);
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a0bed2b2004d..5d99e8810b85 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -32,6 +32,7 @@
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <linux/fs_struct.h>
#include <linux/file.h>
#include <linux/falloc.h>
#include <linux/slab.h>
@@ -252,11 +253,13 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
* Note: create modes (UNCHECKED,GUARDED...) are the same
* in NFSv4 as in v3 except EXCLUSIVE4_1.
*/
+ current->fs->umask = open->op_umask;
status = do_nfsd_create(rqstp, current_fh, open->op_fname.data,
open->op_fname.len, &open->op_iattr,
*resfh, open->op_createmode,
(u32 *)open->op_verf.data,
&open->op_truncate, &open->op_created);
+ current->fs->umask = 0;
if (!status && open->op_label.len)
nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval);
@@ -603,6 +606,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
return status;
+ current->fs->umask = create->cr_umask;
switch (create->cr_type) {
case NF4LNK:
status = nfsd_symlink(rqstp, &cstate->current_fh,
@@ -611,20 +615,22 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
break;
case NF4BLK:
+ status = nfserr_inval;
rdev = MKDEV(create->cr_specdata1, create->cr_specdata2);
if (MAJOR(rdev) != create->cr_specdata1 ||
MINOR(rdev) != create->cr_specdata2)
- return nfserr_inval;
+ goto out_umask;
status = nfsd_create(rqstp, &cstate->current_fh,
create->cr_name, create->cr_namelen,
&create->cr_iattr, S_IFBLK, rdev, &resfh);
break;
case NF4CHR:
+ status = nfserr_inval;
rdev = MKDEV(create->cr_specdata1, create->cr_specdata2);
if (MAJOR(rdev) != create->cr_specdata1 ||
MINOR(rdev) != create->cr_specdata2)
- return nfserr_inval;
+ goto out_umask;
status = nfsd_create(rqstp, &cstate->current_fh,
create->cr_name, create->cr_namelen,
&create->cr_iattr,S_IFCHR, rdev, &resfh);
@@ -668,6 +674,8 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fh_dup2(&cstate->current_fh, &resfh);
out:
fh_put(&resfh);
+out_umask:
+ current->fs->umask = 0;
return status;
}
@@ -751,6 +759,9 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (read->rd_offset >= OFFSET_MAX)
return nfserr_inval;
+ trace_nfsd_read_start(rqstp, &cstate->current_fh,
+ read->rd_offset, read->rd_length);
+
/*
* If we do a zero copy read, then a client will see read data
* that reflects the state of the file *after* performing the
@@ -783,6 +794,8 @@ nfsd4_read_release(union nfsd4_op_u *u)
{
if (u->read.rd_filp)
fput(u->read.rd_filp);
+ trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
+ u->read.rd_offset, u->read.rd_length);
}
static __be32
@@ -1001,6 +1014,9 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (write->wr_offset >= OFFSET_MAX)
return nfserr_inval;
+ cnt = write->wr_buflen;
+ trace_nfsd_write_start(rqstp, &cstate->current_fh,
+ write->wr_offset, cnt);
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
stateid, WR_STATE, &filp, NULL);
if (status) {
@@ -1008,7 +1024,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status;
}
- cnt = write->wr_buflen;
write->wr_how_written = write->wr_stable_how;
gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp));
@@ -1021,7 +1036,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fput(filp);
write->wr_bytes_written = cnt;
-
+ trace_nfsd_write_done(rqstp, &cstate->current_fh,
+ write->wr_offset, cnt);
return status;
}
@@ -1106,7 +1122,6 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
else {
copy->cp_res.wr_bytes_written = bytes;
copy->cp_res.wr_stable_how = NFS_UNSTABLE;
- copy->cp_consecutive = 1;
copy->cp_synchronous = 1;
gen_boot_verifier(&copy->cp_res.wr_verifier, SVC_NET(rqstp));
status = nfs_ok;
@@ -1412,7 +1427,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lgp->lg_sid,
true, lgp->lg_layout_type, &ls);
if (nfserr) {
- trace_layout_get_lookup_fail(&lgp->lg_sid);
+ trace_nfsd_layout_get_lookup_fail(&lgp->lg_sid);
goto out;
}
@@ -1481,7 +1496,7 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp,
false, lcp->lc_layout_type,
&ls);
if (nfserr) {
- trace_layout_commit_lookup_fail(&lcp->lc_sid);
+ trace_nfsd_layout_commit_lookup_fail(&lcp->lc_sid);
/* fixup error code as per RFC5661 */
if (nfserr == nfserr_bad_stateid)
nfserr = nfserr_badlayout;
@@ -1714,12 +1729,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
goto encode_op;
}
+ trace_nfsd_compound(rqstp, args->opcnt);
while (!status && resp->opcnt < args->opcnt) {
op = &args->ops[resp->opcnt++];
- dprintk("nfsv4 compound op #%d/%d: %d (%s)\n",
- resp->opcnt, args->opcnt, op->opnum,
- nfsd4_op_name(op->opnum));
/*
* The XDR decode routines may have pre-set op->status;
* for example, if there is a miscellaneous XDR error
@@ -1793,9 +1806,8 @@ encode_op:
status = op->status;
}
- dprintk("nfsv4 compound op %p opcnt %d #%d: %d: status %d\n",
- args->ops, args->opcnt, resp->opcnt, op->opnum,
- be32_to_cpu(status));
+ trace_nfsd_compound_status(args->opcnt, resp->opcnt, status,
+ nfsd4_op_name(op->opnum));
nfsd4_cstate_clear_replay(cstate);
nfsd4_increment_op_stats(op->opnum);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 61b770e39809..fc74d6f46bd5 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -98,6 +98,7 @@ enum nfsd4_st_mutex_lock_subclass {
*/
static DECLARE_WAIT_QUEUE_HEAD(close_wq);
+static struct kmem_cache *client_slab;
static struct kmem_cache *openowner_slab;
static struct kmem_cache *lockowner_slab;
static struct kmem_cache *file_slab;
@@ -806,7 +807,8 @@ static void block_delegations(struct knfsd_fh *fh)
}
static struct nfs4_delegation *
-alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh,
+alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
+ struct svc_fh *current_fh,
struct nfs4_clnt_odstate *odstate)
{
struct nfs4_delegation *dp;
@@ -837,6 +839,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh,
dp->dl_retries = 1;
nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
&nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
+ get_nfs4_file(fp);
+ dp->dl_stid.sc_file = fp;
return dp;
out_dec:
atomic_long_dec(&num_delegations);
@@ -874,19 +878,35 @@ nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid)
spin_unlock(&stid->sc_lock);
}
-static void nfs4_put_deleg_lease(struct nfs4_file *fp)
+static void put_deleg_file(struct nfs4_file *fp)
{
struct file *filp = NULL;
spin_lock(&fp->fi_lock);
- if (fp->fi_deleg_file && --fp->fi_delegees == 0)
+ if (--fp->fi_delegees == 0)
swap(filp, fp->fi_deleg_file);
spin_unlock(&fp->fi_lock);
- if (filp) {
- vfs_setlease(filp, F_UNLCK, NULL, (void **)&fp);
+ if (filp)
fput(filp);
- }
+}
+
+static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp)
+{
+ struct nfs4_file *fp = dp->dl_stid.sc_file;
+ struct file *filp = fp->fi_deleg_file;
+
+ WARN_ON_ONCE(!fp->fi_delegees);
+
+ vfs_setlease(filp, F_UNLCK, NULL, (void **)&dp);
+ put_deleg_file(fp);
+}
+
+static void destroy_unhashed_deleg(struct nfs4_delegation *dp)
+{
+ put_clnt_odstate(dp->dl_clnt_odstate);
+ nfs4_unlock_deleg_lease(dp);
+ nfs4_put_stid(&dp->dl_stid);
}
void nfs4_unhash_stid(struct nfs4_stid *s)
@@ -895,20 +915,16 @@ void nfs4_unhash_stid(struct nfs4_stid *s)
}
/**
- * nfs4_get_existing_delegation - Discover if this delegation already exists
+ * nfs4_delegation_exists - Discover if this delegation already exists
* @clp: a pointer to the nfs4_client we're granting a delegation to
* @fp: a pointer to the nfs4_file we're granting a delegation on
*
* Return:
- * On success: NULL if an existing delegation was not found.
- *
- * On error: -EAGAIN if one was previously granted to this nfs4_client
- * for this nfs4_file.
- *
+ * On success: true iff an existing delegation is found
*/
-static int
-nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp)
+static bool
+nfs4_delegation_exists(struct nfs4_client *clp, struct nfs4_file *fp)
{
struct nfs4_delegation *searchdp = NULL;
struct nfs4_client *searchclp = NULL;
@@ -919,10 +935,10 @@ nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp)
list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) {
searchclp = searchdp->dl_stid.sc_client;
if (clp == searchclp) {
- return -EAGAIN;
+ return true;
}
}
- return 0;
+ return false;
}
/**
@@ -941,16 +957,13 @@ nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp)
static int
hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
{
- int status;
struct nfs4_client *clp = dp->dl_stid.sc_client;
lockdep_assert_held(&state_lock);
lockdep_assert_held(&fp->fi_lock);
- status = nfs4_get_existing_delegation(clp, fp);
- if (status)
- return status;
- ++fp->fi_delegees;
+ if (nfs4_delegation_exists(clp, fp))
+ return -EAGAIN;
refcount_inc(&dp->dl_stid.sc_count);
dp->dl_stid.sc_type = NFS4_DELEG_STID;
list_add(&dp->dl_perfile, &fp->fi_delegations);
@@ -986,11 +999,8 @@ static void destroy_delegation(struct nfs4_delegation *dp)
spin_lock(&state_lock);
unhashed = unhash_delegation_locked(dp);
spin_unlock(&state_lock);
- if (unhashed) {
- put_clnt_odstate(dp->dl_clnt_odstate);
- nfs4_put_deleg_lease(dp->dl_stid.sc_file);
- nfs4_put_stid(&dp->dl_stid);
- }
+ if (unhashed)
+ destroy_unhashed_deleg(dp);
}
static void revoke_delegation(struct nfs4_delegation *dp)
@@ -999,17 +1009,14 @@ static void revoke_delegation(struct nfs4_delegation *dp)
WARN_ON(!list_empty(&dp->dl_recall_lru));
- put_clnt_odstate(dp->dl_clnt_odstate);
- nfs4_put_deleg_lease(dp->dl_stid.sc_file);
-
- if (clp->cl_minorversion == 0)
- nfs4_put_stid(&dp->dl_stid);
- else {
+ if (clp->cl_minorversion) {
dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
+ refcount_inc(&dp->dl_stid.sc_count);
spin_lock(&clp->cl_lock);
list_add(&dp->dl_recall_lru, &clp->cl_revoked);
spin_unlock(&clp->cl_lock);
}
+ destroy_unhashed_deleg(dp);
}
/*
@@ -1794,7 +1801,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
struct nfs4_client *clp;
int i;
- clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL);
+ clp = kmem_cache_zalloc(client_slab, GFP_KERNEL);
if (clp == NULL)
return NULL;
clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL);
@@ -1825,7 +1832,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
err_no_hashtbl:
kfree(clp->cl_name.data);
err_no_name:
- kfree(clp);
+ kmem_cache_free(client_slab, clp);
return NULL;
}
@@ -1845,7 +1852,7 @@ free_client(struct nfs4_client *clp)
kfree(clp->cl_ownerstr_hashtbl);
kfree(clp->cl_name.data);
idr_destroy(&clp->cl_stateids);
- kfree(clp);
+ kmem_cache_free(client_slab, clp);
}
/* must be called under the client_lock */
@@ -1911,9 +1918,7 @@ __destroy_client(struct nfs4_client *clp)
while (!list_empty(&reaplist)) {
dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
list_del_init(&dp->dl_recall_lru);
- put_clnt_odstate(dp->dl_clnt_odstate);
- nfs4_put_deleg_lease(dp->dl_stid.sc_file);
- nfs4_put_stid(&dp->dl_stid);
+ destroy_unhashed_deleg(dp);
}
while (!list_empty(&clp->cl_revoked)) {
dp = list_entry(clp->cl_revoked.next, struct nfs4_delegation, dl_recall_lru);
@@ -2953,7 +2958,7 @@ out_no_session:
static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid)
{
if (!session)
- return 0;
+ return false;
return !memcmp(sid, &session->se_sessionid, sizeof(*sid));
}
@@ -3471,21 +3476,26 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
void
nfsd4_free_slabs(void)
{
- kmem_cache_destroy(odstate_slab);
+ kmem_cache_destroy(client_slab);
kmem_cache_destroy(openowner_slab);
kmem_cache_destroy(lockowner_slab);
kmem_cache_destroy(file_slab);
kmem_cache_destroy(stateid_slab);
kmem_cache_destroy(deleg_slab);
+ kmem_cache_destroy(odstate_slab);
}
int
nfsd4_init_slabs(void)
{
+ client_slab = kmem_cache_create("nfsd4_clients",
+ sizeof(struct nfs4_client), 0, 0, NULL);
+ if (client_slab == NULL)
+ goto out;
openowner_slab = kmem_cache_create("nfsd4_openowners",
sizeof(struct nfs4_openowner), 0, 0, NULL);
if (openowner_slab == NULL)
- goto out;
+ goto out_free_client_slab;
lockowner_slab = kmem_cache_create("nfsd4_lockowners",
sizeof(struct nfs4_lockowner), 0, 0, NULL);
if (lockowner_slab == NULL)
@@ -3518,6 +3528,8 @@ out_free_lockowner_slab:
kmem_cache_destroy(lockowner_slab);
out_free_openowner_slab:
kmem_cache_destroy(openowner_slab);
+out_free_client_slab:
+ kmem_cache_destroy(client_slab);
out:
dprintk("nfsd4: out of memory while initializing nfsv4\n");
return -ENOMEM;
@@ -3945,17 +3957,9 @@ static bool
nfsd_break_deleg_cb(struct file_lock *fl)
{
bool ret = false;
- struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner;
- struct nfs4_delegation *dp;
+ struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
+ struct nfs4_file *fp = dp->dl_stid.sc_file;
- if (!fp) {
- WARN(1, "(%p)->fl_owner NULL\n", fl);
- return ret;
- }
- if (fp->fi_had_conflict) {
- WARN(1, "duplicate break on %p\n", fp);
- return ret;
- }
/*
* We don't want the locks code to timeout the lease for us;
* we'll remove it ourself if a delegation isn't returned
@@ -3965,15 +3969,7 @@ nfsd_break_deleg_cb(struct file_lock *fl)
spin_lock(&fp->fi_lock);
fp->fi_had_conflict = true;
- /*
- * If there are no delegations on the list, then return true
- * so that the lease code will go ahead and delete it.
- */
- if (list_empty(&fp->fi_delegations))
- ret = true;
- else
- list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
- nfsd_break_one_deleg(dp);
+ nfsd_break_one_deleg(dp);
spin_unlock(&fp->fi_lock);
return ret;
}
@@ -4297,7 +4293,8 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
}
-static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag)
+static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
+ int flag)
{
struct file_lock *fl;
@@ -4308,124 +4305,88 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag)
fl->fl_flags = FL_DELEG;
fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
fl->fl_end = OFFSET_MAX;
- fl->fl_owner = (fl_owner_t)fp;
+ fl->fl_owner = (fl_owner_t)dp;
fl->fl_pid = current->tgid;
+ fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file;
return fl;
}
-/**
- * nfs4_setlease - Obtain a delegation by requesting lease from vfs layer
- * @dp: a pointer to the nfs4_delegation we're adding.
- *
- * Return:
- * On success: Return code will be 0 on success.
- *
- * On error: -EAGAIN if there was an existing delegation.
- * nonzero if there is an error in other cases.
- *
- */
-
-static int nfs4_setlease(struct nfs4_delegation *dp)
-{
- struct nfs4_file *fp = dp->dl_stid.sc_file;
- struct file_lock *fl;
- struct file *filp;
- int status = 0;
-
- fl = nfs4_alloc_init_lease(fp, NFS4_OPEN_DELEGATE_READ);
- if (!fl)
- return -ENOMEM;
- filp = find_readable_file(fp);
- if (!filp) {
- /* We should always have a readable file here */
- WARN_ON_ONCE(1);
- locks_free_lock(fl);
- return -EBADF;
- }
- fl->fl_file = filp;
- status = vfs_setlease(filp, fl->fl_type, &fl, NULL);
- if (fl)
- locks_free_lock(fl);
- if (status)
- goto out_fput;
- spin_lock(&state_lock);
- spin_lock(&fp->fi_lock);
- /* Did the lease get broken before we took the lock? */
- status = -EAGAIN;
- if (fp->fi_had_conflict)
- goto out_unlock;
- /* Race breaker */
- if (fp->fi_deleg_file) {
- status = hash_delegation_locked(dp, fp);
- goto out_unlock;
- }
- fp->fi_deleg_file = filp;
- fp->fi_delegees = 0;
- status = hash_delegation_locked(dp, fp);
- spin_unlock(&fp->fi_lock);
- spin_unlock(&state_lock);
- if (status) {
- /* Should never happen, this is a new fi_deleg_file */
- WARN_ON_ONCE(1);
- goto out_fput;
- }
- return 0;
-out_unlock:
- spin_unlock(&fp->fi_lock);
- spin_unlock(&state_lock);
-out_fput:
- fput(filp);
- return status;
-}
-
static struct nfs4_delegation *
nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate)
{
- int status;
+ int status = 0;
struct nfs4_delegation *dp;
+ struct file *filp;
+ struct file_lock *fl;
+ /*
+ * The fi_had_conflict and nfs_get_existing_delegation checks
+ * here are just optimizations; we'll need to recheck them at
+ * the end:
+ */
if (fp->fi_had_conflict)
return ERR_PTR(-EAGAIN);
+ filp = find_readable_file(fp);
+ if (!filp) {
+ /* We should always have a readable file here */
+ WARN_ON_ONCE(1);
+ return ERR_PTR(-EBADF);
+ }
spin_lock(&state_lock);
spin_lock(&fp->fi_lock);
- status = nfs4_get_existing_delegation(clp, fp);
+ if (nfs4_delegation_exists(clp, fp))
+ status = -EAGAIN;
+ else if (!fp->fi_deleg_file) {
+ fp->fi_deleg_file = filp;
+ /* increment early to prevent fi_deleg_file from being
+ * cleared */
+ fp->fi_delegees = 1;
+ filp = NULL;
+ } else
+ fp->fi_delegees++;
spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock);
-
+ if (filp)
+ fput(filp);
if (status)
return ERR_PTR(status);
- dp = alloc_init_deleg(clp, fh, odstate);
+ status = -ENOMEM;
+ dp = alloc_init_deleg(clp, fp, fh, odstate);
if (!dp)
- return ERR_PTR(-ENOMEM);
+ goto out_delegees;
+
+ fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ);
+ if (!fl)
+ goto out_stid;
+
+ status = vfs_setlease(fp->fi_deleg_file, fl->fl_type, &fl, NULL);
+ if (fl)
+ locks_free_lock(fl);
+ if (status)
+ goto out_clnt_odstate;
- get_nfs4_file(fp);
spin_lock(&state_lock);
spin_lock(&fp->fi_lock);
- dp->dl_stid.sc_file = fp;
- if (!fp->fi_deleg_file) {
- spin_unlock(&fp->fi_lock);
- spin_unlock(&state_lock);
- status = nfs4_setlease(dp);
- goto out;
- }
- if (fp->fi_had_conflict) {
+ if (fp->fi_had_conflict)
status = -EAGAIN;
- goto out_unlock;
- }
- status = hash_delegation_locked(dp, fp);
-out_unlock:
+ else
+ status = hash_delegation_locked(dp, fp);
spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock);
-out:
- if (status) {
- put_clnt_odstate(dp->dl_clnt_odstate);
- nfs4_put_stid(&dp->dl_stid);
- return ERR_PTR(status);
- }
+
+ if (status)
+ destroy_unhashed_deleg(dp);
return dp;
+out_clnt_odstate:
+ put_clnt_odstate(dp->dl_clnt_odstate);
+out_stid:
+ nfs4_put_stid(&dp->dl_stid);
+out_delegees:
+ put_deleg_file(fp);
+ return ERR_PTR(status);
}
static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
@@ -5521,15 +5482,26 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
stp->st_stid.sc_type = NFS4_CLOSED_STID;
+
+ /*
+ * Technically we don't _really_ have to increment or copy it, since
+ * it should just be gone after this operation and we clobber the
+ * copied value below, but we continue to do so here just to ensure
+ * that racing ops see that there was a state change.
+ */
nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
nfsd4_close_open_stateid(stp);
mutex_unlock(&stp->st_mutex);
- /* See RFC5661 sectionm 18.2.4 */
- if (stp->st_stid.sc_client->cl_minorversion)
- memcpy(&close->cl_stateid, &close_stateid,
- sizeof(close->cl_stateid));
+ /* v4.1+ suggests that we send a special stateid in here, since the
+ * clients should just ignore this anyway. Since this is not useful
+ * for v4.0 clients either, we set it to the special close_stateid
+ * universally.
+ *
+ * See RFC5661 section 18.2.4, and RFC7530 section 16.2.5
+ */
+ memcpy(&close->cl_stateid, &close_stateid, sizeof(close->cl_stateid));
/* put reference from nfs4_preprocess_seqid_op */
nfs4_put_stid(&stp->st_stid);
@@ -7264,9 +7236,7 @@ nfs4_state_shutdown_net(struct net *net)
list_for_each_safe(pos, next, &reaplist) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
list_del_init(&dp->dl_recall_lru);
- put_clnt_odstate(dp->dl_clnt_odstate);
- nfs4_put_deleg_lease(dp->dl_stid.sc_file);
- nfs4_put_stid(&dp->dl_stid);
+ destroy_unhashed_deleg(dp);
}
nfsd4_client_tracking_exit(net);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index e502fd16246b..1d048dd95464 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -33,7 +33,6 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include <linux/fs_struct.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/namei.h>
@@ -682,7 +681,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr,
&create->cr_acl, &create->cr_label,
- &current->fs->umask);
+ &create->cr_umask);
if (status)
goto out;
@@ -927,7 +926,6 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
case NFS4_OPEN_NOCREATE:
break;
case NFS4_OPEN_CREATE:
- current->fs->umask = 0;
READ_BUF(4);
open->op_createmode = be32_to_cpup(p++);
switch (open->op_createmode) {
@@ -935,7 +933,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
case NFS4_CREATE_GUARDED:
status = nfsd4_decode_fattr(argp, open->op_bmval,
&open->op_iattr, &open->op_acl, &open->op_label,
- &current->fs->umask);
+ &open->op_umask);
if (status)
goto out;
break;
@@ -950,7 +948,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE);
status = nfsd4_decode_fattr(argp, open->op_bmval,
&open->op_iattr, &open->op_acl, &open->op_label,
- &current->fs->umask);
+ &open->op_umask);
if (status)
goto out;
break;
@@ -1759,7 +1757,7 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
p = xdr_decode_hyper(p, &copy->cp_src_pos);
p = xdr_decode_hyper(p, &copy->cp_dst_pos);
p = xdr_decode_hyper(p, &copy->cp_count);
- copy->cp_consecutive = be32_to_cpup(p++);
+ p++; /* ca_consecutive: we always do consecutive copies */
copy->cp_synchronous = be32_to_cpup(p++);
tmp = be32_to_cpup(p); /* Source server list not supported */
@@ -3427,8 +3425,9 @@ static __be32 nfsd4_encode_splice_read(
return nfserr_resource;
len = maxcount;
- nfserr = nfsd_splice_read(read->rd_rqstp, file,
- read->rd_offset, &maxcount);
+ nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp,
+ file, read->rd_offset, &maxcount);
+ read->rd_length = maxcount;
if (nfserr) {
/*
* nfsd_splice_actor may have already messed with the
@@ -3511,8 +3510,9 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
read->rd_vlen = v;
len = maxcount;
- nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec,
- read->rd_vlen, &maxcount);
+ nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
+ resp->rqstp->rq_vec, read->rd_vlen, &maxcount);
+ read->rd_length = maxcount;
if (nfserr)
return nfserr;
xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
@@ -4214,7 +4214,7 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
return nfserr;
p = xdr_reserve_space(&resp->xdr, 4 + 4);
- *p++ = cpu_to_be32(copy->cp_consecutive);
+ *p++ = xdr_one; /* cr_consecutive */
*p++ = cpu_to_be32(copy->cp_synchronous);
return 0;
}
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 8aa011820c4a..a008e7634181 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -87,13 +87,23 @@ nfsd_mode_check(struct svc_rqst *rqstp, struct dentry *dentry,
return nfserr_inval;
}
+static bool nfsd_originating_port_ok(struct svc_rqst *rqstp, int flags)
+{
+ if (flags & NFSEXP_INSECURE_PORT)
+ return true;
+ /* We don't require gss requests to use low ports: */
+ if (rqstp->rq_cred.cr_flavor >= RPC_AUTH_GSS)
+ return true;
+ return test_bit(RQ_SECURE, &rqstp->rq_flags);
+}
+
static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
struct svc_export *exp)
{
int flags = nfsexp_flags(rqstp, exp);
/* Check if the request originated from a secure port. */
- if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && !(flags & NFSEXP_INSECURE_PORT)) {
+ if (!nfsd_originating_port_ok(rqstp, flags)) {
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
dprintk("nfsd: request from insecure port %s!\n",
svc_print_addr(rqstp, buf, sizeof(buf)));
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 43c0419b8ddb..f107f9fa8e15 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -212,13 +212,18 @@ nfsd_proc_write(struct svc_rqst *rqstp)
struct nfsd_attrstat *resp = rqstp->rq_resp;
__be32 nfserr;
unsigned long cnt = argp->len;
+ unsigned int nvecs;
dprintk("nfsd: WRITE %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
argp->len, argp->offset);
- nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset,
- rqstp->rq_vec, argp->vlen, &cnt, NFS_DATA_SYNC);
+ nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt);
+ if (!nvecs)
+ return nfserr_io;
+ nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
+ argp->offset, rqstp->rq_vec, nvecs,
+ &cnt, NFS_DATA_SYNC);
return nfsd_return_attrs(nfserr, resp);
}
@@ -444,17 +449,19 @@ nfsd_proc_symlink(struct svc_rqst *rqstp)
struct svc_fh newfh;
__be32 nfserr;
+ if (argp->tlen > NFS_MAXPATHLEN)
+ return nfserr_nametoolong;
+
+ argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
+ argp->tlen);
+ if (IS_ERR(argp->tname))
+ return nfserrno(PTR_ERR(argp->tname));
+
dprintk("nfsd: SYMLINK %s %.*s -> %.*s\n",
SVCFH_fmt(&argp->ffh), argp->flen, argp->fname,
argp->tlen, argp->tname);
fh_init(&newfh, NFS_FHSIZE);
- /*
- * Crazy hack: the request fits in a page, and already-decoded
- * attributes follow argp->tname, so it's safe to just write a
- * null to ensure it's null-terminated:
- */
- argp->tname[argp->tlen] = '\0';
nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
argp->tname, &newfh);
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 79b6064f8977..a43e8260520a 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -71,22 +71,6 @@ decode_filename(__be32 *p, char **namp, unsigned int *lenp)
}
static __be32 *
-decode_pathname(__be32 *p, char **namp, unsigned int *lenp)
-{
- char *name;
- unsigned int i;
-
- if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHLEN)) != NULL) {
- for (i = 0, name = *namp; i < *lenp; i++, name++) {
- if (*name == '\0')
- return NULL;
- }
- }
-
- return p;
-}
-
-static __be32 *
decode_sattr(__be32 *p, struct iattr *iap)
{
u32 tmp, tmp1;
@@ -287,7 +271,6 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
struct nfsd_writeargs *args = rqstp->rq_argp;
unsigned int len, hdr, dlen;
struct kvec *head = rqstp->rq_arg.head;
- int v;
p = decode_fh(p, &args->fh);
if (!p)
@@ -323,17 +306,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
if (dlen < XDR_QUADLEN(len)*4)
return 0;
- rqstp->rq_vec[0].iov_base = (void*)p;
- rqstp->rq_vec[0].iov_len = head->iov_len - hdr;
- v = 0;
- while (len > rqstp->rq_vec[v].iov_len) {
- len -= rqstp->rq_vec[v].iov_len;
- v++;
- rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]);
- rqstp->rq_vec[v].iov_len = PAGE_SIZE;
- }
- rqstp->rq_vec[v].iov_len = len;
- args->vlen = v + 1;
+ args->first.iov_base = (void *)p;
+ args->first.iov_len = head->iov_len - hdr;
return 1;
}
@@ -394,14 +368,39 @@ int
nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
{
struct nfsd_symlinkargs *args = rqstp->rq_argp;
+ char *base = (char *)p;
+ size_t xdrlen;
if ( !(p = decode_fh(p, &args->ffh))
- || !(p = decode_filename(p, &args->fname, &args->flen))
- || !(p = decode_pathname(p, &args->tname, &args->tlen)))
+ || !(p = decode_filename(p, &args->fname, &args->flen)))
return 0;
- p = decode_sattr(p, &args->attrs);
- return xdr_argsize_check(rqstp, p);
+ args->tlen = ntohl(*p++);
+ if (args->tlen == 0)
+ return 0;
+
+ args->first.iov_base = p;
+ args->first.iov_len = rqstp->rq_arg.head[0].iov_len;
+ args->first.iov_len -= (char *)p - base;
+
+ /* This request is never larger than a page. Therefore,
+ * transport will deliver either:
+ * 1. pathname in the pagelist -> sattr is in the tail.
+ * 2. everything in the head buffer -> sattr is in the head.
+ */
+ if (rqstp->rq_arg.page_len) {
+ if (args->tlen != rqstp->rq_arg.page_len)
+ return 0;
+ p = rqstp->rq_arg.tail[0].iov_base;
+ } else {
+ xdrlen = XDR_QUADLEN(args->tlen);
+ if (xdrlen > args->first.iov_len - (8 * sizeof(__be32)))
+ return 0;
+ p += xdrlen;
+ }
+ decode_sattr(p, &args->attrs);
+
+ return 1;
}
int
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 8b2f1d92c579..80933e4334d8 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -11,39 +11,79 @@
#include <linux/tracepoint.h>
#include "nfsfh.h"
+TRACE_EVENT(nfsd_compound,
+ TP_PROTO(const struct svc_rqst *rqst,
+ u32 args_opcnt),
+ TP_ARGS(rqst, args_opcnt),
+ TP_STRUCT__entry(
+ __field(u32, xid)
+ __field(u32, args_opcnt)
+ ),
+ TP_fast_assign(
+ __entry->xid = be32_to_cpu(rqst->rq_xid);
+ __entry->args_opcnt = args_opcnt;
+ ),
+ TP_printk("xid=0x%08x opcnt=%u",
+ __entry->xid, __entry->args_opcnt)
+)
+
+TRACE_EVENT(nfsd_compound_status,
+ TP_PROTO(u32 args_opcnt,
+ u32 resp_opcnt,
+ __be32 status,
+ const char *name),
+ TP_ARGS(args_opcnt, resp_opcnt, status, name),
+ TP_STRUCT__entry(
+ __field(u32, args_opcnt)
+ __field(u32, resp_opcnt)
+ __field(int, status)
+ __string(name, name)
+ ),
+ TP_fast_assign(
+ __entry->args_opcnt = args_opcnt;
+ __entry->resp_opcnt = resp_opcnt;
+ __entry->status = be32_to_cpu(status);
+ __assign_str(name, name);
+ ),
+ TP_printk("op=%u/%u %s status=%d",
+ __entry->resp_opcnt, __entry->args_opcnt,
+ __get_str(name), __entry->status)
+)
+
DECLARE_EVENT_CLASS(nfsd_io_class,
TP_PROTO(struct svc_rqst *rqstp,
struct svc_fh *fhp,
loff_t offset,
- int len),
+ unsigned long len),
TP_ARGS(rqstp, fhp, offset, len),
TP_STRUCT__entry(
- __field(__be32, xid)
- __field_struct(struct knfsd_fh, fh)
+ __field(u32, xid)
+ __field(u32, fh_hash)
__field(loff_t, offset)
- __field(int, len)
+ __field(unsigned long, len)
),
TP_fast_assign(
- __entry->xid = rqstp->rq_xid,
- fh_copy_shallow(&__entry->fh, &fhp->fh_handle);
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
__entry->offset = offset;
__entry->len = len;
),
- TP_printk("xid=0x%x fh=0x%x offset=%lld len=%d",
- __be32_to_cpu(__entry->xid), knfsd_fh_hash(&__entry->fh),
+ TP_printk("xid=0x%08x fh_hash=0x%08x offset=%lld len=%lu",
+ __entry->xid, __entry->fh_hash,
__entry->offset, __entry->len)
)
#define DEFINE_NFSD_IO_EVENT(name) \
-DEFINE_EVENT(nfsd_io_class, name, \
+DEFINE_EVENT(nfsd_io_class, nfsd_##name, \
TP_PROTO(struct svc_rqst *rqstp, \
struct svc_fh *fhp, \
loff_t offset, \
- int len), \
+ unsigned long len), \
TP_ARGS(rqstp, fhp, offset, len))
DEFINE_NFSD_IO_EVENT(read_start);
-DEFINE_NFSD_IO_EVENT(read_opened);
+DEFINE_NFSD_IO_EVENT(read_splice);
+DEFINE_NFSD_IO_EVENT(read_vector);
DEFINE_NFSD_IO_EVENT(read_io_done);
DEFINE_NFSD_IO_EVENT(read_done);
DEFINE_NFSD_IO_EVENT(write_start);
@@ -51,6 +91,40 @@ DEFINE_NFSD_IO_EVENT(write_opened);
DEFINE_NFSD_IO_EVENT(write_io_done);
DEFINE_NFSD_IO_EVENT(write_done);
+DECLARE_EVENT_CLASS(nfsd_err_class,
+ TP_PROTO(struct svc_rqst *rqstp,
+ struct svc_fh *fhp,
+ loff_t offset,
+ int status),
+ TP_ARGS(rqstp, fhp, offset, status),
+ TP_STRUCT__entry(
+ __field(u32, xid)
+ __field(u32, fh_hash)
+ __field(loff_t, offset)
+ __field(int, status)
+ ),
+ TP_fast_assign(
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ __entry->offset = offset;
+ __entry->status = status;
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x offset=%lld status=%d",
+ __entry->xid, __entry->fh_hash,
+ __entry->offset, __entry->status)
+)
+
+#define DEFINE_NFSD_ERR_EVENT(name) \
+DEFINE_EVENT(nfsd_err_class, nfsd_##name, \
+ TP_PROTO(struct svc_rqst *rqstp, \
+ struct svc_fh *fhp, \
+ loff_t offset, \
+ int len), \
+ TP_ARGS(rqstp, fhp, offset, len))
+
+DEFINE_NFSD_ERR_EVENT(read_err);
+DEFINE_NFSD_ERR_EVENT(write_err);
+
#include "state.h"
DECLARE_EVENT_CLASS(nfsd_stateid_class,
@@ -76,7 +150,7 @@ DECLARE_EVENT_CLASS(nfsd_stateid_class,
)
#define DEFINE_STATEID_EVENT(name) \
-DEFINE_EVENT(nfsd_stateid_class, name, \
+DEFINE_EVENT(nfsd_stateid_class, nfsd_##name, \
TP_PROTO(stateid_t *stp), \
TP_ARGS(stp))
DEFINE_STATEID_EVENT(layoutstate_alloc);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index a3c9bfa77def..2410b093a2e6 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -881,20 +881,24 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
}
-static __be32
-nfsd_finish_read(struct file *file, unsigned long *count, int host_err)
+static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct file *file, loff_t offset,
+ unsigned long *count, int host_err)
{
if (host_err >= 0) {
nfsdstats.io_read += host_err;
*count = host_err;
fsnotify_access(file);
+ trace_nfsd_read_io_done(rqstp, fhp, offset, *count);
return 0;
- } else
+ } else {
+ trace_nfsd_read_err(rqstp, fhp, offset, host_err);
return nfserrno(host_err);
+ }
}
-__be32 nfsd_splice_read(struct svc_rqst *rqstp,
- struct file *file, loff_t offset, unsigned long *count)
+__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct file *file, loff_t offset, unsigned long *count)
{
struct splice_desc sd = {
.len = 0,
@@ -904,21 +908,23 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp,
};
int host_err;
+ trace_nfsd_read_splice(rqstp, fhp, offset, *count);
rqstp->rq_next_page = rqstp->rq_respages + 1;
host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
- return nfsd_finish_read(file, count, host_err);
+ return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
}
-__be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen,
- unsigned long *count)
+__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct file *file, loff_t offset,
+ struct kvec *vec, int vlen, unsigned long *count)
{
struct iov_iter iter;
int host_err;
+ trace_nfsd_read_vector(rqstp, fhp, offset, *count);
iov_iter_kvec(&iter, READ | ITER_KVEC, vec, vlen, *count);
host_err = vfs_iter_read(file, &iter, &offset, 0);
-
- return nfsd_finish_read(file, count, host_err);
+ return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
}
/*
@@ -965,13 +971,15 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
{
struct svc_export *exp;
struct iov_iter iter;
- __be32 err = 0;
+ __be32 nfserr;
int host_err;
int use_wgather;
loff_t pos = offset;
unsigned int pflags = current->flags;
rwf_t flags = 0;
+ trace_nfsd_write_opened(rqstp, fhp, offset, *cnt);
+
if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
/*
* We want less throttling in balance_dirty_pages()
@@ -994,22 +1002,23 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
host_err = vfs_iter_write(file, &iter, &pos, flags);
if (host_err < 0)
goto out_nfserr;
- *cnt = host_err;
- nfsdstats.io_write += host_err;
+ nfsdstats.io_write += *cnt;
fsnotify_modify(file);
if (stable && use_wgather)
host_err = wait_for_concurrent_writes(file);
out_nfserr:
- dprintk("nfsd: write complete host_err=%d\n", host_err);
- if (host_err >= 0)
- err = 0;
- else
- err = nfserrno(host_err);
+ if (host_err >= 0) {
+ trace_nfsd_write_io_done(rqstp, fhp, offset, *cnt);
+ nfserr = nfs_ok;
+ } else {
+ trace_nfsd_write_err(rqstp, fhp, offset, host_err);
+ nfserr = nfserrno(host_err);
+ }
if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
current_restore_flags(pflags, PF_LESS_THROTTLE);
- return err;
+ return nfserr;
}
/*
@@ -1024,27 +1033,23 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct raparms *ra;
__be32 err;
- trace_read_start(rqstp, fhp, offset, vlen);
+ trace_nfsd_read_start(rqstp, fhp, offset, *count);
err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
if (err)
return err;
ra = nfsd_init_raparms(file);
- trace_read_opened(rqstp, fhp, offset, vlen);
-
if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
- err = nfsd_splice_read(rqstp, file, offset, count);
+ err = nfsd_splice_read(rqstp, fhp, file, offset, count);
else
- err = nfsd_readv(file, offset, vec, vlen, count);
-
- trace_read_io_done(rqstp, fhp, offset, vlen);
+ err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count);
if (ra)
nfsd_put_raparams(file, ra);
fput(file);
- trace_read_done(rqstp, fhp, offset, vlen);
+ trace_nfsd_read_done(rqstp, fhp, offset, *count);
return err;
}
@@ -1061,18 +1066,16 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
struct file *file = NULL;
__be32 err = 0;
- trace_write_start(rqstp, fhp, offset, vlen);
+ trace_nfsd_write_start(rqstp, fhp, offset, *cnt);
err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
if (err)
goto out;
- trace_write_opened(rqstp, fhp, offset, vlen);
err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
- trace_write_io_done(rqstp, fhp, offset, vlen);
fput(file);
out:
- trace_write_done(rqstp, fhp, offset, vlen);
+ trace_nfsd_write_done(rqstp, fhp, offset, *cnt);
return err;
}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index be6d8e00453f..a7e107309f76 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -78,10 +78,13 @@ __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
int, struct file **);
struct raparms;
-__be32 nfsd_splice_read(struct svc_rqst *,
- struct file *, loff_t, unsigned long *);
-__be32 nfsd_readv(struct file *, loff_t, struct kvec *, int,
- unsigned long *);
+__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct file *file, loff_t offset,
+ unsigned long *count);
+__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct file *file, loff_t offset,
+ struct kvec *vec, int vlen,
+ unsigned long *count);
__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
loff_t, struct kvec *, int, unsigned long *);
__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 2f4f22e6b8cb..ea7cca3a64b7 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -34,7 +34,7 @@ struct nfsd_writeargs {
svc_fh fh;
__u32 offset;
int len;
- int vlen;
+ struct kvec first;
};
struct nfsd_createargs {
@@ -72,6 +72,7 @@ struct nfsd_symlinkargs {
char * tname;
unsigned int tlen;
struct iattr attrs;
+ struct kvec first;
};
struct nfsd_readdirargs {
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 056bf8a7364e..2cb29e961a76 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -41,7 +41,7 @@ struct nfsd3_writeargs {
__u32 count;
int stable;
__u32 len;
- int vlen;
+ struct kvec first;
};
struct nfsd3_createargs {
@@ -90,6 +90,7 @@ struct nfsd3_symlinkargs {
char * tname;
unsigned int tlen;
struct iattr attrs;
+ struct kvec first;
};
struct nfsd3_readdirargs {
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index bc29511b6405..17c453a7999c 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -110,6 +110,7 @@ struct nfsd4_create {
struct {
u32 datalen;
char *data;
+ struct kvec first;
} link; /* NF4LNK */
struct {
u32 specdata1;
@@ -118,12 +119,14 @@ struct nfsd4_create {
} u;
u32 cr_bmval[3]; /* request */
struct iattr cr_iattr; /* request */
+ int cr_umask; /* request */
struct nfsd4_change_info cr_cinfo; /* response */
struct nfs4_acl *cr_acl;
struct xdr_netobj cr_label;
};
#define cr_datalen u.link.datalen
#define cr_data u.link.data
+#define cr_first u.link.first
#define cr_specdata1 u.dev.specdata1
#define cr_specdata2 u.dev.specdata2
@@ -228,6 +231,7 @@ struct nfsd4_open {
u32 op_why_no_deleg; /* response - DELEG_NONE_EXT only */
u32 op_create; /* request */
u32 op_createmode; /* request */
+ int op_umask; /* request */
u32 op_bmval[3]; /* request */
struct iattr op_iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */
nfs4_verifier op_verf __attribute__((aligned(32)));
@@ -518,7 +522,6 @@ struct nfsd4_copy {
u64 cp_count;
/* both */
- bool cp_consecutive;
bool cp_synchronous;
/* response */
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 6702a6a0bbb5..d51e1bb781cf 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -139,23 +139,32 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
return false;
}
-struct fanotify_event_info *fanotify_alloc_event(struct inode *inode, u32 mask,
+struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group,
+ struct inode *inode, u32 mask,
const struct path *path)
{
struct fanotify_event_info *event;
+ gfp_t gfp = GFP_KERNEL;
+
+ /*
+ * For queues with unlimited length lost events are not expected and
+ * can possibly have security implications. Avoid losing events when
+ * memory is short.
+ */
+ if (group->max_events == UINT_MAX)
+ gfp |= __GFP_NOFAIL;
if (fanotify_is_perm_event(mask)) {
struct fanotify_perm_event_info *pevent;
- pevent = kmem_cache_alloc(fanotify_perm_event_cachep,
- GFP_KERNEL);
+ pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp);
if (!pevent)
return NULL;
event = &pevent->fae;
pevent->response = 0;
goto init;
}
- event = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL);
+ event = kmem_cache_alloc(fanotify_event_cachep, gfp);
if (!event)
return NULL;
init: __maybe_unused
@@ -210,10 +219,17 @@ static int fanotify_handle_event(struct fsnotify_group *group,
return 0;
}
- event = fanotify_alloc_event(inode, mask, data);
+ event = fanotify_alloc_event(group, inode, mask, data);
ret = -ENOMEM;
- if (unlikely(!event))
+ if (unlikely(!event)) {
+ /*
+ * We don't queue overflow events for permission events as
+ * there the access is denied and so no event is in fact lost.
+ */
+ if (!fanotify_is_perm_event(mask))
+ fsnotify_queue_overflow(group);
goto finish;
+ }
fsn_event = &event->fse;
ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 256d9d1ddea9..8609ba06f474 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -52,5 +52,6 @@ static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse)
return container_of(fse, struct fanotify_event_info, fse);
}
-struct fanotify_event_info *fanotify_alloc_event(struct inode *inode, u32 mask,
+struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group,
+ struct inode *inode, u32 mask,
const struct path *path);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index fa803a58a605..ec4d8c59d0e3 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -757,7 +757,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
group->fanotify_data.user = user;
atomic_inc(&user->fanotify_listeners);
- oevent = fanotify_alloc_event(NULL, FS_Q_OVERFLOW, NULL);
+ oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL);
if (unlikely(!oevent)) {
fd = -ENOMEM;
goto out_destroy_group;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 8b73332735ba..40dedb37a1f3 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -99,8 +99,14 @@ int inotify_handle_event(struct fsnotify_group *group,
fsn_mark);
event = kmalloc(alloc_len, GFP_KERNEL);
- if (unlikely(!event))
+ if (unlikely(!event)) {
+ /*
+ * Treat lost event due to ENOMEM the same way as queue
+ * overflow to let userspace know event was lost.
+ */
+ fsnotify_queue_overflow(group);
return -ENOMEM;
+ }
fsn_event = &event->fse;
fsnotify_init_event(fsn_event, inode, mask);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 43c23653ce2e..ef32f3657958 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -307,6 +307,20 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
spin_unlock(&group->notification_lock);
ret = put_user(send_len, (int __user *) p);
break;
+#ifdef CONFIG_CHECKPOINT_RESTORE
+ case INOTIFY_IOC_SETNEXTWD:
+ ret = -EINVAL;
+ if (arg >= 1 && arg <= INT_MAX) {
+ struct inotify_group_private_data *data;
+
+ data = &group->inotify_data;
+ spin_lock(&data->idr_lock);
+ idr_set_cursor(&data->idr, (unsigned int)arg);
+ spin_unlock(&data->idr_lock);
+ ret = 0;
+ }
+ break;
+#endif /* CONFIG_CHECKPOINT_RESTORE */
}
return ret;
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 66f85c651c52..3c3e36745f59 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -111,7 +111,8 @@ int fsnotify_add_event(struct fsnotify_group *group,
return 2;
}
- if (group->q_len >= group->max_events) {
+ if (event == group->overflow_event ||
+ group->q_len >= group->max_events) {
ret = 2;
/* Queue overflow event only if it isn't already queued */
if (!list_empty(&group->overflow_event->list)) {
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 2831f495a674..32c523cf5a2d 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -381,7 +381,7 @@ unm_err_out:
* vfs inode dirty. This ensures that any changes to the mft record are
* written out to disk.
*
- * NOTE: We only set I_DIRTY_SYNC and I_DIRTY_DATASYNC (and not I_DIRTY_PAGES)
+ * NOTE: We only set I_DIRTY_DATASYNC (and not I_DIRTY_PAGES)
* on the base vfs inode, because even though file data may have been modified,
* it is dirty in the inode meta data rather than the data page cache of the
* inode, and thus there are no data pages that need writing out. Therefore, a
@@ -407,7 +407,7 @@ void __mark_mft_record_dirty(ntfs_inode *ni)
else
base_ni = ni->ext.base_ntfs_ino;
mutex_unlock(&ni->extent_lock);
- __mark_inode_dirty(VFS_I(base_ni), I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+ __mark_inode_dirty(VFS_I(base_ni), I_DIRTY_DATASYNC);
}
static const char *ntfs_please_email = "Please email "
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9a876bb07cac..0f157bbd3e0f 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -7119,7 +7119,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
goto out_commit;
did_quota = 1;
- data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
+ data_ac->ac_resv = &oi->ip_la_data_resv;
ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off,
&num);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index e8e205bf2e41..302cd7caa4a7 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -346,7 +346,7 @@ static int ocfs2_readpage(struct file *file, struct page *page)
unlock = 0;
out_alloc:
- up_read(&OCFS2_I(inode)->ip_alloc_sem);
+ up_read(&oi->ip_alloc_sem);
out_inode_unlock:
ocfs2_inode_unlock(inode, 0);
out:
@@ -2213,7 +2213,7 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock,
down_write(&oi->ip_alloc_sem);
if (first_get_block) {
- if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
+ if (ocfs2_sparse_alloc(osb))
ret = ocfs2_zero_tail(inode, di_bh, pos);
else
ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos,
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 8614ff069d99..3494a62ed749 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -78,7 +78,7 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level)
/*
* Using a named enum representing lock types in terms of #N bit stored in
* iocb->private, which is going to be used for communication between
- * ocfs2_dio_end_io() and ocfs2_file_aio_write/read().
+ * ocfs2_dio_end_io() and ocfs2_file_write/read_iter().
*/
enum ocfs2_iocb_lock_bits {
OCFS2_IOCB_RW_LOCK = 0,
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index ea8c551bcd7e..91a8889abf9b 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -570,7 +570,16 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
current_page, vec_len, vec_start);
len = bio_add_page(bio, page, vec_len, vec_start);
- if (len != vec_len) break;
+ if (len != vec_len) {
+ mlog(ML_ERROR, "Adding page[%d] to bio failed, "
+ "page %p, len %d, vec_len %u, vec_start %u, "
+ "bi_sector %llu\n", current_page, page, len,
+ vec_len, vec_start,
+ (unsigned long long)bio->bi_iter.bi_sector);
+ bio_put(bio);
+ bio = ERR_PTR(-EIO);
+ return bio;
+ }
cs += vec_len / (PAGE_SIZE/spp);
vec_start = 0;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 977763d4c27d..b048d4fa3959 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -3072,7 +3072,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
* We need to return the correct block within the
* cluster which should hold our entry.
*/
- off = ocfs2_dx_dir_hash_idx(OCFS2_SB(dir->i_sb),
+ off = ocfs2_dx_dir_hash_idx(osb,
&lookup->dl_hinfo);
get_bh(dx_leaves[off]);
lookup->dl_dx_leaf_bh = dx_leaves[off];
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index fd6bbbbd7d78..39831fc2fd52 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -224,14 +224,12 @@ void dlm_do_local_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
struct dlm_lock *lock)
{
dlm_astlockfunc_t *fn;
- struct dlm_lockstatus *lksb;
mlog(0, "%s: res %.*s, lock %u:%llu, Local AST\n", dlm->name,
res->lockname.len, res->lockname.name,
dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
- lksb = lock->lksb;
fn = lock->ast;
BUG_ON(lock->ml.node != dlm->node_num);
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index e9f3705c4c9f..d06e27ec4be4 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -140,6 +140,7 @@ struct dlm_ctxt
u8 node_num;
u32 key;
u8 joining_node;
+ u8 migrate_done; /* set to 1 means node has migrated all lock resources */
wait_queue_head_t dlm_join_events;
unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
@@ -960,13 +961,10 @@ static inline int dlm_send_proxy_ast(struct dlm_ctxt *dlm,
void dlm_print_one_lock_resource(struct dlm_lock_resource *res);
void __dlm_print_one_lock_resource(struct dlm_lock_resource *res);
-u8 dlm_nm_this_node(struct dlm_ctxt *dlm);
void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res);
void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res);
-int dlm_nm_init(struct dlm_ctxt *dlm);
-int dlm_heartbeat_init(struct dlm_ctxt *dlm);
void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data);
void dlm_hb_node_up_cb(struct o2nm_node *node, int idx, void *data);
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index e1fea149f50b..425081be6161 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -461,6 +461,19 @@ redo_bucket:
cond_resched_lock(&dlm->spinlock);
num += n;
}
+
+ if (!num) {
+ if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) {
+ mlog(0, "%s: perhaps there are more lock resources "
+ "need to be migrated after dlm recovery\n", dlm->name);
+ ret = -EAGAIN;
+ } else {
+ mlog(0, "%s: we won't do dlm recovery after migrating "
+ "all lock resources\n", dlm->name);
+ dlm->migrate_done = 1;
+ }
+ }
+
spin_unlock(&dlm->spinlock);
wake_up(&dlm->dlm_thread_wq);
@@ -675,20 +688,6 @@ static void dlm_leave_domain(struct dlm_ctxt *dlm)
spin_unlock(&dlm->spinlock);
}
-int dlm_shutting_down(struct dlm_ctxt *dlm)
-{
- int ret = 0;
-
- spin_lock(&dlm_domain_lock);
-
- if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN)
- ret = 1;
-
- spin_unlock(&dlm_domain_lock);
-
- return ret;
-}
-
void dlm_unregister_domain(struct dlm_ctxt *dlm)
{
int leave = 0;
@@ -2052,6 +2051,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN;
init_waitqueue_head(&dlm->dlm_join_events);
+ dlm->migrate_done = 0;
+
dlm->reco.new_master = O2NM_INVALID_NODE_NUM;
dlm->reco.dead_node = O2NM_INVALID_NODE_NUM;
diff --git a/fs/ocfs2/dlm/dlmdomain.h b/fs/ocfs2/dlm/dlmdomain.h
index fd6122a38dbd..8a9281411c18 100644
--- a/fs/ocfs2/dlm/dlmdomain.h
+++ b/fs/ocfs2/dlm/dlmdomain.h
@@ -28,7 +28,30 @@
extern spinlock_t dlm_domain_lock;
extern struct list_head dlm_domains;
-int dlm_shutting_down(struct dlm_ctxt *dlm);
+static inline int dlm_joined(struct dlm_ctxt *dlm)
+{
+ int ret = 0;
+
+ spin_lock(&dlm_domain_lock);
+ if (dlm->dlm_state == DLM_CTXT_JOINED)
+ ret = 1;
+ spin_unlock(&dlm_domain_lock);
+
+ return ret;
+}
+
+static inline int dlm_shutting_down(struct dlm_ctxt *dlm)
+{
+ int ret = 0;
+
+ spin_lock(&dlm_domain_lock);
+ if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN)
+ ret = 1;
+ spin_unlock(&dlm_domain_lock);
+
+ return ret;
+}
+
void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm,
int node_num);
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 66c2a491f68d..74962315794e 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -77,8 +77,7 @@ int dlm_init_lock_cache(void)
void dlm_destroy_lock_cache(void)
{
- if (dlm_lock_cache)
- kmem_cache_destroy(dlm_lock_cache);
+ kmem_cache_destroy(dlm_lock_cache);
}
/* Tell us whether we can grant a new lock request.
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index a7df226f9449..aaca0949fe53 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -414,8 +414,7 @@ int dlm_init_mle_cache(void)
void dlm_destroy_mle_cache(void)
{
- if (dlm_mle_cache)
- kmem_cache_destroy(dlm_mle_cache);
+ kmem_cache_destroy(dlm_mle_cache);
}
static void dlm_mle_release(struct kref *kref)
@@ -472,15 +471,11 @@ bail:
void dlm_destroy_master_caches(void)
{
- if (dlm_lockname_cache) {
- kmem_cache_destroy(dlm_lockname_cache);
- dlm_lockname_cache = NULL;
- }
+ kmem_cache_destroy(dlm_lockname_cache);
+ dlm_lockname_cache = NULL;
- if (dlm_lockres_cache) {
- kmem_cache_destroy(dlm_lockres_cache);
- dlm_lockres_cache = NULL;
- }
+ kmem_cache_destroy(dlm_lockres_cache);
+ dlm_lockres_cache = NULL;
}
static void dlm_lockres_release(struct kref *kref)
@@ -2495,13 +2490,13 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
}
/*
- * A migrateable resource is one that is :
+ * A migratable resource is one that is :
* 1. locally mastered, and,
* 2. zero local locks, and,
* 3. one or more non-local locks, or, one or more references
* Returns 1 if yes, 0 if not.
*/
-static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
+static int dlm_is_lockres_migratable(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
enum dlm_lockres_list idx;
@@ -2532,7 +2527,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
continue;
}
cookie = be64_to_cpu(lock->ml.cookie);
- mlog(0, "%s: Not migrateable res %.*s, lock %u:%llu on "
+ mlog(0, "%s: Not migratable res %.*s, lock %u:%llu on "
"%s list\n", dlm->name, res->lockname.len,
res->lockname.name,
dlm_get_lock_cookie_node(cookie),
@@ -2548,7 +2543,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
return 0;
}
- mlog(0, "%s: res %.*s, Migrateable\n", dlm->name, res->lockname.len,
+ mlog(0, "%s: res %.*s, Migratable\n", dlm->name, res->lockname.len,
res->lockname.name);
return 1;
@@ -2792,7 +2787,7 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
assert_spin_locked(&dlm->spinlock);
spin_lock(&res->spinlock);
- if (dlm_is_lockres_migrateable(dlm, res))
+ if (dlm_is_lockres_migratable(dlm, res))
target = dlm_pick_migration_target(dlm, res);
spin_unlock(&res->spinlock);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index ec8f75813beb..802636d50365 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -62,7 +62,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node);
static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node);
static int dlm_request_all_locks(struct dlm_ctxt *dlm,
u8 request_from, u8 dead_node);
-static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node);
+static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm);
static inline int dlm_num_locks_in_lockres(struct dlm_lock_resource *res);
static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres,
@@ -423,12 +423,11 @@ void dlm_wait_for_recovery(struct dlm_ctxt *dlm)
static void dlm_begin_recovery(struct dlm_ctxt *dlm)
{
- spin_lock(&dlm->spinlock);
+ assert_spin_locked(&dlm->spinlock);
BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE);
printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n",
dlm->name, dlm->reco.dead_node);
dlm->reco.state |= DLM_RECO_STATE_ACTIVE;
- spin_unlock(&dlm->spinlock);
}
static void dlm_end_recovery(struct dlm_ctxt *dlm)
@@ -456,6 +455,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
spin_lock(&dlm->spinlock);
+ if (dlm->migrate_done) {
+ mlog(0, "%s: no need do recovery after migrating all "
+ "lock resources\n", dlm->name);
+ spin_unlock(&dlm->spinlock);
+ return 0;
+ }
+
/* check to see if the new master has died */
if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM &&
test_bit(dlm->reco.new_master, dlm->recovery_map)) {
@@ -490,12 +496,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n",
dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
dlm->reco.dead_node);
- spin_unlock(&dlm->spinlock);
/* take write barrier */
/* (stops the list reshuffling thread, proxy ast handling) */
dlm_begin_recovery(dlm);
+ spin_unlock(&dlm->spinlock);
+
if (dlm->reco.new_master == dlm->node_num)
goto master_here;
@@ -739,7 +746,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
}
if (destroy)
- dlm_destroy_recovery_area(dlm, dead_node);
+ dlm_destroy_recovery_area(dlm);
return status;
}
@@ -764,7 +771,7 @@ static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
ndata = kzalloc(sizeof(*ndata), GFP_NOFS);
if (!ndata) {
- dlm_destroy_recovery_area(dlm, dead_node);
+ dlm_destroy_recovery_area(dlm);
return -ENOMEM;
}
ndata->node_num = num;
@@ -778,7 +785,7 @@ static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
return 0;
}
-static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
+static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm)
{
struct dlm_reco_node_data *ndata, *next;
LIST_HEAD(tmplist);
@@ -1378,6 +1385,15 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
if (!dlm_grab(dlm))
return -EINVAL;
+ if (!dlm_joined(dlm)) {
+ mlog(ML_ERROR, "Domain %s not joined! "
+ "lockres %.*s, master %u\n",
+ dlm->name, mres->lockname_len,
+ mres->lockname, mres->master);
+ dlm_put(dlm);
+ return -EINVAL;
+ }
+
BUG_ON(!(mres->flags & (DLM_MRES_RECOVERY|DLM_MRES_MIGRATION)));
real_master = mres->master;
@@ -1807,7 +1823,6 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
int i, j, bad;
struct dlm_lock *lock;
u8 from = O2NM_MAX_NODES;
- unsigned int added = 0;
__be64 c;
mlog(0, "running %d locks for this lockres\n", mres->num_locks);
@@ -1823,7 +1838,6 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
spin_lock(&res->spinlock);
dlm_lockres_set_refmap_bit(dlm, res, from);
spin_unlock(&res->spinlock);
- added++;
break;
}
BUG_ON(ml->highest_blocked != LKM_IVMODE);
@@ -1911,7 +1925,6 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
/* do not alter lock refcount. switching lists. */
list_move_tail(&lock->list, queue);
spin_unlock(&res->spinlock);
- added++;
mlog(0, "just reordered a local lock!\n");
continue;
@@ -2037,7 +2050,6 @@ skip_lvb:
"setting refmap bit\n", dlm->name,
res->lockname.len, res->lockname.name, ml->node);
dlm_lockres_set_refmap_bit(dlm, res, ml->node);
- added++;
}
spin_unlock(&res->spinlock);
}
@@ -2331,13 +2343,6 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
__dlm_dirty_lockres(dlm, res);
}
-/* if this node is the recovery master, and there are no
- * locks for a given lockres owned by this node that are in
- * either PR or EX mode, zero out the lvb before requesting.
- *
- */
-
-
static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
{
struct dlm_lock_resource *res;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 9479f99c2145..97a972efab83 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1756,8 +1756,7 @@ int ocfs2_rw_lock(struct inode *inode, int write)
level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
- status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0,
- 0);
+ status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
if (status < 0)
mlog_errno(status);
@@ -1796,7 +1795,7 @@ void ocfs2_rw_unlock(struct inode *inode, int write)
write ? "EXMODE" : "PRMODE");
if (!ocfs2_mount_local(osb))
- ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
+ ocfs2_cluster_unlock(osb, lockres, level);
}
/*
@@ -1816,8 +1815,7 @@ int ocfs2_open_lock(struct inode *inode)
lockres = &OCFS2_I(inode)->ip_open_lockres;
- status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
- DLM_LOCK_PR, 0, 0);
+ status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_PR, 0, 0);
if (status < 0)
mlog_errno(status);
@@ -1854,8 +1852,7 @@ int ocfs2_try_open_lock(struct inode *inode, int write)
* other nodes and the -EAGAIN will indicate to the caller that
* this inode is still in use.
*/
- status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
- level, DLM_LKF_NOQUEUE, 0);
+ status = ocfs2_cluster_lock(osb, lockres, level, DLM_LKF_NOQUEUE, 0);
out:
return status;
@@ -1876,11 +1873,9 @@ void ocfs2_open_unlock(struct inode *inode)
goto out;
if(lockres->l_ro_holders)
- ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
- DLM_LOCK_PR);
+ ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_PR);
if(lockres->l_ex_holders)
- ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
- DLM_LOCK_EX);
+ ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
out:
return;
@@ -2601,9 +2596,9 @@ void ocfs2_inode_unlock(struct inode *inode,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
ex ? "EXMODE" : "PRMODE");
- if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
+ if (!ocfs2_is_hard_readonly(osb) &&
!ocfs2_mount_local(osb))
- ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
+ ocfs2_cluster_unlock(osb, lockres, level);
}
/*
@@ -3537,7 +3532,7 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
* On DLM_LKF_VALBLK, fsdlm behaves differently with o2cb. It always
* expects DLM_LKF_VALBLK being set if the LKB has LVB, so that
* we can recover correctly from node failure. Otherwise, we may get
- * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set.
+ * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set.
*/
if (!ocfs2_is_o2cb_active() &&
lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 5d1784a365a3..6ee94bc23f5b 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -101,7 +101,7 @@ static int ocfs2_file_open(struct inode *inode, struct file *file)
struct ocfs2_inode_info *oi = OCFS2_I(inode);
trace_ocfs2_file_open(inode, file, file->f_path.dentry,
- (unsigned long long)OCFS2_I(inode)->ip_blkno,
+ (unsigned long long)oi->ip_blkno,
file->f_path.dentry->d_name.len,
file->f_path.dentry->d_name.name, mode);
@@ -116,7 +116,7 @@ static int ocfs2_file_open(struct inode *inode, struct file *file)
/* Check that the inode hasn't been wiped from disk by another
* node. If it hasn't then we're safe as long as we hold the
* spin lock until our increment of open count. */
- if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
+ if (oi->ip_flags & OCFS2_INODE_DELETED) {
spin_unlock(&oi->ip_lock);
status = -ENOENT;
@@ -190,7 +190,7 @@ static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end,
bool needs_barrier = false;
trace_ocfs2_sync_file(inode, file, file->f_path.dentry,
- OCFS2_I(inode)->ip_blkno,
+ oi->ip_blkno,
file->f_path.dentry->d_name.len,
file->f_path.dentry->d_name.name,
(unsigned long long)datasync);
@@ -296,7 +296,7 @@ int ocfs2_update_inode_atime(struct inode *inode,
ocfs2_journal_dirty(handle, bh);
out_commit:
- ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
+ ocfs2_commit_trans(osb, handle);
out:
return ret;
}
@@ -2257,7 +2257,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;
- trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry,
+ trace_ocfs2_file_write_iter(inode, file, file->f_path.dentry,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
file->f_path.dentry->d_name.len,
file->f_path.dentry->d_name.name,
@@ -2405,7 +2405,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;
- trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry,
+ trace_ocfs2_file_read_iter(inode, filp, filp->f_path.dentry,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
filp->f_path.dentry->d_name.len,
filp->f_path.dentry->d_name.name,
@@ -2448,7 +2448,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
*
* Take and drop the meta data lock to update inode fields
* like i_size. This allows the checks down below
- * generic_file_aio_read() a chance of actually working.
+ * generic_file_read_iter() a chance of actually working.
*/
ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level,
!nowait);
@@ -2460,7 +2460,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
ocfs2_inode_unlock(inode, lock_level);
ret = generic_file_read_iter(iocb, to);
- trace_generic_file_aio_read_ret(ret);
+ trace_generic_file_read_iter_ret(ret);
/* buffered aio wouldn't have proper lock coverage today */
BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT));
diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
index 6b92cb241138..f65f2b2f594d 100644
--- a/fs/ocfs2/filecheck.c
+++ b/fs/ocfs2/filecheck.c
@@ -53,36 +53,6 @@ static const char * const ocfs2_filecheck_errs[] = {
"UNSUPPORTED"
};
-static DEFINE_SPINLOCK(ocfs2_filecheck_sysfs_lock);
-static LIST_HEAD(ocfs2_filecheck_sysfs_list);
-
-struct ocfs2_filecheck {
- struct list_head fc_head; /* File check entry list head */
- spinlock_t fc_lock;
- unsigned int fc_max; /* Maximum number of entry in list */
- unsigned int fc_size; /* Current entry count in list */
- unsigned int fc_done; /* Finished entry count in list */
-};
-
-struct ocfs2_filecheck_sysfs_entry { /* sysfs entry per mounting */
- struct list_head fs_list;
- atomic_t fs_count;
- struct super_block *fs_sb;
- struct kset *fs_devicekset;
- struct kset *fs_fcheckkset;
- struct ocfs2_filecheck *fs_fcheck;
-};
-
-#define OCFS2_FILECHECK_MAXSIZE 100
-#define OCFS2_FILECHECK_MINSIZE 10
-
-/* File check operation type */
-enum {
- OCFS2_FILECHECK_TYPE_CHK = 0, /* Check a file(inode) */
- OCFS2_FILECHECK_TYPE_FIX, /* Fix a file(inode) */
- OCFS2_FILECHECK_TYPE_SET = 100 /* Set entry list maximum size */
-};
-
struct ocfs2_filecheck_entry {
struct list_head fe_list;
unsigned long fe_ino;
@@ -110,35 +80,84 @@ ocfs2_filecheck_error(int errno)
return ocfs2_filecheck_errs[errno - OCFS2_FILECHECK_ERR_START + 1];
}
-static ssize_t ocfs2_filecheck_show(struct kobject *kobj,
- struct kobj_attribute *attr,
- char *buf);
-static ssize_t ocfs2_filecheck_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t count);
-static struct kobj_attribute ocfs2_attr_filecheck_chk =
+static ssize_t ocfs2_filecheck_attr_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf);
+static ssize_t ocfs2_filecheck_attr_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count);
+static struct kobj_attribute ocfs2_filecheck_attr_chk =
__ATTR(check, S_IRUSR | S_IWUSR,
- ocfs2_filecheck_show,
- ocfs2_filecheck_store);
-static struct kobj_attribute ocfs2_attr_filecheck_fix =
+ ocfs2_filecheck_attr_show,
+ ocfs2_filecheck_attr_store);
+static struct kobj_attribute ocfs2_filecheck_attr_fix =
__ATTR(fix, S_IRUSR | S_IWUSR,
- ocfs2_filecheck_show,
- ocfs2_filecheck_store);
-static struct kobj_attribute ocfs2_attr_filecheck_set =
+ ocfs2_filecheck_attr_show,
+ ocfs2_filecheck_attr_store);
+static struct kobj_attribute ocfs2_filecheck_attr_set =
__ATTR(set, S_IRUSR | S_IWUSR,
- ocfs2_filecheck_show,
- ocfs2_filecheck_store);
+ ocfs2_filecheck_attr_show,
+ ocfs2_filecheck_attr_store);
+static struct attribute *ocfs2_filecheck_attrs[] = {
+ &ocfs2_filecheck_attr_chk.attr,
+ &ocfs2_filecheck_attr_fix.attr,
+ &ocfs2_filecheck_attr_set.attr,
+ NULL
+};
+
+static void ocfs2_filecheck_release(struct kobject *kobj)
+{
+ struct ocfs2_filecheck_sysfs_entry *entry = container_of(kobj,
+ struct ocfs2_filecheck_sysfs_entry, fs_kobj);
+
+ complete(&entry->fs_kobj_unregister);
+}
+
+static ssize_t
+ocfs2_filecheck_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+ ssize_t ret = -EIO;
+ struct kobj_attribute *kattr = container_of(attr,
+ struct kobj_attribute, attr);
+
+ kobject_get(kobj);
+ if (kattr->show)
+ ret = kattr->show(kobj, kattr, buf);
+ kobject_put(kobj);
+ return ret;
+}
+
+static ssize_t
+ocfs2_filecheck_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ ssize_t ret = -EIO;
+ struct kobj_attribute *kattr = container_of(attr,
+ struct kobj_attribute, attr);
+
+ kobject_get(kobj);
+ if (kattr->store)
+ ret = kattr->store(kobj, kattr, buf, count);
+ kobject_put(kobj);
+ return ret;
+}
+
+static const struct sysfs_ops ocfs2_filecheck_ops = {
+ .show = ocfs2_filecheck_show,
+ .store = ocfs2_filecheck_store,
+};
+
+static struct kobj_type ocfs2_ktype_filecheck = {
+ .default_attrs = ocfs2_filecheck_attrs,
+ .sysfs_ops = &ocfs2_filecheck_ops,
+ .release = ocfs2_filecheck_release,
+};
static void
ocfs2_filecheck_sysfs_free(struct ocfs2_filecheck_sysfs_entry *entry)
{
struct ocfs2_filecheck_entry *p;
- if (!atomic_dec_and_test(&entry->fs_count)) {
- wait_var_event(&entry->fs_count,
- !atomic_read(&entry->fs_count));
- }
-
spin_lock(&entry->fs_fcheck->fc_lock);
while (!list_empty(&entry->fs_fcheck->fc_head)) {
p = list_first_entry(&entry->fs_fcheck->fc_head,
@@ -149,151 +168,48 @@ ocfs2_filecheck_sysfs_free(struct ocfs2_filecheck_sysfs_entry *entry)
}
spin_unlock(&entry->fs_fcheck->fc_lock);
- kset_unregister(entry->fs_fcheckkset);
- kset_unregister(entry->fs_devicekset);
kfree(entry->fs_fcheck);
- kfree(entry);
-}
-
-static void
-ocfs2_filecheck_sysfs_add(struct ocfs2_filecheck_sysfs_entry *entry)
-{
- spin_lock(&ocfs2_filecheck_sysfs_lock);
- list_add_tail(&entry->fs_list, &ocfs2_filecheck_sysfs_list);
- spin_unlock(&ocfs2_filecheck_sysfs_lock);
+ entry->fs_fcheck = NULL;
}
-static int ocfs2_filecheck_sysfs_del(const char *devname)
+int ocfs2_filecheck_create_sysfs(struct ocfs2_super *osb)
{
- struct ocfs2_filecheck_sysfs_entry *p;
-
- spin_lock(&ocfs2_filecheck_sysfs_lock);
- list_for_each_entry(p, &ocfs2_filecheck_sysfs_list, fs_list) {
- if (!strcmp(p->fs_sb->s_id, devname)) {
- list_del(&p->fs_list);
- spin_unlock(&ocfs2_filecheck_sysfs_lock);
- ocfs2_filecheck_sysfs_free(p);
- return 0;
- }
- }
- spin_unlock(&ocfs2_filecheck_sysfs_lock);
- return 1;
-}
-
-static void
-ocfs2_filecheck_sysfs_put(struct ocfs2_filecheck_sysfs_entry *entry)
-{
- if (atomic_dec_and_test(&entry->fs_count))
- wake_up_var(&entry->fs_count);
-}
-
-static struct ocfs2_filecheck_sysfs_entry *
-ocfs2_filecheck_sysfs_get(const char *devname)
-{
- struct ocfs2_filecheck_sysfs_entry *p = NULL;
-
- spin_lock(&ocfs2_filecheck_sysfs_lock);
- list_for_each_entry(p, &ocfs2_filecheck_sysfs_list, fs_list) {
- if (!strcmp(p->fs_sb->s_id, devname)) {
- atomic_inc(&p->fs_count);
- spin_unlock(&ocfs2_filecheck_sysfs_lock);
- return p;
- }
- }
- spin_unlock(&ocfs2_filecheck_sysfs_lock);
- return NULL;
-}
-
-int ocfs2_filecheck_create_sysfs(struct super_block *sb)
-{
- int ret = 0;
- struct kset *device_kset = NULL;
- struct kset *fcheck_kset = NULL;
- struct ocfs2_filecheck *fcheck = NULL;
- struct ocfs2_filecheck_sysfs_entry *entry = NULL;
- struct attribute **attrs = NULL;
- struct attribute_group attrgp;
-
- if (!ocfs2_kset)
- return -ENOMEM;
-
- attrs = kmalloc(sizeof(struct attribute *) * 4, GFP_NOFS);
- if (!attrs) {
- ret = -ENOMEM;
- goto error;
- } else {
- attrs[0] = &ocfs2_attr_filecheck_chk.attr;
- attrs[1] = &ocfs2_attr_filecheck_fix.attr;
- attrs[2] = &ocfs2_attr_filecheck_set.attr;
- attrs[3] = NULL;
- memset(&attrgp, 0, sizeof(attrgp));
- attrgp.attrs = attrs;
- }
+ int ret;
+ struct ocfs2_filecheck *fcheck;
+ struct ocfs2_filecheck_sysfs_entry *entry = &osb->osb_fc_ent;
fcheck = kmalloc(sizeof(struct ocfs2_filecheck), GFP_NOFS);
- if (!fcheck) {
- ret = -ENOMEM;
- goto error;
- } else {
- INIT_LIST_HEAD(&fcheck->fc_head);
- spin_lock_init(&fcheck->fc_lock);
- fcheck->fc_max = OCFS2_FILECHECK_MINSIZE;
- fcheck->fc_size = 0;
- fcheck->fc_done = 0;
- }
-
- if (strlen(sb->s_id) <= 0) {
- mlog(ML_ERROR,
- "Cannot get device basename when create filecheck sysfs\n");
- ret = -ENODEV;
- goto error;
- }
-
- device_kset = kset_create_and_add(sb->s_id, NULL, &ocfs2_kset->kobj);
- if (!device_kset) {
- ret = -ENOMEM;
- goto error;
- }
-
- fcheck_kset = kset_create_and_add("filecheck", NULL,
- &device_kset->kobj);
- if (!fcheck_kset) {
- ret = -ENOMEM;
- goto error;
- }
-
- ret = sysfs_create_group(&fcheck_kset->kobj, &attrgp);
- if (ret)
- goto error;
+ if (!fcheck)
+ return -ENOMEM;
- entry = kmalloc(sizeof(struct ocfs2_filecheck_sysfs_entry), GFP_NOFS);
- if (!entry) {
- ret = -ENOMEM;
- goto error;
- } else {
- atomic_set(&entry->fs_count, 1);
- entry->fs_sb = sb;
- entry->fs_devicekset = device_kset;
- entry->fs_fcheckkset = fcheck_kset;
- entry->fs_fcheck = fcheck;
- ocfs2_filecheck_sysfs_add(entry);
+ INIT_LIST_HEAD(&fcheck->fc_head);
+ spin_lock_init(&fcheck->fc_lock);
+ fcheck->fc_max = OCFS2_FILECHECK_MINSIZE;
+ fcheck->fc_size = 0;
+ fcheck->fc_done = 0;
+
+ entry->fs_kobj.kset = osb->osb_dev_kset;
+ init_completion(&entry->fs_kobj_unregister);
+ ret = kobject_init_and_add(&entry->fs_kobj, &ocfs2_ktype_filecheck,
+ NULL, "filecheck");
+ if (ret) {
+ kfree(fcheck);
+ return ret;
}
- kfree(attrs);
+ entry->fs_fcheck = fcheck;
return 0;
-
-error:
- kfree(attrs);
- kfree(entry);
- kfree(fcheck);
- kset_unregister(fcheck_kset);
- kset_unregister(device_kset);
- return ret;
}
-int ocfs2_filecheck_remove_sysfs(struct super_block *sb)
+void ocfs2_filecheck_remove_sysfs(struct ocfs2_super *osb)
{
- return ocfs2_filecheck_sysfs_del(sb->s_id);
+ if (!osb->osb_fc_ent.fs_fcheck)
+ return;
+
+ kobject_del(&osb->osb_fc_ent.fs_kobj);
+ kobject_put(&osb->osb_fc_ent.fs_kobj);
+ wait_for_completion(&osb->osb_fc_ent.fs_kobj_unregister);
+ ocfs2_filecheck_sysfs_free(&osb->osb_fc_ent);
}
static int
@@ -310,7 +226,7 @@ ocfs2_filecheck_adjust_max(struct ocfs2_filecheck_sysfs_entry *ent,
spin_lock(&ent->fs_fcheck->fc_lock);
if (len < (ent->fs_fcheck->fc_size - ent->fs_fcheck->fc_done)) {
- mlog(ML_ERROR,
+ mlog(ML_NOTICE,
"Cannot set online file check maximum entry number "
"to %u due to too many pending entries(%u)\n",
len, ent->fs_fcheck->fc_size - ent->fs_fcheck->fc_done);
@@ -387,7 +303,7 @@ ocfs2_filecheck_args_parse(const char *name, const char *buf, size_t count,
return 0;
}
-static ssize_t ocfs2_filecheck_show(struct kobject *kobj,
+static ssize_t ocfs2_filecheck_attr_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buf)
{
@@ -395,19 +311,12 @@ static ssize_t ocfs2_filecheck_show(struct kobject *kobj,
ssize_t ret = 0, total = 0, remain = PAGE_SIZE;
unsigned int type;
struct ocfs2_filecheck_entry *p;
- struct ocfs2_filecheck_sysfs_entry *ent;
+ struct ocfs2_filecheck_sysfs_entry *ent = container_of(kobj,
+ struct ocfs2_filecheck_sysfs_entry, fs_kobj);
if (ocfs2_filecheck_type_parse(attr->attr.name, &type))
return -EINVAL;
- ent = ocfs2_filecheck_sysfs_get(kobj->parent->name);
- if (!ent) {
- mlog(ML_ERROR,
- "Cannot get the corresponding entry via device basename %s\n",
- kobj->name);
- return -ENODEV;
- }
-
if (type == OCFS2_FILECHECK_TYPE_SET) {
spin_lock(&ent->fs_fcheck->fc_lock);
total = snprintf(buf, remain, "%u\n", ent->fs_fcheck->fc_max);
@@ -441,11 +350,26 @@ static ssize_t ocfs2_filecheck_show(struct kobject *kobj,
spin_unlock(&ent->fs_fcheck->fc_lock);
exit:
- ocfs2_filecheck_sysfs_put(ent);
return total;
}
-static int
+static inline int
+ocfs2_filecheck_is_dup_entry(struct ocfs2_filecheck_sysfs_entry *ent,
+ unsigned long ino)
+{
+ struct ocfs2_filecheck_entry *p;
+
+ list_for_each_entry(p, &ent->fs_fcheck->fc_head, fe_list) {
+ if (!p->fe_done) {
+ if (p->fe_ino == ino)
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static inline int
ocfs2_filecheck_erase_entry(struct ocfs2_filecheck_sysfs_entry *ent)
{
struct ocfs2_filecheck_entry *p;
@@ -484,21 +408,21 @@ static void
ocfs2_filecheck_done_entry(struct ocfs2_filecheck_sysfs_entry *ent,
struct ocfs2_filecheck_entry *entry)
{
- entry->fe_done = 1;
spin_lock(&ent->fs_fcheck->fc_lock);
+ entry->fe_done = 1;
ent->fs_fcheck->fc_done++;
spin_unlock(&ent->fs_fcheck->fc_lock);
}
static unsigned int
-ocfs2_filecheck_handle(struct super_block *sb,
+ocfs2_filecheck_handle(struct ocfs2_super *osb,
unsigned long ino, unsigned int flags)
{
unsigned int ret = OCFS2_FILECHECK_ERR_SUCCESS;
struct inode *inode = NULL;
int rc;
- inode = ocfs2_iget(OCFS2_SB(sb), ino, flags, 0);
+ inode = ocfs2_iget(osb, ino, flags, 0);
if (IS_ERR(inode)) {
rc = (int)(-(long)inode);
if (rc >= OCFS2_FILECHECK_ERR_START &&
@@ -516,11 +440,14 @@ static void
ocfs2_filecheck_handle_entry(struct ocfs2_filecheck_sysfs_entry *ent,
struct ocfs2_filecheck_entry *entry)
{
+ struct ocfs2_super *osb = container_of(ent, struct ocfs2_super,
+ osb_fc_ent);
+
if (entry->fe_type == OCFS2_FILECHECK_TYPE_CHK)
- entry->fe_status = ocfs2_filecheck_handle(ent->fs_sb,
+ entry->fe_status = ocfs2_filecheck_handle(osb,
entry->fe_ino, OCFS2_FI_FLAG_FILECHECK_CHK);
else if (entry->fe_type == OCFS2_FILECHECK_TYPE_FIX)
- entry->fe_status = ocfs2_filecheck_handle(ent->fs_sb,
+ entry->fe_status = ocfs2_filecheck_handle(osb,
entry->fe_ino, OCFS2_FI_FLAG_FILECHECK_FIX);
else
entry->fe_status = OCFS2_FILECHECK_ERR_UNSUPPORTED;
@@ -528,30 +455,21 @@ ocfs2_filecheck_handle_entry(struct ocfs2_filecheck_sysfs_entry *ent,
ocfs2_filecheck_done_entry(ent, entry);
}
-static ssize_t ocfs2_filecheck_store(struct kobject *kobj,
+static ssize_t ocfs2_filecheck_attr_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
+ ssize_t ret = 0;
struct ocfs2_filecheck_args args;
struct ocfs2_filecheck_entry *entry;
- struct ocfs2_filecheck_sysfs_entry *ent;
- ssize_t ret = 0;
+ struct ocfs2_filecheck_sysfs_entry *ent = container_of(kobj,
+ struct ocfs2_filecheck_sysfs_entry, fs_kobj);
if (count == 0)
return count;
- if (ocfs2_filecheck_args_parse(attr->attr.name, buf, count, &args)) {
- mlog(ML_ERROR, "Invalid arguments for online file check\n");
+ if (ocfs2_filecheck_args_parse(attr->attr.name, buf, count, &args))
return -EINVAL;
- }
-
- ent = ocfs2_filecheck_sysfs_get(kobj->parent->name);
- if (!ent) {
- mlog(ML_ERROR,
- "Cannot get the corresponding entry via device basename %s\n",
- kobj->parent->name);
- return -ENODEV;
- }
if (args.fa_type == OCFS2_FILECHECK_TYPE_SET) {
ret = ocfs2_filecheck_adjust_max(ent, args.fa_len);
@@ -565,13 +483,16 @@ static ssize_t ocfs2_filecheck_store(struct kobject *kobj,
}
spin_lock(&ent->fs_fcheck->fc_lock);
- if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) &&
- (ent->fs_fcheck->fc_done == 0)) {
- mlog(ML_ERROR,
+ if (ocfs2_filecheck_is_dup_entry(ent, args.fa_ino)) {
+ ret = -EEXIST;
+ kfree(entry);
+ } else if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) &&
+ (ent->fs_fcheck->fc_done == 0)) {
+ mlog(ML_NOTICE,
"Cannot do more file check "
"since file check queue(%u) is full now\n",
ent->fs_fcheck->fc_max);
- ret = -EBUSY;
+ ret = -EAGAIN;
kfree(entry);
} else {
if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) &&
@@ -596,6 +517,5 @@ static ssize_t ocfs2_filecheck_store(struct kobject *kobj,
ocfs2_filecheck_handle_entry(ent, entry);
exit:
- ocfs2_filecheck_sysfs_put(ent);
return (!ret ? count : ret);
}
diff --git a/fs/ocfs2/filecheck.h b/fs/ocfs2/filecheck.h
index e5cd002a2c09..6a22ee79e8d0 100644
--- a/fs/ocfs2/filecheck.h
+++ b/fs/ocfs2/filecheck.h
@@ -43,7 +43,32 @@ enum {
#define OCFS2_FILECHECK_ERR_START OCFS2_FILECHECK_ERR_FAILED
#define OCFS2_FILECHECK_ERR_END OCFS2_FILECHECK_ERR_UNSUPPORTED
-int ocfs2_filecheck_create_sysfs(struct super_block *sb);
-int ocfs2_filecheck_remove_sysfs(struct super_block *sb);
+struct ocfs2_filecheck {
+ struct list_head fc_head; /* File check entry list head */
+ spinlock_t fc_lock;
+ unsigned int fc_max; /* Maximum number of entry in list */
+ unsigned int fc_size; /* Current entry count in list */
+ unsigned int fc_done; /* Finished entry count in list */
+};
+
+#define OCFS2_FILECHECK_MAXSIZE 100
+#define OCFS2_FILECHECK_MINSIZE 10
+
+/* File check operation type */
+enum {
+ OCFS2_FILECHECK_TYPE_CHK = 0, /* Check a file(inode) */
+ OCFS2_FILECHECK_TYPE_FIX, /* Fix a file(inode) */
+ OCFS2_FILECHECK_TYPE_SET = 100 /* Set entry list maximum size */
+};
+
+struct ocfs2_filecheck_sysfs_entry { /* sysfs entry per partition */
+ struct kobject fs_kobj;
+ struct completion fs_kobj_unregister;
+ struct ocfs2_filecheck *fs_fcheck;
+};
+
+
+int ocfs2_filecheck_create_sysfs(struct ocfs2_super *osb);
+void ocfs2_filecheck_remove_sysfs(struct ocfs2_super *osb);
#endif /* FILECHECK_H */
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index d51b80edd972..ddc3e9470c87 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1135,7 +1135,7 @@ static void ocfs2_clear_inode(struct inode *inode)
trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno,
inode->i_nlink);
- mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL,
+ mlog_bug_on_msg(osb == NULL,
"Inode=%lu\n", inode->i_ino);
dquot_drop(inode);
@@ -1150,7 +1150,7 @@ static void ocfs2_clear_inode(struct inode *inode)
ocfs2_mark_lockres_freeing(osb, &oi->ip_inode_lockres);
ocfs2_mark_lockres_freeing(osb, &oi->ip_open_lockres);
- ocfs2_resv_discard(&OCFS2_SB(inode->i_sb)->osb_la_resmap,
+ ocfs2_resv_discard(&osb->osb_la_resmap,
&oi->ip_la_data_resv);
ocfs2_resv_init_once(&oi->ip_la_data_resv);
@@ -1160,7 +1160,7 @@ static void ocfs2_clear_inode(struct inode *inode)
* exception here are successfully wiped inodes - their
* metadata can now be considered to be part of the system
* inodes from which it came. */
- if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED))
+ if (!(oi->ip_flags & OCFS2_INODE_DELETED))
ocfs2_checkpoint_inode(inode);
mlog_bug_on_msg(!list_empty(&oi->ip_io_markers),
@@ -1223,7 +1223,7 @@ static void ocfs2_clear_inode(struct inode *inode)
* the journal is flushed before journal shutdown. Thus it is safe to
* have inodes get cleaned up after journal shutdown.
*/
- jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal,
+ jbd2_journal_release_jbd_inode(osb->journal->j_journal,
&oi->ip_jinode);
}
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index c801eddc4bf3..8dd6f703c819 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -525,7 +525,7 @@ static int __ocfs2_mknod_locked(struct inode *dir,
* these are used by the support functions here and in
* callers. */
inode->i_ino = ino_from_blkno(osb->sb, fe_blkno);
- OCFS2_I(inode)->ip_blkno = fe_blkno;
+ oi->ip_blkno = fe_blkno;
spin_lock(&osb->osb_lock);
inode->i_generation = osb->s_next_generation++;
spin_unlock(&osb->osb_lock);
@@ -1186,8 +1186,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
}
trace_ocfs2_double_lock_end(
- (unsigned long long)OCFS2_I(inode1)->ip_blkno,
- (unsigned long long)OCFS2_I(inode2)->ip_blkno);
+ (unsigned long long)oi1->ip_blkno,
+ (unsigned long long)oi2->ip_blkno);
bail:
if (status)
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 6867eef2e06b..4f86ac0027b5 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -50,6 +50,8 @@
#include "reservations.h"
+#include "filecheck.h"
+
/* Caching of metadata buffers */
/* Most user visible OCFS2 inodes will have very few pieces of
@@ -472,6 +474,12 @@ struct ocfs2_super
* workqueue and schedule on our own.
*/
struct workqueue_struct *ocfs2_wq;
+
+ /* sysfs directory per partition */
+ struct kset *osb_dev_kset;
+
+ /* file check related stuff */
+ struct ocfs2_filecheck_sysfs_entry osb_fc_ent;
};
#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info)
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index e2a11aaece10..2ee76a90ba8f 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -1311,11 +1311,11 @@ DEFINE_OCFS2_FILE_OPS(ocfs2_file_release);
DEFINE_OCFS2_FILE_OPS(ocfs2_sync_file);
-DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_write);
+DEFINE_OCFS2_FILE_OPS(ocfs2_file_write_iter);
DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write);
-DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_read);
+DEFINE_OCFS2_FILE_OPS(ocfs2_file_read_iter);
DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file);
@@ -1467,7 +1467,7 @@ TRACE_EVENT(ocfs2_prepare_inode_for_write,
__entry->saved_pos, __entry->count, __entry->wait)
);
-DEFINE_OCFS2_INT_EVENT(generic_file_aio_read_ret);
+DEFINE_OCFS2_INT_EVENT(generic_file_read_iter_ret);
/* End of trace events for fs/ocfs2/file.c. */
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index ab156e35ec00..01c6b3894406 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -573,7 +573,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
BUG_ON(ocfs2_is_refcount_inode(inode));
trace_ocfs2_create_refcount_tree(
- (unsigned long long)OCFS2_I(inode)->ip_blkno);
+ (unsigned long long)oi->ip_blkno);
ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
if (ret) {
@@ -3359,7 +3359,7 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context)
unsigned int ext_flags;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
+ if (!ocfs2_refcount_tree(osb)) {
return ocfs2_error(inode->i_sb, "Inode %lu want to use refcount tree, but the feature bit is not set in the super block\n",
inode->i_ino);
}
@@ -3707,7 +3707,7 @@ int ocfs2_add_refcount_flag(struct inode *inode,
trace_ocfs2_add_refcount_flag(ref_blocks, credits);
if (ref_blocks) {
- ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
+ ret = ocfs2_reserve_new_metadata_blocks(osb,
ref_blocks, &meta_ac);
if (ret) {
mlog_errno(ret);
@@ -4766,8 +4766,8 @@ static int ocfs2_reflink_inodes_lock(struct inode *s_inode,
*bh2 = *bh1;
trace_ocfs2_double_lock_end(
- (unsigned long long)OCFS2_I(inode1)->ip_blkno,
- (unsigned long long)OCFS2_I(inode2)->ip_blkno);
+ (unsigned long long)oi1->ip_blkno,
+ (unsigned long long)oi2->ip_blkno);
return 0;
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index dae9eb7c441e..d2fb97b173da 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -398,7 +398,7 @@ static int ocfs2_control_do_setnode_msg(struct file *file,
static int ocfs2_control_do_setversion_msg(struct file *file,
struct ocfs2_control_message_setv *msg)
- {
+{
long major, minor;
char *ptr = NULL;
struct ocfs2_control_private *p = file->private_data;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index d8f5f6ce99dc..f7c972fbed6a 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -79,8 +79,6 @@ static u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res)
return ocfs2_which_suballoc_group(res->sr_blkno, res->sr_bit_offset);
}
-static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
-static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
static int ocfs2_block_group_fill(handle_t *handle,
struct inode *alloc_inode,
@@ -387,7 +385,7 @@ static int ocfs2_block_group_fill(handle_t *handle,
memset(bg, 0, sb->s_blocksize);
strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE);
- bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
+ bg->bg_generation = cpu_to_le32(osb->fs_generation);
bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb, 1,
osb->s_feature_incompat));
bg->bg_chain = cpu_to_le16(my_chain);
@@ -1521,7 +1519,7 @@ static int ocfs2_cluster_group_search(struct inode *inode,
OCFS2_I(inode)->ip_clusters, max_bits);
}
- ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
+ ret = ocfs2_block_group_find_clear_bits(osb,
group_bh, bits_wanted,
max_bits, res);
if (ret)
@@ -2626,53 +2624,6 @@ int ocfs2_release_clusters(handle_t *handle,
_ocfs2_clear_bit);
}
-static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg)
-{
- printk("Block Group:\n");
- printk("bg_signature: %s\n", bg->bg_signature);
- printk("bg_size: %u\n", bg->bg_size);
- printk("bg_bits: %u\n", bg->bg_bits);
- printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count);
- printk("bg_chain: %u\n", bg->bg_chain);
- printk("bg_generation: %u\n", le32_to_cpu(bg->bg_generation));
- printk("bg_next_group: %llu\n",
- (unsigned long long)bg->bg_next_group);
- printk("bg_parent_dinode: %llu\n",
- (unsigned long long)bg->bg_parent_dinode);
- printk("bg_blkno: %llu\n",
- (unsigned long long)bg->bg_blkno);
-}
-
-static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
-{
- int i;
-
- printk("Suballoc Inode %llu:\n", (unsigned long long)fe->i_blkno);
- printk("i_signature: %s\n", fe->i_signature);
- printk("i_size: %llu\n",
- (unsigned long long)fe->i_size);
- printk("i_clusters: %u\n", fe->i_clusters);
- printk("i_generation: %u\n",
- le32_to_cpu(fe->i_generation));
- printk("id1.bitmap1.i_used: %u\n",
- le32_to_cpu(fe->id1.bitmap1.i_used));
- printk("id1.bitmap1.i_total: %u\n",
- le32_to_cpu(fe->id1.bitmap1.i_total));
- printk("id2.i_chain.cl_cpg: %u\n", fe->id2.i_chain.cl_cpg);
- printk("id2.i_chain.cl_bpc: %u\n", fe->id2.i_chain.cl_bpc);
- printk("id2.i_chain.cl_count: %u\n", fe->id2.i_chain.cl_count);
- printk("id2.i_chain.cl_next_free_rec: %u\n",
- fe->id2.i_chain.cl_next_free_rec);
- for(i = 0; i < fe->id2.i_chain.cl_next_free_rec; i++) {
- printk("fe->id2.i_chain.cl_recs[%d].c_free: %u\n", i,
- fe->id2.i_chain.cl_recs[i].c_free);
- printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i,
- fe->id2.i_chain.cl_recs[i].c_total);
- printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", i,
- (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno);
- }
-}
-
/*
* For a given allocation, determine which allocators will need to be
* accessed, and lock them, reserving the appropriate number of bits.
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index ffa4952d432b..3415e0b09398 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -423,10 +423,10 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait)
ocfs2_schedule_truncate_log_flush(osb, 0);
}
- if (jbd2_journal_start_commit(OCFS2_SB(sb)->journal->j_journal,
+ if (jbd2_journal_start_commit(osb->journal->j_journal,
&target)) {
if (wait)
- jbd2_log_wait_commit(OCFS2_SB(sb)->journal->j_journal,
+ jbd2_log_wait_commit(osb->journal->j_journal,
target);
}
return 0;
@@ -1161,6 +1161,23 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
ocfs2_complete_mount_recovery(osb);
+ osb->osb_dev_kset = kset_create_and_add(sb->s_id, NULL,
+ &ocfs2_kset->kobj);
+ if (!osb->osb_dev_kset) {
+ status = -ENOMEM;
+ mlog(ML_ERROR, "Unable to create device kset %s.\n", sb->s_id);
+ goto read_super_error;
+ }
+
+ /* Create filecheck sysfs related directories/files at
+ * /sys/fs/ocfs2/<devname>/filecheck */
+ if (ocfs2_filecheck_create_sysfs(osb)) {
+ status = -ENOMEM;
+ mlog(ML_ERROR, "Unable to create filecheck sysfs directory at "
+ "/sys/fs/ocfs2/%s/filecheck.\n", sb->s_id);
+ goto read_super_error;
+ }
+
if (ocfs2_mount_local(osb))
snprintf(nodestr, sizeof(nodestr), "local");
else
@@ -1199,9 +1216,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
/* Start this when the mount is almost sure of being successful */
ocfs2_orphan_scan_start(osb);
- /* Create filecheck sysfile /sys/fs/ocfs2/<devname>/filecheck */
- ocfs2_filecheck_create_sysfs(sb);
-
return status;
read_super_error:
@@ -1653,7 +1667,6 @@ static void ocfs2_put_super(struct super_block *sb)
ocfs2_sync_blockdev(sb);
ocfs2_dismount_volume(sb, 0);
- ocfs2_filecheck_remove_sysfs(sb);
}
static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -1768,12 +1781,9 @@ static int ocfs2_initialize_mem_caches(void)
NULL);
if (!ocfs2_inode_cachep || !ocfs2_dquot_cachep ||
!ocfs2_qf_chunk_cachep) {
- if (ocfs2_inode_cachep)
- kmem_cache_destroy(ocfs2_inode_cachep);
- if (ocfs2_dquot_cachep)
- kmem_cache_destroy(ocfs2_dquot_cachep);
- if (ocfs2_qf_chunk_cachep)
- kmem_cache_destroy(ocfs2_qf_chunk_cachep);
+ kmem_cache_destroy(ocfs2_inode_cachep);
+ kmem_cache_destroy(ocfs2_dquot_cachep);
+ kmem_cache_destroy(ocfs2_qf_chunk_cachep);
return -ENOMEM;
}
@@ -1787,16 +1797,13 @@ static void ocfs2_free_mem_caches(void)
* destroy cache.
*/
rcu_barrier();
- if (ocfs2_inode_cachep)
- kmem_cache_destroy(ocfs2_inode_cachep);
+ kmem_cache_destroy(ocfs2_inode_cachep);
ocfs2_inode_cachep = NULL;
- if (ocfs2_dquot_cachep)
- kmem_cache_destroy(ocfs2_dquot_cachep);
+ kmem_cache_destroy(ocfs2_dquot_cachep);
ocfs2_dquot_cachep = NULL;
- if (ocfs2_qf_chunk_cachep)
- kmem_cache_destroy(ocfs2_qf_chunk_cachep);
+ kmem_cache_destroy(ocfs2_qf_chunk_cachep);
ocfs2_qf_chunk_cachep = NULL;
}
@@ -1899,6 +1906,12 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
osb = OCFS2_SB(sb);
BUG_ON(!osb);
+ /* Remove file check sysfs related directores/files,
+ * and wait for the pending file check operations */
+ ocfs2_filecheck_remove_sysfs(osb);
+
+ kset_unregister(osb->osb_dev_kset);
+
debugfs_remove(osb->osb_ctxt);
/* Orphan scan should be stopped as early as possible */
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 82e17b076ce7..78f09c76ab3c 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -633,6 +633,5 @@ int __init init_ocfs2_uptodate_cache(void)
void exit_ocfs2_uptodate_cache(void)
{
- if (ocfs2_uptodate_cachep)
- kmem_cache_destroy(ocfs2_uptodate_cachep);
+ kmem_cache_destroy(ocfs2_uptodate_cachep);
}
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index c261c1dfd374..3a24ce3deb01 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -3564,7 +3564,7 @@ int ocfs2_xattr_set(struct inode *inode,
.not_found = -ENODATA,
};
- if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
+ if (!ocfs2_supports_xattr(osb))
return -EOPNOTSUPP;
/*
diff --git a/fs/open.c b/fs/open.c
index d0e955b558ad..c5ee7cd60424 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -724,16 +724,6 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
return ksys_fchown(fd, user, group);
}
-int open_check_o_direct(struct file *f)
-{
- /* NB: we're sure to have correct a_ops only after f_op->open */
- if (f->f_flags & O_DIRECT) {
- if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
- return -EINVAL;
- }
- return 0;
-}
-
static int do_dentry_open(struct file *f,
struct inode *inode,
int (*open)(struct inode *, struct file *),
@@ -755,7 +745,7 @@ static int do_dentry_open(struct file *f,
if (unlikely(f->f_flags & O_PATH)) {
f->f_mode = FMODE_PATH;
f->f_op = &empty_fops;
- return 0;
+ goto done;
}
if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
@@ -808,7 +798,12 @@ static int do_dentry_open(struct file *f,
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
-
+done:
+ /* NB: we're sure to have correct a_ops only after f_op->open */
+ error = -EINVAL;
+ if ((f->f_flags & O_DIRECT) &&
+ (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO))
+ goto out_fput;
return 0;
cleanup_all:
@@ -823,6 +818,9 @@ cleanup_file:
f->f_path.dentry = NULL;
f->f_inode = NULL;
return error;
+out_fput:
+ fput(f);
+ return error;
}
/**
@@ -920,20 +918,14 @@ struct file *dentry_open(const struct path *path, int flags,
BUG_ON(!path->mnt);
f = get_empty_filp();
- if (!IS_ERR(f)) {
- f->f_flags = flags;
- error = vfs_open(path, f, cred);
- if (!error) {
- /* from now on we need fput() to dispose of f */
- error = open_check_o_direct(f);
- if (error) {
- fput(f);
- f = ERR_PTR(error);
- }
- } else {
- put_filp(f);
- f = ERR_PTR(error);
- }
+ if (IS_ERR(f))
+ return f;
+
+ f->f_flags = flags;
+ error = vfs_open(path, f, cred);
+ if (error) {
+ put_filp(f);
+ return ERR_PTR(error);
}
return f;
}
diff --git a/fs/orangefs/acl.c b/fs/orangefs/acl.c
index 480ea059a680..10587413b20e 100644
--- a/fs/orangefs/acl.c
+++ b/fs/orangefs/acl.c
@@ -9,7 +9,6 @@
#include "orangefs-kernel.h"
#include "orangefs-bufmap.h"
#include <linux/posix_acl_xattr.h>
-#include <linux/fs_struct.h>
struct posix_acl *orangefs_get_acl(struct inode *inode, int type)
{
diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c
index b03057afac2a..66369ec90020 100644
--- a/fs/orangefs/devorangefs-req.c
+++ b/fs/orangefs/devorangefs-req.c
@@ -463,11 +463,10 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
if (op->downcall.type != ORANGEFS_VFS_OP_READDIR)
goto wakeup;
- op->downcall.trailer_buf = vmalloc(op->downcall.trailer_size);
+ op->downcall.trailer_buf = vzalloc(op->downcall.trailer_size);
if (!op->downcall.trailer_buf)
goto Enomem;
- memset(op->downcall.trailer_buf, 0, op->downcall.trailer_size);
if (!copy_from_iter_full(op->downcall.trailer_buf,
op->downcall.trailer_size, iter)) {
gossip_err("%s: failed to copy trailer.\n", __func__);
@@ -779,9 +778,35 @@ static long orangefs_devreq_compat_ioctl(struct file *filp, unsigned int cmd,
#endif /* CONFIG_COMPAT is in .config */
+static __poll_t orangefs_devreq_poll(struct file *file,
+ struct poll_table_struct *poll_table)
+{
+ __poll_t poll_revent_mask = 0;
+
+ poll_wait(file, &orangefs_request_list_waitq, poll_table);
+
+ if (!list_empty(&orangefs_request_list))
+ poll_revent_mask |= EPOLLIN;
+ return poll_revent_mask;
+}
+
/* the assigned character device major number */
static int orangefs_dev_major;
+static const struct file_operations orangefs_devreq_file_operations = {
+ .owner = THIS_MODULE,
+ .read = orangefs_devreq_read,
+ .write_iter = orangefs_devreq_write_iter,
+ .open = orangefs_devreq_open,
+ .release = orangefs_devreq_release,
+ .unlocked_ioctl = orangefs_devreq_ioctl,
+
+#ifdef CONFIG_COMPAT /* CONFIG_COMPAT is in .config */
+ .compat_ioctl = orangefs_devreq_compat_ioctl,
+#endif
+ .poll = orangefs_devreq_poll
+};
+
/*
* Initialize orangefs device specific state:
* Must be called at module load time only
@@ -814,29 +839,3 @@ void orangefs_dev_cleanup(void)
"*** /dev/%s character device unregistered ***\n",
ORANGEFS_REQDEVICE_NAME);
}
-
-static __poll_t orangefs_devreq_poll(struct file *file,
- struct poll_table_struct *poll_table)
-{
- __poll_t poll_revent_mask = 0;
-
- poll_wait(file, &orangefs_request_list_waitq, poll_table);
-
- if (!list_empty(&orangefs_request_list))
- poll_revent_mask |= EPOLLIN;
- return poll_revent_mask;
-}
-
-const struct file_operations orangefs_devreq_file_operations = {
- .owner = THIS_MODULE,
- .read = orangefs_devreq_read,
- .write_iter = orangefs_devreq_write_iter,
- .open = orangefs_devreq_open,
- .release = orangefs_devreq_release,
- .unlocked_ioctl = orangefs_devreq_ioctl,
-
-#ifdef CONFIG_COMPAT /* CONFIG_COMPAT is in .config */
- .compat_ioctl = orangefs_devreq_compat_ioctl,
-#endif
- .poll = orangefs_devreq_poll
-};
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index 0d228cd087e6..26358efbf794 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -42,70 +42,6 @@ static int flush_racache(struct inode *inode)
}
/*
- * Copy to client-core's address space from the buffers specified
- * by the iovec upto total_size bytes.
- * NOTE: the iovector can either contain addresses which
- * can futher be kernel-space or user-space addresses.
- * or it can pointers to struct page's
- */
-static int precopy_buffers(int buffer_index,
- struct iov_iter *iter,
- size_t total_size)
-{
- int ret = 0;
- /*
- * copy data from application/kernel by pulling it out
- * of the iovec.
- */
-
-
- if (total_size) {
- ret = orangefs_bufmap_copy_from_iovec(iter,
- buffer_index,
- total_size);
- if (ret < 0)
- gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n",
- __func__,
- (long)ret);
- }
-
- if (ret < 0)
- gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n",
- __func__,
- (long)ret);
- return ret;
-}
-
-/*
- * Copy from client-core's address space to the buffers specified
- * by the iovec upto total_size bytes.
- * NOTE: the iovector can either contain addresses which
- * can futher be kernel-space or user-space addresses.
- * or it can pointers to struct page's
- */
-static int postcopy_buffers(int buffer_index,
- struct iov_iter *iter,
- size_t total_size)
-{
- int ret = 0;
- /*
- * copy data to application/kernel by pushing it out to
- * the iovec. NOTE; target buffers can be addresses or
- * struct page pointers.
- */
- if (total_size) {
- ret = orangefs_bufmap_copy_to_iovec(iter,
- buffer_index,
- total_size);
- if (ret < 0)
- gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n",
- __func__,
- (long)ret);
- }
- return ret;
-}
-
-/*
* Post and wait for the I/O upcall to finish
*/
static ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode,
@@ -157,14 +93,15 @@ populate_shared_memory:
total_size);
/*
* Stage 1: copy the buffers into client-core's address space
- * precopy_buffers only pertains to writes.
*/
- if (type == ORANGEFS_IO_WRITE) {
- ret = precopy_buffers(buffer_index,
- iter,
- total_size);
- if (ret < 0)
+ if (type == ORANGEFS_IO_WRITE && total_size) {
+ ret = orangefs_bufmap_copy_from_iovec(iter, buffer_index,
+ total_size);
+ if (ret < 0) {
+ gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n",
+ __func__, (long)ret);
goto out;
+ }
}
gossip_debug(GOSSIP_FILE_DEBUG,
@@ -260,14 +197,20 @@ populate_shared_memory:
/*
* Stage 3: Post copy buffers from client-core's address space
- * postcopy_buffers only pertains to reads.
*/
- if (type == ORANGEFS_IO_READ) {
- ret = postcopy_buffers(buffer_index,
- iter,
- new_op->downcall.resp.io.amt_complete);
- if (ret < 0)
+ if (type == ORANGEFS_IO_READ && new_op->downcall.resp.io.amt_complete) {
+ /*
+ * NOTE: the iovector can either contain addresses which
+ * can futher be kernel-space or user-space addresses.
+ * or it can pointers to struct page's
+ */
+ ret = orangefs_bufmap_copy_to_iovec(iter, buffer_index,
+ new_op->downcall.resp.io.amt_complete);
+ if (ret < 0) {
+ gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n",
+ __func__, (long)ret);
goto out;
+ }
}
gossip_debug(GOSSIP_FILE_DEBUG,
"%s(%pU): Amount %s, returned by the sys-io call:%d\n",
@@ -585,6 +528,28 @@ static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long ar
return ret;
}
+static int orangefs_fault(struct vm_fault *vmf)
+{
+ struct file *file = vmf->vma->vm_file;
+ int rc;
+ rc = orangefs_inode_getattr(file->f_mapping->host, 0, 1,
+ STATX_SIZE);
+ if (rc == -ESTALE)
+ rc = -EIO;
+ if (rc) {
+ gossip_err("%s: orangefs_inode_getattr failed, "
+ "rc:%d:.\n", __func__, rc);
+ return rc;
+ }
+ return filemap_fault(vmf);
+}
+
+const struct vm_operations_struct orangefs_file_vm_ops = {
+ .fault = orangefs_fault,
+ .map_pages = filemap_map_pages,
+ .page_mkwrite = filemap_page_mkwrite,
+};
+
/*
* Memory map a region of a file.
*/
@@ -596,12 +561,16 @@ static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma)
(char *)file->f_path.dentry->d_name.name :
(char *)"Unknown"));
+ if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
+ return -EINVAL;
+
/* set the sequential readahead hint */
vma->vm_flags |= VM_SEQ_READ;
vma->vm_flags &= ~VM_RAND_READ;
- /* Use readonly mmap since we cannot support writable maps. */
- return generic_file_readonly_mmap(file, vma);
+ file_accessed(file);
+ vma->vm_ops = &orangefs_file_vm_ops;
+ return 0;
}
#define mapping_nrpages(idata) ((idata)->nrpages)
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index fe1d705ad91f..79c61da8b1bc 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -138,7 +138,7 @@ static ssize_t orangefs_direct_IO(struct kiocb *iocb,
}
/** ORANGEFS2 implementation of address space operations */
-const struct address_space_operations orangefs_address_operations = {
+static const struct address_space_operations orangefs_address_operations = {
.readpage = orangefs_readpage,
.readpages = orangefs_readpages,
.invalidatepage = orangefs_invalidatepage,
@@ -307,7 +307,7 @@ int orangefs_update_time(struct inode *inode, struct timespec *time, int flags)
}
/* ORANGEDS2 implementation of VFS inode operations for files */
-const struct inode_operations orangefs_file_inode_operations = {
+static const struct inode_operations orangefs_file_inode_operations = {
.get_acl = orangefs_get_acl,
.set_acl = orangefs_set_acl,
.setattr = orangefs_setattr,
diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
index 59f444dced9b..4f927023d095 100644
--- a/fs/orangefs/orangefs-bufmap.c
+++ b/fs/orangefs/orangefs-bufmap.c
@@ -71,9 +71,9 @@ static void put(struct slot_map *m, int slot)
spin_lock(&m->q.lock);
__clear_bit(slot, m->map);
v = ++m->c;
- if (unlikely(v == 1)) /* no free slots -> one free slot */
+ if (v > 0)
wake_up_locked(&m->q);
- else if (unlikely(v == -1)) /* finished dying */
+ if (unlikely(v == -1)) /* finished dying */
wake_up_all_locked(&m->q);
spin_unlock(&m->q.lock);
}
diff --git a/fs/orangefs/orangefs-debug.h b/fs/orangefs/orangefs-debug.h
index c7db56a31b92..6e079d4230d0 100644
--- a/fs/orangefs/orangefs-debug.h
+++ b/fs/orangefs/orangefs-debug.h
@@ -43,12 +43,6 @@
#define GOSSIP_MAX_NR 16
#define GOSSIP_MAX_DEBUG (((__u64)1 << GOSSIP_MAX_NR) - 1)
-/*function prototypes*/
-__u64 ORANGEFS_kmod_eventlog_to_mask(const char *event_logging);
-__u64 ORANGEFS_debug_eventlog_to_mask(const char *event_logging);
-char *ORANGEFS_debug_mask_to_eventlog(__u64 mask);
-char *ORANGEFS_kmod_mask_to_eventlog(__u64 mask);
-
/* a private internal type */
struct __keyword_mask_s {
const char *keyword;
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index eebbaece85ef..c29bb0ebc6bb 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -65,11 +65,7 @@
#define ORANGEFS_REQDEVICE_NAME "pvfs2-req"
#define ORANGEFS_DEVREQ_MAGIC 0x20030529
-#define ORANGEFS_LINK_MAX 0x000000FF
#define ORANGEFS_PURGE_RETRY_COUNT 0x00000005
-#define ORANGEFS_MAX_NUM_OPTIONS 0x00000004
-#define ORANGEFS_MAX_MOUNT_OPT_LEN 0x00000080
-#define ORANGEFS_MAX_FSKEY_LEN 64
#define MAX_DEV_REQ_UPSIZE (2 * sizeof(__s32) + \
sizeof(__u64) + sizeof(struct orangefs_upcall_s))
@@ -113,15 +109,6 @@ extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type);
extern int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
/*
- * Redefine xtvec structure so that we could move helper functions out of
- * the define
- */
-struct xtvec {
- __kernel_off_t xtv_off; /* must be off_t */
- __kernel_size_t xtv_len; /* must be size_t */
-};
-
-/*
* orangefs data structures
*/
struct orangefs_kernel_op_s {
@@ -224,39 +211,6 @@ struct orangefs_sb_info_s {
struct list_head list;
};
-/*
- * structure that holds the state of any async I/O operation issued
- * through the VFS. Needed especially to handle cancellation requests
- * or even completion notification so that the VFS client-side daemon
- * can free up its vfs_request slots.
- */
-struct orangefs_kiocb_s {
- /* the pointer to the task that initiated the AIO */
- struct task_struct *tsk;
-
- /* pointer to the kiocb that kicked this operation */
- struct kiocb *kiocb;
-
- /* buffer index that was used for the I/O */
- struct orangefs_bufmap *bufmap;
- int buffer_index;
-
- /* orangefs kernel operation type */
- struct orangefs_kernel_op_s *op;
-
- /* set to indicate the type of the operation */
- int rw;
-
- /* file offset */
- loff_t offset;
-
- /* and the count in bytes */
- size_t bytes_to_be_copied;
-
- ssize_t bytes_copied;
- int needs_cleanup;
-};
-
struct orangefs_stats {
unsigned long cache_hits;
unsigned long cache_misses;
@@ -305,21 +259,6 @@ static inline struct orangefs_khandle *get_khandle_from_ino(struct inode *inode)
return &(ORANGEFS_I(inode)->refn.khandle);
}
-static inline ino_t get_ino_from_khandle(struct inode *inode)
-{
- struct orangefs_khandle *khandle;
- ino_t ino;
-
- khandle = get_khandle_from_ino(inode);
- ino = orangefs_khandle_to_ino(khandle);
- return ino;
-}
-
-static inline ino_t get_parent_ino_from_dentry(struct dentry *dentry)
-{
- return get_ino_from_khandle(dentry->d_parent->d_inode);
-}
-
static inline int is_root_handle(struct inode *inode)
{
gossip_debug(GOSSIP_DCACHE_DEBUG,
@@ -391,7 +330,6 @@ void fsid_key_table_finalize(void);
/*
* defined in inode.c
*/
-__u32 convert_to_orangefs_mask(unsigned long lite_mask);
struct inode *orangefs_new_inode(struct super_block *sb,
struct inode *dir,
int mode,
@@ -410,17 +348,6 @@ int orangefs_update_time(struct inode *, struct timespec *, int);
/*
* defined in xattr.c
*/
-int orangefs_setxattr(struct dentry *dentry,
- const char *name,
- const void *value,
- size_t size,
- int flags);
-
-ssize_t orangefs_getxattr(struct dentry *dentry,
- const char *name,
- void *buffer,
- size_t size);
-
ssize_t orangefs_listxattr(struct dentry *dentry, char *buffer, size_t size);
/*
@@ -467,8 +394,6 @@ int orangefs_inode_check_changed(struct inode *inode);
int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr);
-int orangefs_unmount_sb(struct super_block *sb);
-
bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op);
int orangefs_normalize_to_errno(__s32 error_code);
@@ -487,16 +412,11 @@ extern struct list_head *orangefs_htable_ops_in_progress;
extern spinlock_t orangefs_htable_ops_in_progress_lock;
extern int hash_table_size;
-extern const struct address_space_operations orangefs_address_operations;
-extern const struct inode_operations orangefs_file_inode_operations;
extern const struct file_operations orangefs_file_operations;
extern const struct inode_operations orangefs_symlink_inode_operations;
extern const struct inode_operations orangefs_dir_inode_operations;
extern const struct file_operations orangefs_dir_operations;
extern const struct dentry_operations orangefs_dentry_operations;
-extern const struct file_operations orangefs_devreq_file_operations;
-
-extern wait_queue_head_t orangefs_bufmap_init_waitq;
/*
* misc convenience macros
diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c
index ea6256d136d1..00fadaf0da8f 100644
--- a/fs/orangefs/orangefs-utils.c
+++ b/fs/orangefs/orangefs-utils.c
@@ -500,7 +500,7 @@ int orangefs_normalize_to_errno(__s32 error_code)
* server.
*/
} else if (error_code > 0) {
- gossip_err("orangefs: error status receieved.\n");
+ gossip_err("orangefs: error status received.\n");
gossip_err("orangefs: assuming error code is inverted.\n");
error_code = -error_code;
}
diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h
index dc6e3e6269c3..61ee8d64c842 100644
--- a/fs/orangefs/protocol.h
+++ b/fs/orangefs/protocol.h
@@ -5,11 +5,6 @@
#include <linux/slab.h>
#include <linux/ioctl.h>
-/* pvfs2-config.h ***********************************************************/
-#define ORANGEFS_VERSION_MAJOR 2
-#define ORANGEFS_VERSION_MINOR 9
-#define ORANGEFS_VERSION_SUB 0
-
/* khandle stuff ***********************************************************/
/*
@@ -70,16 +65,6 @@ static inline void ORANGEFS_khandle_from(struct orangefs_khandle *kh,
}
/* pvfs2-types.h ************************************************************/
-typedef __u32 ORANGEFS_uid;
-typedef __u32 ORANGEFS_gid;
-typedef __s32 ORANGEFS_fs_id;
-typedef __u32 ORANGEFS_permissions;
-typedef __u64 ORANGEFS_time;
-typedef __s64 ORANGEFS_size;
-typedef __u64 ORANGEFS_flags;
-typedef __u64 ORANGEFS_ds_position;
-typedef __s32 ORANGEFS_error;
-typedef __s64 ORANGEFS_offset;
#define ORANGEFS_SUPER_MAGIC 0x20030528
@@ -145,7 +130,6 @@ typedef __s64 ORANGEFS_offset;
#define ORANGEFS_APPEND_FL FS_APPEND_FL
#define ORANGEFS_NOATIME_FL FS_NOATIME_FL
#define ORANGEFS_MIRROR_FL 0x01000000ULL
-#define ORANGEFS_O_EXECUTE (1 << 0)
#define ORANGEFS_FS_ID_NULL ((__s32)0)
#define ORANGEFS_ATTR_SYS_UID (1 << 0)
#define ORANGEFS_ATTR_SYS_GID (1 << 1)
@@ -229,35 +213,6 @@ enum orangefs_ds_type {
ORANGEFS_TYPE_INTERNAL = (1 << 5) /* for the server's private use */
};
-/*
- * ORANGEFS_certificate simply stores a buffer with the buffer size.
- * The buffer can be converted to an OpenSSL X509 struct for use.
- */
-struct ORANGEFS_certificate {
- __u32 buf_size;
- unsigned char *buf;
-};
-
-/*
- * A credential identifies a user and is signed by the client/user
- * private key.
- */
-struct ORANGEFS_credential {
- __u32 userid; /* user id */
- __u32 num_groups; /* length of group_array */
- __u32 *group_array; /* groups for which the user is a member */
- char *issuer; /* alias of the issuing server */
- __u64 timeout; /* seconds after epoch to time out */
- __u32 sig_size; /* length of the signature in bytes */
- unsigned char *signature; /* digital signature */
- struct ORANGEFS_certificate certificate; /* user certificate buffer */
-};
-#define extra_size_ORANGEFS_credential (ORANGEFS_REQ_LIMIT_GROUPS * \
- sizeof(__u32) + \
- ORANGEFS_REQ_LIMIT_ISSUER + \
- ORANGEFS_REQ_LIMIT_SIGNATURE + \
- extra_size_ORANGEFS_certificate)
-
/* This structure is used by the VFS-client interaction alone */
struct ORANGEFS_keyval_pair {
char key[ORANGEFS_MAX_XATTR_NAMELEN];
diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index b42e5bd6d8ff..09c19ef91526 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -1,5 +1,6 @@
config PSTORE
tristate "Persistent store support"
+ select CRYPTO if PSTORE_COMPRESS
default n
help
This option enables generic access to platform level
@@ -12,35 +13,89 @@ config PSTORE
If you don't have a platform persistent store driver,
say N.
-choice
- prompt "Choose compression algorithm"
- depends on PSTORE
- default PSTORE_ZLIB_COMPRESS
- help
- This option chooses compression algorithm.
-
-config PSTORE_ZLIB_COMPRESS
- bool "ZLIB"
- select ZLIB_DEFLATE
- select ZLIB_INFLATE
- help
- This option enables ZLIB compression algorithm support.
+config PSTORE_DEFLATE_COMPRESS
+ tristate "DEFLATE (ZLIB) compression"
+ default y
+ depends on PSTORE
+ select CRYPTO_DEFLATE
+ help
+ This option enables DEFLATE (also known as ZLIB) compression
+ algorithm support.
config PSTORE_LZO_COMPRESS
- bool "LZO"
- select LZO_COMPRESS
- select LZO_DECOMPRESS
- help
- This option enables LZO compression algorithm support.
+ tristate "LZO compression"
+ depends on PSTORE
+ select CRYPTO_LZO
+ help
+ This option enables LZO compression algorithm support.
config PSTORE_LZ4_COMPRESS
- bool "LZ4"
- select LZ4_COMPRESS
- select LZ4_DECOMPRESS
- help
- This option enables LZ4 compression algorithm support.
+ tristate "LZ4 compression"
+ depends on PSTORE
+ select CRYPTO_LZ4
+ help
+ This option enables LZ4 compression algorithm support.
+
+config PSTORE_LZ4HC_COMPRESS
+ tristate "LZ4HC compression"
+ depends on PSTORE
+ select CRYPTO_LZ4HC
+ help
+ This option enables LZ4HC (high compression) mode algorithm.
+
+config PSTORE_842_COMPRESS
+ bool "842 compression"
+ depends on PSTORE
+ select CRYPTO_842
+ help
+ This option enables 842 compression algorithm support.
+
+config PSTORE_COMPRESS
+ def_bool y
+ depends on PSTORE
+ depends on PSTORE_DEFLATE_COMPRESS || PSTORE_LZO_COMPRESS || \
+ PSTORE_LZ4_COMPRESS || PSTORE_LZ4HC_COMPRESS || \
+ PSTORE_842_COMPRESS
+
+choice
+ prompt "Default pstore compression algorithm"
+ depends on PSTORE_COMPRESS
+ help
+ This option chooses the default active compression algorithm.
+ This change be changed at boot with "pstore.compress=..." on
+ the kernel command line.
+
+ Currently, pstore has support for 5 compression algorithms:
+ deflate, lzo, lz4, lz4hc and 842.
+
+ The default compression algorithm is deflate.
+
+ config PSTORE_DEFLATE_COMPRESS_DEFAULT
+ bool "deflate" if PSTORE_DEFLATE_COMPRESS
+
+ config PSTORE_LZO_COMPRESS_DEFAULT
+ bool "lzo" if PSTORE_LZO_COMPRESS
+
+ config PSTORE_LZ4_COMPRESS_DEFAULT
+ bool "lz4" if PSTORE_LZ4_COMPRESS
+
+ config PSTORE_LZ4HC_COMPRESS_DEFAULT
+ bool "lz4hc" if PSTORE_LZ4HC_COMPRESS
+
+ config PSTORE_842_COMPRESS_DEFAULT
+ bool "842" if PSTORE_842_COMPRESS
+
endchoice
+config PSTORE_COMPRESS_DEFAULT
+ string
+ depends on PSTORE_COMPRESS
+ default "deflate" if PSTORE_DEFLATE_COMPRESS_DEFAULT
+ default "lzo" if PSTORE_LZO_COMPRESS_DEFAULT
+ default "lz4" if PSTORE_LZ4_COMPRESS_DEFAULT
+ default "lz4hc" if PSTORE_LZ4HC_COMPRESS_DEFAULT
+ default "842" if PSTORE_842_COMPRESS_DEFAULT
+
config PSTORE_CONSOLE
bool "Log kernel console messages"
depends on PSTORE
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index d814723fb27d..5fcb845b9fec 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -486,6 +486,8 @@ static int __init init_pstore_fs(void)
{
int err;
+ pstore_choose_compression();
+
/* Create a convenient mount point for people to access pstore */
err = sysfs_create_mount_point(fs_kobj, "pstore");
if (err)
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
index c029314478fa..fb767e28aeb2 100644
--- a/fs/pstore/internal.h
+++ b/fs/pstore/internal.h
@@ -37,4 +37,7 @@ extern bool pstore_is_mounted(void);
extern void pstore_record_init(struct pstore_record *record,
struct pstore_info *psi);
+/* Called during module_init() */
+extern void __init pstore_choose_compression(void);
+
#endif
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index c3129b131e4d..dc720573fd53 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -28,15 +28,13 @@
#include <linux/console.h>
#include <linux/module.h>
#include <linux/pstore.h>
-#ifdef CONFIG_PSTORE_ZLIB_COMPRESS
-#include <linux/zlib.h>
-#endif
-#ifdef CONFIG_PSTORE_LZO_COMPRESS
+#if IS_ENABLED(CONFIG_PSTORE_LZO_COMPRESS)
#include <linux/lzo.h>
#endif
-#ifdef CONFIG_PSTORE_LZ4_COMPRESS
+#if IS_ENABLED(CONFIG_PSTORE_LZ4_COMPRESS) || IS_ENABLED(CONFIG_PSTORE_LZ4HC_COMPRESS)
#include <linux/lz4.h>
#endif
+#include <linux/crypto.h>
#include <linux/string.h>
#include <linux/timer.h>
#include <linux/slab.h>
@@ -74,23 +72,18 @@ static DEFINE_SPINLOCK(pstore_lock);
struct pstore_info *psinfo;
static char *backend;
-
-/* Compression parameters */
-#ifdef CONFIG_PSTORE_ZLIB_COMPRESS
-#define COMPR_LEVEL 6
-#define WINDOW_BITS 12
-#define MEM_LEVEL 4
-static struct z_stream_s stream;
+static char *compress =
+#ifdef CONFIG_PSTORE_COMPRESS_DEFAULT
+ CONFIG_PSTORE_COMPRESS_DEFAULT;
#else
-static unsigned char *workspace;
+ NULL;
#endif
-struct pstore_zbackend {
- int (*compress)(const void *in, void *out, size_t inlen, size_t outlen);
- int (*decompress)(void *in, void *out, size_t inlen, size_t outlen);
- void (*allocate)(void);
- void (*free)(void);
+/* Compression parameters */
+static struct crypto_comp *tfm;
+struct pstore_zbackend {
+ int (*zbufsize)(size_t size);
const char *name;
};
@@ -149,77 +142,12 @@ bool pstore_cannot_block_path(enum kmsg_dump_reason reason)
}
EXPORT_SYMBOL_GPL(pstore_cannot_block_path);
-#ifdef CONFIG_PSTORE_ZLIB_COMPRESS
-/* Derived from logfs_compress() */
-static int compress_zlib(const void *in, void *out, size_t inlen, size_t outlen)
-{
- int err, ret;
-
- ret = -EIO;
- err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS,
- MEM_LEVEL, Z_DEFAULT_STRATEGY);
- if (err != Z_OK)
- goto error;
-
- stream.next_in = in;
- stream.avail_in = inlen;
- stream.total_in = 0;
- stream.next_out = out;
- stream.avail_out = outlen;
- stream.total_out = 0;
-
- err = zlib_deflate(&stream, Z_FINISH);
- if (err != Z_STREAM_END)
- goto error;
-
- err = zlib_deflateEnd(&stream);
- if (err != Z_OK)
- goto error;
-
- if (stream.total_out >= stream.total_in)
- goto error;
-
- ret = stream.total_out;
-error:
- return ret;
-}
-
-/* Derived from logfs_uncompress */
-static int decompress_zlib(void *in, void *out, size_t inlen, size_t outlen)
+#if IS_ENABLED(CONFIG_PSTORE_DEFLATE_COMPRESS)
+static int zbufsize_deflate(size_t size)
{
- int err, ret;
-
- ret = -EIO;
- err = zlib_inflateInit2(&stream, WINDOW_BITS);
- if (err != Z_OK)
- goto error;
-
- stream.next_in = in;
- stream.avail_in = inlen;
- stream.total_in = 0;
- stream.next_out = out;
- stream.avail_out = outlen;
- stream.total_out = 0;
-
- err = zlib_inflate(&stream, Z_FINISH);
- if (err != Z_STREAM_END)
- goto error;
-
- err = zlib_inflateEnd(&stream);
- if (err != Z_OK)
- goto error;
-
- ret = stream.total_out;
-error:
- return ret;
-}
-
-static void allocate_zlib(void)
-{
- size_t size;
size_t cmpr;
- switch (psinfo->bufsize) {
+ switch (size) {
/* buffer range for efivars */
case 1000 ... 2000:
cmpr = 56;
@@ -239,212 +167,131 @@ static void allocate_zlib(void)
break;
}
- big_oops_buf_sz = (psinfo->bufsize * 100) / cmpr;
- big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
- if (big_oops_buf) {
- size = max(zlib_deflate_workspacesize(WINDOW_BITS, MEM_LEVEL),
- zlib_inflate_workspacesize());
- stream.workspace = kmalloc(size, GFP_KERNEL);
- if (!stream.workspace) {
- pr_err("No memory for compression workspace; skipping compression\n");
- kfree(big_oops_buf);
- big_oops_buf = NULL;
- }
- } else {
- pr_err("No memory for uncompressed data; skipping compression\n");
- stream.workspace = NULL;
- }
-
+ return (size * 100) / cmpr;
}
-
-static void free_zlib(void)
-{
- kfree(stream.workspace);
- stream.workspace = NULL;
- kfree(big_oops_buf);
- big_oops_buf = NULL;
- big_oops_buf_sz = 0;
-}
-
-static const struct pstore_zbackend backend_zlib = {
- .compress = compress_zlib,
- .decompress = decompress_zlib,
- .allocate = allocate_zlib,
- .free = free_zlib,
- .name = "zlib",
-};
#endif
-#ifdef CONFIG_PSTORE_LZO_COMPRESS
-static int compress_lzo(const void *in, void *out, size_t inlen, size_t outlen)
+#if IS_ENABLED(CONFIG_PSTORE_LZO_COMPRESS)
+static int zbufsize_lzo(size_t size)
{
- int ret;
-
- ret = lzo1x_1_compress(in, inlen, out, &outlen, workspace);
- if (ret != LZO_E_OK) {
- pr_err("lzo_compress error, ret = %d!\n", ret);
- return -EIO;
- }
-
- return outlen;
+ return lzo1x_worst_compress(size);
}
+#endif
-static int decompress_lzo(void *in, void *out, size_t inlen, size_t outlen)
+#if IS_ENABLED(CONFIG_PSTORE_LZ4_COMPRESS) || IS_ENABLED(CONFIG_PSTORE_LZ4HC_COMPRESS)
+static int zbufsize_lz4(size_t size)
{
- int ret;
-
- ret = lzo1x_decompress_safe(in, inlen, out, &outlen);
- if (ret != LZO_E_OK) {
- pr_err("lzo_decompress error, ret = %d!\n", ret);
- return -EIO;
- }
-
- return outlen;
+ return LZ4_compressBound(size);
}
+#endif
-static void allocate_lzo(void)
+#if IS_ENABLED(CONFIG_PSTORE_842_COMPRESS)
+static int zbufsize_842(size_t size)
{
- big_oops_buf_sz = lzo1x_worst_compress(psinfo->bufsize);
- big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
- if (big_oops_buf) {
- workspace = kmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
- if (!workspace) {
- pr_err("No memory for compression workspace; skipping compression\n");
- kfree(big_oops_buf);
- big_oops_buf = NULL;
- }
- } else {
- pr_err("No memory for uncompressed data; skipping compression\n");
- workspace = NULL;
- }
+ return size;
}
+#endif
-static void free_lzo(void)
-{
- kfree(workspace);
- kfree(big_oops_buf);
- big_oops_buf = NULL;
- big_oops_buf_sz = 0;
-}
+static const struct pstore_zbackend *zbackend __ro_after_init;
-static const struct pstore_zbackend backend_lzo = {
- .compress = compress_lzo,
- .decompress = decompress_lzo,
- .allocate = allocate_lzo,
- .free = free_lzo,
- .name = "lzo",
-};
+static const struct pstore_zbackend zbackends[] = {
+#if IS_ENABLED(CONFIG_PSTORE_DEFLATE_COMPRESS)
+ {
+ .zbufsize = zbufsize_deflate,
+ .name = "deflate",
+ },
+#endif
+#if IS_ENABLED(CONFIG_PSTORE_LZO_COMPRESS)
+ {
+ .zbufsize = zbufsize_lzo,
+ .name = "lzo",
+ },
+#endif
+#if IS_ENABLED(CONFIG_PSTORE_LZ4_COMPRESS)
+ {
+ .zbufsize = zbufsize_lz4,
+ .name = "lz4",
+ },
#endif
+#if IS_ENABLED(CONFIG_PSTORE_LZ4HC_COMPRESS)
+ {
+ .zbufsize = zbufsize_lz4,
+ .name = "lz4hc",
+ },
+#endif
+#if IS_ENABLED(CONFIG_PSTORE_842_COMPRESS)
+ {
+ .zbufsize = zbufsize_842,
+ .name = "842",
+ },
+#endif
+ { }
+};
-#ifdef CONFIG_PSTORE_LZ4_COMPRESS
-static int compress_lz4(const void *in, void *out, size_t inlen, size_t outlen)
+static int pstore_compress(const void *in, void *out,
+ unsigned int inlen, unsigned int outlen)
{
int ret;
- ret = LZ4_compress_default(in, out, inlen, outlen, workspace);
- if (!ret) {
- pr_err("LZ4_compress_default error; compression failed!\n");
- return -EIO;
+ ret = crypto_comp_compress(tfm, in, inlen, out, &outlen);
+ if (ret) {
+ pr_err("crypto_comp_compress failed, ret = %d!\n", ret);
+ return ret;
}
- return ret;
+ return outlen;
}
-static int decompress_lz4(void *in, void *out, size_t inlen, size_t outlen)
+static int pstore_decompress(void *in, void *out,
+ unsigned int inlen, unsigned int outlen)
{
int ret;
- ret = LZ4_decompress_safe(in, out, inlen, outlen);
- if (ret < 0) {
- /*
- * LZ4_decompress_safe will return an error code
- * (< 0) if decompression failed
- */
- pr_err("LZ4_decompress_safe error, ret = %d!\n", ret);
- return -EIO;
+ ret = crypto_comp_decompress(tfm, in, inlen, out, &outlen);
+ if (ret) {
+ pr_err("crypto_comp_decompress failed, ret = %d!\n", ret);
+ return ret;
}
- return ret;
-}
-
-static void allocate_lz4(void)
-{
- big_oops_buf_sz = LZ4_compressBound(psinfo->bufsize);
- big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
- if (big_oops_buf) {
- workspace = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
- if (!workspace) {
- pr_err("No memory for compression workspace; skipping compression\n");
- kfree(big_oops_buf);
- big_oops_buf = NULL;
- }
- } else {
- pr_err("No memory for uncompressed data; skipping compression\n");
- workspace = NULL;
- }
+ return outlen;
}
-static void free_lz4(void)
+static void allocate_buf_for_compression(void)
{
- kfree(workspace);
- kfree(big_oops_buf);
- big_oops_buf = NULL;
- big_oops_buf_sz = 0;
-}
-
-static const struct pstore_zbackend backend_lz4 = {
- .compress = compress_lz4,
- .decompress = decompress_lz4,
- .allocate = allocate_lz4,
- .free = free_lz4,
- .name = "lz4",
-};
-#endif
-
-static const struct pstore_zbackend *zbackend =
-#if defined(CONFIG_PSTORE_ZLIB_COMPRESS)
- &backend_zlib;
-#elif defined(CONFIG_PSTORE_LZO_COMPRESS)
- &backend_lzo;
-#elif defined(CONFIG_PSTORE_LZ4_COMPRESS)
- &backend_lz4;
-#else
- NULL;
-#endif
+ if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS) || !zbackend)
+ return;
-static int pstore_compress(const void *in, void *out,
- size_t inlen, size_t outlen)
-{
- if (zbackend)
- return zbackend->compress(in, out, inlen, outlen);
- else
- return -EIO;
-}
+ if (!crypto_has_comp(zbackend->name, 0, 0)) {
+ pr_err("No %s compression\n", zbackend->name);
+ return;
+ }
-static int pstore_decompress(void *in, void *out, size_t inlen, size_t outlen)
-{
- if (zbackend)
- return zbackend->decompress(in, out, inlen, outlen);
- else
- return -EIO;
-}
+ big_oops_buf_sz = zbackend->zbufsize(psinfo->bufsize);
+ if (big_oops_buf_sz <= 0)
+ return;
-static void allocate_buf_for_compression(void)
-{
- if (zbackend) {
- pr_info("using %s compression\n", zbackend->name);
- zbackend->allocate();
- } else {
+ big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
+ if (!big_oops_buf) {
pr_err("allocate compression buffer error!\n");
+ return;
+ }
+
+ tfm = crypto_alloc_comp(zbackend->name, 0, 0);
+ if (IS_ERR_OR_NULL(tfm)) {
+ kfree(big_oops_buf);
+ big_oops_buf = NULL;
+ pr_err("crypto_alloc_comp() failed!\n");
+ return;
}
}
static void free_buf_for_compression(void)
{
- if (zbackend)
- zbackend->free();
- else
- pr_err("free compression buffer error!\n");
+ if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && !IS_ERR_OR_NULL(tfm))
+ crypto_free_comp(tfm);
+ kfree(big_oops_buf);
+ big_oops_buf = NULL;
+ big_oops_buf_sz = 0;
}
/*
@@ -901,5 +748,24 @@ static void pstore_timefunc(struct timer_list *unused)
jiffies + msecs_to_jiffies(pstore_update_ms));
}
+void __init pstore_choose_compression(void)
+{
+ const struct pstore_zbackend *step;
+
+ if (!compress)
+ return;
+
+ for (step = zbackends; step->name; step++) {
+ if (!strcmp(compress, step->name)) {
+ zbackend = step;
+ pr_info("using %s compression\n", zbackend->name);
+ return;
+ }
+ }
+}
+
+module_param(compress, charp, 0444);
+MODULE_PARM_DESC(compress, "Pstore compression to use");
+
module_param(backend, charp, 0444);
MODULE_PARM_DESC(backend, "Pstore backend to use");
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 7125b398d312..49b2bc114868 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -938,7 +938,7 @@ static int __init ramoops_init(void)
ramoops_register_dummy();
return platform_driver_register(&ramoops_driver);
}
-postcore_initcall(ramoops_init);
+late_initcall(ramoops_init);
static void __exit ramoops_exit(void)
{
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index e11672aa4575..951a14edcf51 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -98,24 +98,23 @@ static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz,
uint8_t *data, size_t len, uint8_t *ecc)
{
int i;
- uint16_t par[prz->ecc_info.ecc_size];
/* Initialize the parity buffer */
- memset(par, 0, sizeof(par));
- encode_rs8(prz->rs_decoder, data, len, par, 0);
+ memset(prz->ecc_info.par, 0,
+ prz->ecc_info.ecc_size * sizeof(prz->ecc_info.par[0]));
+ encode_rs8(prz->rs_decoder, data, len, prz->ecc_info.par, 0);
for (i = 0; i < prz->ecc_info.ecc_size; i++)
- ecc[i] = par[i];
+ ecc[i] = prz->ecc_info.par[i];
}
static int persistent_ram_decode_rs8(struct persistent_ram_zone *prz,
void *data, size_t len, uint8_t *ecc)
{
int i;
- uint16_t par[prz->ecc_info.ecc_size];
for (i = 0; i < prz->ecc_info.ecc_size; i++)
- par[i] = ecc[i];
- return decode_rs8(prz->rs_decoder, data, par, len,
+ prz->ecc_info.par[i] = ecc[i];
+ return decode_rs8(prz->rs_decoder, data, prz->ecc_info.par, len,
NULL, 0, NULL, 0, NULL);
}
@@ -228,6 +227,15 @@ static int persistent_ram_init_ecc(struct persistent_ram_zone *prz,
return -EINVAL;
}
+ /* allocate workspace instead of using stack VLA */
+ prz->ecc_info.par = kmalloc_array(prz->ecc_info.ecc_size,
+ sizeof(*prz->ecc_info.par),
+ GFP_KERNEL);
+ if (!prz->ecc_info.par) {
+ pr_err("cannot allocate ECC parity workspace\n");
+ return -ENOMEM;
+ }
+
prz->corrected_bytes = 0;
prz->bad_blocks = 0;
@@ -514,6 +522,13 @@ void persistent_ram_free(struct persistent_ram_zone *prz)
}
prz->vaddr = NULL;
}
+ if (prz->rs_decoder) {
+ free_rs(prz->rs_decoder);
+ prz->rs_decoder = NULL;
+ }
+ kfree(prz->ecc_info.par);
+ prz->ecc_info.par = NULL;
+
persistent_ram_free_old(prz);
kfree(prz);
}
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index 48835a659948..ae4811fecc1f 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -1916,7 +1916,7 @@ struct reiserfs_de_head {
/* empty directory contains two entries "." and ".." and their headers */
#define EMPTY_DIR_SIZE \
-(DEH_SIZE * 2 + ROUND_UP (strlen (".")) + ROUND_UP (strlen ("..")))
+(DEH_SIZE * 2 + ROUND_UP (sizeof(".") - 1) + ROUND_UP (sizeof("..") - 1))
/* old format directories have this size when empty */
#define EMPTY_DIR_SIZE_V1 (DEH_SIZE * 2 + 3)
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index cf348ba99238..1acb2ff505e6 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1256,7 +1256,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
* Inode length changed, so we have to make sure
* @I_DIRTY_DATASYNC is set.
*/
- __mark_inode_dirty(inode, I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+ __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
else
mark_inode_dirty_sync(inode);
mutex_unlock(&ui->ui_mutex);
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 356c2bf148a5..cd31e4f6d6da 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -257,12 +257,22 @@ const struct file_operations udf_file_operations = {
static int udf_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
+ struct super_block *sb = inode->i_sb;
int error;
error = setattr_prepare(dentry, attr);
if (error)
return error;
+ if ((attr->ia_valid & ATTR_UID) &&
+ UDF_QUERY_FLAG(sb, UDF_FLAG_UID_SET) &&
+ !uid_eq(attr->ia_uid, UDF_SB(sb)->s_uid))
+ return -EPERM;
+ if ((attr->ia_valid & ATTR_GID) &&
+ UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET) &&
+ !gid_eq(attr->ia_gid, UDF_SB(sb)->s_gid))
+ return -EPERM;
+
if ((attr->ia_valid & ATTR_SIZE) &&
attr->ia_size != i_size_read(inode)) {
error = udf_setsize(inode, attr->ia_size);
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index b6e420c1bfeb..b7a0d4b4bda1 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -104,6 +104,10 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode)
}
inode_init_owner(inode, dir, mode);
+ if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_SET))
+ inode->i_uid = sbi->s_uid;
+ if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET))
+ inode->i_gid = sbi->s_gid;
iinfo->i_location.logicalBlockNum = block;
iinfo->i_location.partitionReferenceNum =
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index c23744d5ae5c..c80765d62f7e 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1275,6 +1275,7 @@ static int udf_read_inode(struct inode *inode, bool hidden_inode)
unsigned int indirections = 0;
int bs = inode->i_sb->s_blocksize;
int ret = -EIO;
+ uint32_t uid, gid;
reread:
if (iloc->partitionReferenceNum >= sbi->s_partitions) {
@@ -1400,17 +1401,19 @@ reread:
ret = -EIO;
read_lock(&sbi->s_cred_lock);
- i_uid_write(inode, le32_to_cpu(fe->uid));
- if (!uid_valid(inode->i_uid) ||
- UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_IGNORE) ||
+ uid = le32_to_cpu(fe->uid);
+ if (uid == UDF_INVALID_ID ||
UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_SET))
- inode->i_uid = UDF_SB(inode->i_sb)->s_uid;
+ inode->i_uid = sbi->s_uid;
+ else
+ i_uid_write(inode, uid);
- i_gid_write(inode, le32_to_cpu(fe->gid));
- if (!gid_valid(inode->i_gid) ||
- UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_IGNORE) ||
+ gid = le32_to_cpu(fe->gid);
+ if (gid == UDF_INVALID_ID ||
UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_SET))
- inode->i_gid = UDF_SB(inode->i_sb)->s_gid;
+ inode->i_gid = sbi->s_gid;
+ else
+ i_gid_write(inode, gid);
if (fe->icbTag.fileType != ICBTAG_FILE_TYPE_DIRECTORY &&
sbi->s_fmode != UDF_INVALID_MODE)
@@ -1655,12 +1658,12 @@ static int udf_update_inode(struct inode *inode, int do_sync)
}
if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_FORGET))
- fe->uid = cpu_to_le32(-1);
+ fe->uid = cpu_to_le32(UDF_INVALID_ID);
else
fe->uid = cpu_to_le32(i_uid_read(inode));
if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_FORGET))
- fe->gid = cpu_to_le32(-1);
+ fe->gid = cpu_to_le32(UDF_INVALID_ID);
else
fe->gid = cpu_to_le32(i_gid_read(inode));
diff --git a/fs/udf/super.c b/fs/udf/super.c
index f73239a9a97d..7949c338efa5 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -64,14 +64,13 @@
#include <linux/init.h>
#include <linux/uaccess.h>
-#define VDS_POS_PRIMARY_VOL_DESC 0
-#define VDS_POS_UNALLOC_SPACE_DESC 1
-#define VDS_POS_LOGICAL_VOL_DESC 2
-#define VDS_POS_PARTITION_DESC 3
-#define VDS_POS_IMP_USE_VOL_DESC 4
-#define VDS_POS_VOL_DESC_PTR 5
-#define VDS_POS_TERMINATING_DESC 6
-#define VDS_POS_LENGTH 7
+enum {
+ VDS_POS_PRIMARY_VOL_DESC,
+ VDS_POS_UNALLOC_SPACE_DESC,
+ VDS_POS_LOGICAL_VOL_DESC,
+ VDS_POS_IMP_USE_VOL_DESC,
+ VDS_POS_LENGTH
+};
#define VSD_FIRST_SECTOR_OFFSET 32768
#define VSD_MAX_SECTOR_OFFSET 0x800000
@@ -223,10 +222,6 @@ struct udf_options {
unsigned int session;
unsigned int lastblock;
unsigned int anchor;
- unsigned int volume;
- unsigned short partition;
- unsigned int fileset;
- unsigned int rootdir;
unsigned int flags;
umode_t umask;
kgid_t gid;
@@ -349,12 +344,8 @@ static int udf_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",shortad");
if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_FORGET))
seq_puts(seq, ",uid=forget");
- if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_IGNORE))
- seq_puts(seq, ",uid=ignore");
if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_FORGET))
seq_puts(seq, ",gid=forget");
- if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_IGNORE))
- seq_puts(seq, ",gid=ignore");
if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_SET))
seq_printf(seq, ",uid=%u", from_kuid(&init_user_ns, sbi->s_uid));
if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET))
@@ -371,10 +362,6 @@ static int udf_show_options(struct seq_file *seq, struct dentry *root)
seq_printf(seq, ",lastblock=%u", sbi->s_last_block);
if (sbi->s_anchor != 0)
seq_printf(seq, ",anchor=%u", sbi->s_anchor);
- /*
- * volume, partition, fileset and rootdir seem to be ignored
- * currently
- */
if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8))
seq_puts(seq, ",utf8");
if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP) && sbi->s_nls_map)
@@ -487,14 +474,9 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
int option;
uopt->novrs = 0;
- uopt->partition = 0xFFFF;
uopt->session = 0xFFFFFFFF;
uopt->lastblock = 0;
uopt->anchor = 0;
- uopt->volume = 0xFFFFFFFF;
- uopt->rootdir = 0xFFFFFFFF;
- uopt->fileset = 0xFFFFFFFF;
- uopt->nls_map = NULL;
if (!options)
return 1;
@@ -582,42 +564,30 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
uopt->anchor = option;
break;
case Opt_volume:
- if (match_int(args, &option))
- return 0;
- uopt->volume = option;
- break;
case Opt_partition:
- if (match_int(args, &option))
- return 0;
- uopt->partition = option;
- break;
case Opt_fileset:
- if (match_int(args, &option))
- return 0;
- uopt->fileset = option;
- break;
case Opt_rootdir:
- if (match_int(args, &option))
- return 0;
- uopt->rootdir = option;
+ /* Ignored (never implemented properly) */
break;
case Opt_utf8:
uopt->flags |= (1 << UDF_FLAG_UTF8);
break;
#ifdef CONFIG_UDF_NLS
case Opt_iocharset:
- uopt->nls_map = load_nls(args[0].from);
- uopt->flags |= (1 << UDF_FLAG_NLS_MAP);
+ if (!remount) {
+ if (uopt->nls_map)
+ unload_nls(uopt->nls_map);
+ uopt->nls_map = load_nls(args[0].from);
+ uopt->flags |= (1 << UDF_FLAG_NLS_MAP);
+ }
break;
#endif
- case Opt_uignore:
- uopt->flags |= (1 << UDF_FLAG_UID_IGNORE);
- break;
case Opt_uforget:
uopt->flags |= (1 << UDF_FLAG_UID_FORGET);
break;
+ case Opt_uignore:
case Opt_gignore:
- uopt->flags |= (1 << UDF_FLAG_GID_IGNORE);
+ /* These options are superseeded by uid=<number> */
break;
case Opt_gforget:
uopt->flags |= (1 << UDF_FLAG_GID_FORGET);
@@ -660,6 +630,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
uopt.umask = sbi->s_umask;
uopt.fmode = sbi->s_fmode;
uopt.dmode = sbi->s_dmode;
+ uopt.nls_map = NULL;
if (!udf_parse_options(options, &uopt, true))
return -EINVAL;
@@ -1592,6 +1563,60 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_
sbi->s_lvid_bh = NULL;
}
+/*
+ * Step for reallocation of table of partition descriptor sequence numbers.
+ * Must be power of 2.
+ */
+#define PART_DESC_ALLOC_STEP 32
+
+struct desc_seq_scan_data {
+ struct udf_vds_record vds[VDS_POS_LENGTH];
+ unsigned int size_part_descs;
+ struct udf_vds_record *part_descs_loc;
+};
+
+static struct udf_vds_record *handle_partition_descriptor(
+ struct buffer_head *bh,
+ struct desc_seq_scan_data *data)
+{
+ struct partitionDesc *desc = (struct partitionDesc *)bh->b_data;
+ int partnum;
+
+ partnum = le16_to_cpu(desc->partitionNumber);
+ if (partnum >= data->size_part_descs) {
+ struct udf_vds_record *new_loc;
+ unsigned int new_size = ALIGN(partnum, PART_DESC_ALLOC_STEP);
+
+ new_loc = kzalloc(sizeof(*new_loc) * new_size, GFP_KERNEL);
+ if (!new_loc)
+ return ERR_PTR(-ENOMEM);
+ memcpy(new_loc, data->part_descs_loc,
+ data->size_part_descs * sizeof(*new_loc));
+ kfree(data->part_descs_loc);
+ data->part_descs_loc = new_loc;
+ data->size_part_descs = new_size;
+ }
+ return &(data->part_descs_loc[partnum]);
+}
+
+
+static struct udf_vds_record *get_volume_descriptor_record(uint16_t ident,
+ struct buffer_head *bh, struct desc_seq_scan_data *data)
+{
+ switch (ident) {
+ case TAG_IDENT_PVD: /* ISO 13346 3/10.1 */
+ return &(data->vds[VDS_POS_PRIMARY_VOL_DESC]);
+ case TAG_IDENT_IUVD: /* ISO 13346 3/10.4 */
+ return &(data->vds[VDS_POS_IMP_USE_VOL_DESC]);
+ case TAG_IDENT_LVD: /* ISO 13346 3/10.6 */
+ return &(data->vds[VDS_POS_LOGICAL_VOL_DESC]);
+ case TAG_IDENT_USD: /* ISO 13346 3/10.8 */
+ return &(data->vds[VDS_POS_UNALLOC_SPACE_DESC]);
+ case TAG_IDENT_PD: /* ISO 13346 3/10.5 */
+ return handle_partition_descriptor(bh, data);
+ }
+ return NULL;
+}
/*
* Process a main/reserve volume descriptor sequence.
@@ -1608,18 +1633,23 @@ static noinline int udf_process_sequence(
struct kernel_lb_addr *fileset)
{
struct buffer_head *bh = NULL;
- struct udf_vds_record vds[VDS_POS_LENGTH];
struct udf_vds_record *curr;
struct generic_desc *gd;
struct volDescPtr *vdp;
bool done = false;
uint32_t vdsn;
uint16_t ident;
- long next_s = 0, next_e = 0;
int ret;
unsigned int indirections = 0;
-
- memset(vds, 0, sizeof(struct udf_vds_record) * VDS_POS_LENGTH);
+ struct desc_seq_scan_data data;
+ unsigned int i;
+
+ memset(data.vds, 0, sizeof(struct udf_vds_record) * VDS_POS_LENGTH);
+ data.size_part_descs = PART_DESC_ALLOC_STEP;
+ data.part_descs_loc = kzalloc(sizeof(*data.part_descs_loc) *
+ data.size_part_descs, GFP_KERNEL);
+ if (!data.part_descs_loc)
+ return -ENOMEM;
/*
* Read the main descriptor sequence and find which descriptors
@@ -1628,79 +1658,51 @@ static noinline int udf_process_sequence(
for (; (!done && block <= lastblock); block++) {
bh = udf_read_tagged(sb, block, block, &ident);
- if (!bh) {
- udf_err(sb,
- "Block %llu of volume descriptor sequence is corrupted or we could not read it\n",
- (unsigned long long)block);
- return -EAGAIN;
- }
+ if (!bh)
+ break;
/* Process each descriptor (ISO 13346 3/8.3-8.4) */
gd = (struct generic_desc *)bh->b_data;
vdsn = le32_to_cpu(gd->volDescSeqNum);
switch (ident) {
- case TAG_IDENT_PVD: /* ISO 13346 3/10.1 */
- curr = &vds[VDS_POS_PRIMARY_VOL_DESC];
- if (vdsn >= curr->volDescSeqNum) {
- curr->volDescSeqNum = vdsn;
- curr->block = block;
- }
- break;
case TAG_IDENT_VDP: /* ISO 13346 3/10.3 */
- curr = &vds[VDS_POS_VOL_DESC_PTR];
- if (vdsn >= curr->volDescSeqNum) {
- curr->volDescSeqNum = vdsn;
- curr->block = block;
-
- vdp = (struct volDescPtr *)bh->b_data;
- next_s = le32_to_cpu(
- vdp->nextVolDescSeqExt.extLocation);
- next_e = le32_to_cpu(
- vdp->nextVolDescSeqExt.extLength);
- next_e = next_e >> sb->s_blocksize_bits;
- next_e += next_s;
+ if (++indirections > UDF_MAX_TD_NESTING) {
+ udf_err(sb, "too many Volume Descriptor "
+ "Pointers (max %u supported)\n",
+ UDF_MAX_TD_NESTING);
+ brelse(bh);
+ return -EIO;
}
+
+ vdp = (struct volDescPtr *)bh->b_data;
+ block = le32_to_cpu(vdp->nextVolDescSeqExt.extLocation);
+ lastblock = le32_to_cpu(
+ vdp->nextVolDescSeqExt.extLength) >>
+ sb->s_blocksize_bits;
+ lastblock += block - 1;
+ /* For loop is going to increment 'block' again */
+ block--;
break;
+ case TAG_IDENT_PVD: /* ISO 13346 3/10.1 */
case TAG_IDENT_IUVD: /* ISO 13346 3/10.4 */
- curr = &vds[VDS_POS_IMP_USE_VOL_DESC];
- if (vdsn >= curr->volDescSeqNum) {
- curr->volDescSeqNum = vdsn;
- curr->block = block;
- }
- break;
- case TAG_IDENT_PD: /* ISO 13346 3/10.5 */
- curr = &vds[VDS_POS_PARTITION_DESC];
- if (!curr->block)
- curr->block = block;
- break;
case TAG_IDENT_LVD: /* ISO 13346 3/10.6 */
- curr = &vds[VDS_POS_LOGICAL_VOL_DESC];
- if (vdsn >= curr->volDescSeqNum) {
- curr->volDescSeqNum = vdsn;
- curr->block = block;
- }
- break;
case TAG_IDENT_USD: /* ISO 13346 3/10.8 */
- curr = &vds[VDS_POS_UNALLOC_SPACE_DESC];
+ case TAG_IDENT_PD: /* ISO 13346 3/10.5 */
+ curr = get_volume_descriptor_record(ident, bh, &data);
+ if (IS_ERR(curr)) {
+ brelse(bh);
+ return PTR_ERR(curr);
+ }
+ /* Descriptor we don't care about? */
+ if (!curr)
+ break;
if (vdsn >= curr->volDescSeqNum) {
curr->volDescSeqNum = vdsn;
curr->block = block;
}
break;
case TAG_IDENT_TD: /* ISO 13346 3/10.9 */
- if (++indirections > UDF_MAX_TD_NESTING) {
- udf_err(sb, "too many TDs (max %u supported)\n", UDF_MAX_TD_NESTING);
- brelse(bh);
- return -EIO;
- }
-
- vds[VDS_POS_TERMINATING_DESC].block = block;
- if (next_e) {
- block = next_s;
- lastblock = next_e;
- next_s = next_e = 0;
- } else
- done = true;
+ done = true;
break;
}
brelse(bh);
@@ -1709,31 +1711,27 @@ static noinline int udf_process_sequence(
* Now read interesting descriptors again and process them
* in a suitable order
*/
- if (!vds[VDS_POS_PRIMARY_VOL_DESC].block) {
+ if (!data.vds[VDS_POS_PRIMARY_VOL_DESC].block) {
udf_err(sb, "Primary Volume Descriptor not found!\n");
return -EAGAIN;
}
- ret = udf_load_pvoldesc(sb, vds[VDS_POS_PRIMARY_VOL_DESC].block);
+ ret = udf_load_pvoldesc(sb, data.vds[VDS_POS_PRIMARY_VOL_DESC].block);
if (ret < 0)
return ret;
- if (vds[VDS_POS_LOGICAL_VOL_DESC].block) {
+ if (data.vds[VDS_POS_LOGICAL_VOL_DESC].block) {
ret = udf_load_logicalvol(sb,
- vds[VDS_POS_LOGICAL_VOL_DESC].block,
- fileset);
+ data.vds[VDS_POS_LOGICAL_VOL_DESC].block,
+ fileset);
if (ret < 0)
return ret;
}
- if (vds[VDS_POS_PARTITION_DESC].block) {
- /*
- * We rescan the whole descriptor sequence to find
- * partition descriptor blocks and process them.
- */
- for (block = vds[VDS_POS_PARTITION_DESC].block;
- block < vds[VDS_POS_TERMINATING_DESC].block;
- block++) {
- ret = udf_load_partdesc(sb, block);
+ /* Now handle prevailing Partition Descriptors */
+ for (i = 0; i < data.size_part_descs; i++) {
+ if (data.part_descs_loc[i].block) {
+ ret = udf_load_partdesc(sb,
+ data.part_descs_loc[i].block);
if (ret < 0)
return ret;
}
@@ -1760,13 +1758,13 @@ static int udf_load_sequence(struct super_block *sb, struct buffer_head *bh,
main_s = le32_to_cpu(anchor->mainVolDescSeqExt.extLocation);
main_e = le32_to_cpu(anchor->mainVolDescSeqExt.extLength);
main_e = main_e >> sb->s_blocksize_bits;
- main_e += main_s;
+ main_e += main_s - 1;
/* Locate the reserve sequence */
reserve_s = le32_to_cpu(anchor->reserveVolDescSeqExt.extLocation);
reserve_e = le32_to_cpu(anchor->reserveVolDescSeqExt.extLength);
reserve_e = reserve_e >> sb->s_blocksize_bits;
- reserve_e += reserve_s;
+ reserve_e += reserve_s - 1;
/* Process the main & reserve sequences */
/* responsible for finding the PartitionDesc(s) */
@@ -1994,7 +1992,10 @@ static void udf_open_lvid(struct super_block *sb)
lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
ktime_get_real_ts(&ts);
udf_time_to_disk_stamp(&lvid->recordingDateAndTime, ts);
- lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_OPEN);
+ if (le32_to_cpu(lvid->integrityType) == LVID_INTEGRITY_TYPE_CLOSE)
+ lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_OPEN);
+ else
+ UDF_SET_FLAG(sb, UDF_FLAG_INCONSISTENT);
lvid->descTag.descCRC = cpu_to_le16(
crc_itu_t(0, (char *)lvid + sizeof(struct tag),
@@ -2034,7 +2035,8 @@ static void udf_close_lvid(struct super_block *sb)
lvidiu->minUDFReadRev = cpu_to_le16(sbi->s_udfrev);
if (sbi->s_udfrev > le16_to_cpu(lvidiu->minUDFWriteRev))
lvidiu->minUDFWriteRev = cpu_to_le16(sbi->s_udfrev);
- lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE);
+ if (!UDF_QUERY_FLAG(sb, UDF_FLAG_INCONSISTENT))
+ lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE);
lvid->descTag.descCRC = cpu_to_le16(
crc_itu_t(0, (char *)lvid + sizeof(struct tag),
@@ -2091,11 +2093,13 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
bool lvid_open = false;
uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT);
- uopt.uid = INVALID_UID;
- uopt.gid = INVALID_GID;
+ /* By default we'll use overflow[ug]id when UDF inode [ug]id == -1 */
+ uopt.uid = make_kuid(current_user_ns(), overflowuid);
+ uopt.gid = make_kgid(current_user_ns(), overflowgid);
uopt.umask = 0;
uopt.fmode = UDF_INVALID_MODE;
uopt.dmode = UDF_INVALID_MODE;
+ uopt.nls_map = NULL;
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
@@ -2276,8 +2280,8 @@ error_out:
iput(sbi->s_vat_inode);
parse_options_failure:
#ifdef CONFIG_UDF_NLS
- if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
- unload_nls(sbi->s_nls_map);
+ if (uopt.nls_map)
+ unload_nls(uopt.nls_map);
#endif
if (lvid_open)
udf_close_lvid(sb);
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 68c9f1d618f5..9dd3e1b9619e 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -23,14 +23,13 @@
#define UDF_FLAG_NLS_MAP 9
#define UDF_FLAG_UTF8 10
#define UDF_FLAG_UID_FORGET 11 /* save -1 for uid to disk */
-#define UDF_FLAG_UID_IGNORE 12 /* use sb uid instead of on disk uid */
-#define UDF_FLAG_GID_FORGET 13
-#define UDF_FLAG_GID_IGNORE 14
-#define UDF_FLAG_UID_SET 15
-#define UDF_FLAG_GID_SET 16
-#define UDF_FLAG_SESSION_SET 17
-#define UDF_FLAG_LASTBLOCK_SET 18
-#define UDF_FLAG_BLOCKSIZE_SET 19
+#define UDF_FLAG_GID_FORGET 12
+#define UDF_FLAG_UID_SET 13
+#define UDF_FLAG_GID_SET 14
+#define UDF_FLAG_SESSION_SET 15
+#define UDF_FLAG_LASTBLOCK_SET 16
+#define UDF_FLAG_BLOCKSIZE_SET 17
+#define UDF_FLAG_INCONSISTENT 18
#define UDF_PART_FLAG_UNALLOC_BITMAP 0x0001
#define UDF_PART_FLAG_UNALLOC_TABLE 0x0002
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index f5e0fe78979e..68e8a64d22e0 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -48,6 +48,8 @@ extern __printf(3, 4) void _udf_warn(struct super_block *sb,
#define UDF_EXTENT_LENGTH_MASK 0x3FFFFFFF
#define UDF_EXTENT_FLAG_MASK 0xC0000000
+#define UDF_INVALID_ID ((uint32_t)-1)
+
#define UDF_NAME_PAD 4
#define UDF_NAME_LEN 254
#define UDF_NAME_LEN_CS0 255
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 19eadc807056..31f1f10eecd1 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1390,7 +1390,7 @@ xfs_vm_bmap(
/*
* The swap code (ab-)uses ->bmap to get a block mapping and then
- * bypasseѕ the file system for actual I/O. We really can't allow
+ * bypasses the file system for actual I/O. We really can't allow
* that on reflinks inodes, so we have to skip out here. And yes,
* 0 is the magic code for a bmap error.
*
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index 761f3189eff2..eed698aa9f16 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -122,7 +122,7 @@ xfs_nfs_get_inode(
struct super_block *sb,
u64 ino,
u32 generation)
- {
+{
xfs_mount_t *mp = XFS_M(sb);
xfs_inode_t *ip;
int error;