Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/cache.h | 1
-rw-r--r--  fs/binfmt_elf.c | 12
-rw-r--r--  fs/buffer.c | 24
-rw-r--r--  fs/cachefiles/namei.c | 2
-rw-r--r--  fs/cachefiles/rdwr.c | 73
-rw-r--r--  fs/dax.c | 5
-rw-r--r--  fs/debugfs/inode.c | 6
-rw-r--r--  fs/ext4/inode.c | 4
-rw-r--r--  fs/fscache/netfs.c | 38
-rw-r--r--  fs/fscache/page.c | 2
-rw-r--r--  fs/lockd/host.c | 8
-rw-r--r--  fs/lockd/mon.c | 125
-rw-r--r--  fs/lockd/netns.h | 4
-rw-r--r--  fs/lockd/svc.c | 2
-rw-r--r--  fs/lockd/svc4proc.c | 2
-rw-r--r--  fs/lockd/svcproc.c | 2
-rw-r--r--  fs/nfsd/nfs3xdr.c | 4
-rw-r--r--  fs/nfsd/nfs4layouts.c | 34
-rw-r--r--  fs/nfsd/nfs4proc.c | 4
-rw-r--r--  fs/nfsd/nfs4state.c | 265
-rw-r--r--  fs/nfsd/nfscache.c | 32
-rw-r--r--  fs/nfsd/nfsfh.c | 5
-rw-r--r--  fs/nfsd/nfsfh.h | 20
-rw-r--r--  fs/nfsd/state.h | 43
-rw-r--r--  fs/nfsd/trace.c | 2
-rw-r--r--  fs/nfsd/trace.h | 2
-rw-r--r--  fs/nfsd/vfs.c | 4
-rw-r--r--  fs/nfsd/vfs.h | 4
-rw-r--r--  fs/nfsd/xdr4.h | 2
-rw-r--r--  fs/nilfs2/file.c | 2
-rw-r--r--  fs/pipe.c | 18
-rw-r--r--  fs/xfs/Makefile | 2
-rw-r--r--  fs/xfs/kmem.c | 10
-rw-r--r--  fs/xfs/libxfs/xfs_alloc.c | 30
-rw-r--r--  fs/xfs/libxfs/xfs_alloc.h | 8
-rw-r--r--  fs/xfs/libxfs/xfs_attr.c | 6
-rw-r--r--  fs/xfs/libxfs/xfs_attr_leaf.c | 3
-rw-r--r--  fs/xfs/libxfs/xfs_attr_remote.c | 2
-rw-r--r--  fs/xfs/libxfs/xfs_bmap.c | 65
-rw-r--r--  fs/xfs/libxfs/xfs_bmap.h | 13
-rw-r--r--  fs/xfs/libxfs/xfs_btree.c | 21
-rw-r--r--  fs/xfs/libxfs/xfs_btree.h | 39
-rw-r--r--  fs/xfs/libxfs/xfs_da_btree.c | 4
-rw-r--r--  fs/xfs/libxfs/xfs_dir2.c | 6
-rw-r--r--  fs/xfs/libxfs/xfs_dir2_block.c | 3
-rw-r--r--  fs/xfs/libxfs/xfs_dir2_data.c | 3
-rw-r--r--  fs/xfs/libxfs/xfs_dir2_leaf.c | 3
-rw-r--r--  fs/xfs/libxfs/xfs_dir2_node.c | 3
-rw-r--r--  fs/xfs/libxfs/xfs_format.h | 18
-rw-r--r--  fs/xfs/libxfs/xfs_fs.h | 10
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc.c | 10
-rw-r--r--  fs/xfs/libxfs/xfs_sb.c | 10
-rw-r--r--  fs/xfs/libxfs/xfs_symlink_remote.c | 7
-rw-r--r--  fs/xfs/xfs_acl.c | 14
-rw-r--r--  fs/xfs/xfs_acl.h | 4
-rw-r--r--  fs/xfs/xfs_aops.c | 119
-rw-r--r--  fs/xfs/xfs_aops.h | 3
-rw-r--r--  fs/xfs/xfs_attr_list.c | 2
-rw-r--r--  fs/xfs/xfs_bmap_util.c | 38
-rw-r--r--  fs/xfs/xfs_buf.c | 21
-rw-r--r--  fs/xfs/xfs_dir2_readdir.c | 2
-rw-r--r--  fs/xfs/xfs_dquot.c | 14
-rw-r--r--  fs/xfs/xfs_file.c | 116
-rw-r--r--  fs/xfs/xfs_icache.c | 18
-rw-r--r--  fs/xfs/xfs_inode.c | 8
-rw-r--r--  fs/xfs/xfs_inode_item.c | 1
-rw-r--r--  fs/xfs/xfs_inode_item.h | 1
-rw-r--r--  fs/xfs/xfs_ioctl.c | 23
-rw-r--r--  fs/xfs/xfs_ioctl32.c | 2
-rw-r--r--  fs/xfs/xfs_iomap.c | 70
-rw-r--r--  fs/xfs/xfs_iops.c | 4
-rw-r--r--  fs/xfs/xfs_linux.h | 7
-rw-r--r--  fs/xfs/xfs_log.c | 93
-rw-r--r--  fs/xfs/xfs_log.h | 1
-rw-r--r--  fs/xfs/xfs_log_priv.h | 51
-rw-r--r--  fs/xfs/xfs_log_recover.c | 14
-rw-r--r--  fs/xfs/xfs_message.c | 7
-rw-r--r--  fs/xfs/xfs_mount.c | 21
-rw-r--r--  fs/xfs/xfs_mount.h | 5
-rw-r--r--  fs/xfs/xfs_pnfs.c | 5
-rw-r--r--  fs/xfs/xfs_qm.c | 14
-rw-r--r--  fs/xfs/xfs_stats.c | 93
-rw-r--r--  fs/xfs/xfs_stats.h | 36
-rw-r--r--  fs/xfs/xfs_super.c | 57
-rw-r--r--  fs/xfs/xfs_sysctl.c | 15
-rw-r--r--  fs/xfs/xfs_sysfs.c | 185
-rw-r--r--  fs/xfs/xfs_sysfs.h | 1
-rw-r--r--  fs/xfs/xfs_trace.h | 2
-rw-r--r--  fs/xfs/xfs_trans.c | 6
-rw-r--r--  fs/xfs/xfs_trans_ail.c | 13
-rw-r--r--  fs/xfs/xfs_trans_inode.c | 9
-rw-r--r--  fs/xfs/xfs_xattr.c | 31
92 files changed, 1396 insertions, 757 deletions
diff --git a/fs/9p/cache.h b/fs/9p/cache.h
index 2f9675491095..247e47e54bcc 100644
--- a/fs/9p/cache.h
+++ b/fs/9p/cache.h
@@ -21,6 +21,7 @@
*/
#ifndef _9P_CACHE_H
+#define _9P_CACHE_H
#ifdef CONFIG_9P_FSCACHE
#include <linux/fscache.h>
#include <linux/spinlock.h>
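
The one-line addition above completes a broken include guard: the header tested _9P_CACHE_H but never defined it, so the #ifndef stayed true on every inclusion and the header body was re-expanded each time. For reference, the full guard pattern the fix restores (sketch):

    #ifndef _9P_CACHE_H
    #define _9P_CACHE_H            /* this define was the missing half */
    /* ... declarations ... */
    #endif /* _9P_CACHE_H */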
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 5f399ea1d20a..3a93755e880f 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -488,7 +488,7 @@ static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
}
/**
- * arch_check_elf() - check a PT_LOPROC..PT_HIPROC ELF program header
+ * arch_check_elf() - check an ELF executable
* @ehdr: The main ELF header
* @has_interp: True if the ELF has an interpreter, else false.
* @state: Architecture-specific state preserved throughout the process
@@ -760,16 +760,16 @@ static int load_elf_binary(struct linux_binprm *bprm)
*/
would_dump(bprm, interpreter);
- retval = kernel_read(interpreter, 0, bprm->buf,
- BINPRM_BUF_SIZE);
- if (retval != BINPRM_BUF_SIZE) {
+ /* Get the exec headers */
+ retval = kernel_read(interpreter, 0,
+ (void *)&loc->interp_elf_ex,
+ sizeof(loc->interp_elf_ex));
+ if (retval != sizeof(loc->interp_elf_ex)) {
if (retval >= 0)
retval = -EIO;
goto out_free_dentry;
}
- /* Get the exec headers */
- loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
break;
}
elf_ppnt++;
diff --git a/fs/buffer.c b/fs/buffer.c
index 51aff0296ce2..4f4cd959da7c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2420,9 +2420,9 @@ EXPORT_SYMBOL(block_commit_write);
* unlock the page.
*
* Direct callers of this function should protect against filesystem freezing
- * using sb_start_write() - sb_end_write() functions.
+ * using sb_start_pagefault() - sb_end_pagefault() functions.
*/
-int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
+int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
get_block_t get_block)
{
struct page *page = vmf->page;
@@ -2459,26 +2459,6 @@ out_unlock:
unlock_page(page);
return ret;
}
-EXPORT_SYMBOL(__block_page_mkwrite);
-
-int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
- get_block_t get_block)
-{
- int ret;
- struct super_block *sb = file_inode(vma->vm_file)->i_sb;
-
- sb_start_pagefault(sb);
-
- /*
- * Update file times before taking page lock. We may end up failing the
- * fault so this update may be superfluous but who really cares...
- */
- file_update_time(vma->vm_file);
-
- ret = __block_page_mkwrite(vma, vmf, get_block);
- sb_end_pagefault(sb);
- return block_page_mkwrite_return(ret);
-}
EXPORT_SYMBOL(block_page_mkwrite);
/*
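
With the __block_page_mkwrite() wrapper folded into block_page_mkwrite(), freeze protection and the file-time update become the caller's job, which is why the comment now points at sb_start_pagefault()/sb_end_pagefault(); the ext4 and nilfs2 hunks below switch to calling the function directly. A minimal sketch of a ->page_mkwrite handler under the new convention (the myfs_* names are illustrative):

    static int myfs_page_mkwrite(struct vm_area_struct *vma,
                                 struct vm_fault *vmf)
    {
            struct super_block *sb = file_inode(vma->vm_file)->i_sb;
            int ret;

            sb_start_pagefault(sb);         /* freeze protection */
            file_update_time(vma->vm_file); /* before taking the page lock */
            ret = block_page_mkwrite(vma, vmf, myfs_get_block);
            sb_end_pagefault(sb);
            return block_page_mkwrite_return(ret);
    }

This mirrors the removed wrapper, so simple callers keep the same semantics, while filesystems that do their own transaction or freeze handling can order things differently.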
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index fc1056f5c96a..c4b893453e0e 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -655,6 +655,8 @@ lookup_again:
aops = d_backing_inode(object->dentry)->i_mapping->a_ops;
if (!aops->bmap)
goto check_error;
+ if (object->dentry->d_sb->s_blocksize > PAGE_SIZE)
+ goto check_error;
object->backer = object->dentry;
} else {
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 3cbb0e834694..7a6b02f72787 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -414,9 +414,6 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
ASSERT(inode->i_mapping->a_ops->readpages);
/* calculate the shift required to use bmap */
- if (inode->i_sb->s_blocksize > PAGE_SIZE)
- goto enobufs;
-
shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
op->op.flags &= FSCACHE_OP_KEEP_FLAGS;
@@ -711,9 +708,6 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
ASSERT(inode->i_mapping->a_ops->readpages);
/* calculate the shift required to use bmap */
- if (inode->i_sb->s_blocksize > PAGE_SIZE)
- goto all_enobufs;
-
shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
pagevec_init(&pagevec, 0);
@@ -905,6 +899,15 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
cache = container_of(object->fscache.cache,
struct cachefiles_cache, cache);
+ pos = (loff_t)page->index << PAGE_SHIFT;
+
+ /* We mustn't write more data than we have, so we have to beware of a
+ * partial page at EOF.
+ */
+ eof = object->fscache.store_limit_l;
+ if (pos >= eof)
+ goto error;
+
/* write the page to the backing filesystem and let it store it in its
* own time */
path.mnt = cache->mnt;
@@ -912,40 +915,38 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
file = dentry_open(&path, O_RDWR | O_LARGEFILE, cache->cache_cred);
if (IS_ERR(file)) {
ret = PTR_ERR(file);
- } else {
- pos = (loff_t) page->index << PAGE_SHIFT;
-
- /* we mustn't write more data than we have, so we have
- * to beware of a partial page at EOF */
- eof = object->fscache.store_limit_l;
- len = PAGE_SIZE;
- if (eof & ~PAGE_MASK) {
- ASSERTCMP(pos, <, eof);
- if (eof - pos < PAGE_SIZE) {
- _debug("cut short %llx to %llx",
- pos, eof);
- len = eof - pos;
- ASSERTCMP(pos + len, ==, eof);
- }
- }
-
- data = kmap(page);
- ret = __kernel_write(file, data, len, &pos);
- kunmap(page);
- if (ret != len)
- ret = -EIO;
- fput(file);
+ goto error_2;
}
- if (ret < 0) {
- if (ret == -EIO)
- cachefiles_io_error_obj(
- object, "Write page to backing file failed");
- ret = -ENOBUFS;
+ len = PAGE_SIZE;
+ if (eof & ~PAGE_MASK) {
+ if (eof - pos < PAGE_SIZE) {
+ _debug("cut short %llx to %llx",
+ pos, eof);
+ len = eof - pos;
+ ASSERTCMP(pos + len, ==, eof);
+ }
}
- _leave(" = %d", ret);
- return ret;
+ data = kmap(page);
+ ret = __kernel_write(file, data, len, &pos);
+ kunmap(page);
+ fput(file);
+ if (ret != len)
+ goto error_eio;
+
+ _leave(" = 0");
+ return 0;
+
+error_eio:
+ ret = -EIO;
+error_2:
+ if (ret == -EIO)
+ cachefiles_io_error_obj(object,
+ "Write page to backing file failed");
+error:
+ _leave(" = -ENOBUFS [%d]", ret);
+ return -ENOBUFS;
}
/*
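
The restructured function keeps the partial-page-at-EOF clamp but now rejects wholly out-of-range pages before opening the backing file. A worked example of the arithmetic (values illustrative, PAGE_SIZE = 4096):

    /*
     * page->index = 3:  pos = 3 << PAGE_SHIFT = 0x3000
     * store_limit_l:    eof = 0x3400             (eof & ~PAGE_MASK != 0)
     * eof - pos = 0x400 < PAGE_SIZE          =>  len = 0x400
     *   -> only the 0x400 valid bytes are written.
     * page->index = 4:  pos = 0x4000 >= eof
     *   -> the new guard returns -ENOBUFS before any I/O is issued.
     */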
diff --git a/fs/dax.c b/fs/dax.c
index 7b653e9aa8d1..8e17b371aeb8 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -29,6 +29,11 @@
#include <linux/uio.h>
#include <linux/vmstat.h>
+/*
+ * dax_clear_blocks() is called from within transaction context from XFS,
+ * so the stack from this point must follow GFP_NOFS
+ * semantics for all operations.
+ */
int dax_clear_blocks(struct inode *inode, sector_t block, long size)
{
struct block_device *bdev = inode->i_sb->s_bdev;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 5d8f35f1382a..b7fcc0de0b2f 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -271,8 +271,12 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
dput(dentry);
dentry = ERR_PTR(-EEXIST);
}
- if (IS_ERR(dentry))
+
+ if (IS_ERR(dentry)) {
mutex_unlock(&d_inode(parent)->i_mutex);
+ simple_release_fs(&debugfs_mount, &debugfs_mount_count);
+ }
+
return dentry;
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7d1aad1d9313..ea433a7f4bca 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5283,7 +5283,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
!ext4_should_journal_data(inode) &&
!ext4_nonda_switch(inode->i_sb)) {
do {
- ret = __block_page_mkwrite(vma, vmf,
+ ret = block_page_mkwrite(vma, vmf,
ext4_da_get_block_prep);
} while (ret == -ENOSPC &&
ext4_should_retry_alloc(inode->i_sb, &retries));
@@ -5330,7 +5330,7 @@ retry_alloc:
ret = VM_FAULT_SIGBUS;
goto out;
}
- ret = __block_page_mkwrite(vma, vmf, get_block);
+ ret = block_page_mkwrite(vma, vmf, get_block);
if (!ret && ext4_should_journal_data(inode)) {
if (ext4_walk_page_buffers(handle, page_buffers(page), 0,
PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c
index 6d941f56faf4..9b28649df3a1 100644
--- a/fs/fscache/netfs.c
+++ b/fs/fscache/netfs.c
@@ -22,6 +22,7 @@ static LIST_HEAD(fscache_netfs_list);
int __fscache_register_netfs(struct fscache_netfs *netfs)
{
struct fscache_netfs *ptr;
+ struct fscache_cookie *cookie;
int ret;
_enter("{%s}", netfs->name);
@@ -29,29 +30,25 @@ int __fscache_register_netfs(struct fscache_netfs *netfs)
INIT_LIST_HEAD(&netfs->link);
/* allocate a cookie for the primary index */
- netfs->primary_index =
- kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL);
+ cookie = kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL);
- if (!netfs->primary_index) {
+ if (!cookie) {
_leave(" = -ENOMEM");
return -ENOMEM;
}
/* initialise the primary index cookie */
- atomic_set(&netfs->primary_index->usage, 1);
- atomic_set(&netfs->primary_index->n_children, 0);
- atomic_set(&netfs->primary_index->n_active, 1);
+ atomic_set(&cookie->usage, 1);
+ atomic_set(&cookie->n_children, 0);
+ atomic_set(&cookie->n_active, 1);
- netfs->primary_index->def = &fscache_fsdef_netfs_def;
- netfs->primary_index->parent = &fscache_fsdef_index;
- netfs->primary_index->netfs_data = netfs;
- netfs->primary_index->flags = 1 << FSCACHE_COOKIE_ENABLED;
+ cookie->def = &fscache_fsdef_netfs_def;
+ cookie->parent = &fscache_fsdef_index;
+ cookie->netfs_data = netfs;
+ cookie->flags = 1 << FSCACHE_COOKIE_ENABLED;
- atomic_inc(&netfs->primary_index->parent->usage);
- atomic_inc(&netfs->primary_index->parent->n_children);
-
- spin_lock_init(&netfs->primary_index->lock);
- INIT_HLIST_HEAD(&netfs->primary_index->backing_objects);
+ spin_lock_init(&cookie->lock);
+ INIT_HLIST_HEAD(&cookie->backing_objects);
/* check the netfs type is not already present */
down_write(&fscache_addremove_sem);
@@ -62,6 +59,10 @@ int __fscache_register_netfs(struct fscache_netfs *netfs)
goto already_registered;
}
+ atomic_inc(&cookie->parent->usage);
+ atomic_inc(&cookie->parent->n_children);
+
+ netfs->primary_index = cookie;
list_add(&netfs->link, &fscache_netfs_list);
ret = 0;
@@ -70,11 +71,8 @@ int __fscache_register_netfs(struct fscache_netfs *netfs)
already_registered:
up_write(&fscache_addremove_sem);
- if (ret < 0) {
- netfs->primary_index->parent = NULL;
- __fscache_cookie_put(netfs->primary_index);
- netfs->primary_index = NULL;
- }
+ if (ret < 0)
+ kmem_cache_free(fscache_cookie_jar, cookie);
_leave(" = %d", ret);
return ret;
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 79483b3d8c6f..6b35fc4860a0 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -816,7 +816,7 @@ static void fscache_write_op(struct fscache_operation *_op)
goto superseded;
page = results[0];
_debug("gang %d [%lx]", n, page->index);
- if (page->index > op->store_limit) {
+ if (page->index >= op->store_limit) {
fscache_stat(&fscache_n_store_pages_over_limit);
goto superseded;
}
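
This pairs with the cachefiles change above. Assuming op->store_limit holds the index of the first page that must not be stored (the object size in pages, rounded up), a page whose index equals the limit lies entirely beyond EOF, so the old strict > let exactly that page through:

    /*
     * Illustrative, with store_limit derived from an object size of
     * 0x3400 rounded up to whole pages: store_limit = 4.
     *   index 3 (0x3000..0x3fff): 3 < 4, stored; cachefiles clamps
     *                             the write to the 0x400 valid bytes
     *   index 4 (0x4000.......):  4 >= 4, wholly beyond EOF, skipped
     */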
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 969d589c848d..d716c9993a26 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -116,7 +116,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
atomic_inc(&nsm->sm_count);
else {
host = NULL;
- nsm = nsm_get_handle(ni->sap, ni->salen,
+ nsm = nsm_get_handle(ni->net, ni->sap, ni->salen,
ni->hostname, ni->hostname_len);
if (unlikely(nsm == NULL)) {
dprintk("lockd: %s failed; no nsm handle\n",
@@ -161,6 +161,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
host->h_nsmhandle = nsm;
host->h_addrbuf = nsm->sm_addrbuf;
host->net = ni->net;
+ strlcpy(host->nodename, utsname()->nodename, sizeof(host->nodename));
out:
return host;
@@ -534,17 +535,18 @@ static struct nlm_host *next_host_state(struct hlist_head *cache,
/**
* nlm_host_rebooted - Release all resources held by rebooted host
+ * @net: network namespace
* @info: pointer to decoded results of NLM_SM_NOTIFY call
*
* We were notified that the specified host has rebooted. Release
* all resources held by that peer.
*/
-void nlm_host_rebooted(const struct nlm_reboot *info)
+void nlm_host_rebooted(const struct net *net, const struct nlm_reboot *info)
{
struct nsm_handle *nsm;
struct nlm_host *host;
- nsm = nsm_reboot_lookup(info);
+ nsm = nsm_reboot_lookup(net, info);
if (unlikely(nsm == NULL))
return;
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 47a32b6d9b90..19166d4a8d31 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -42,7 +42,7 @@ struct nsm_args {
u32 proc;
char *mon_name;
- char *nodename;
+ const char *nodename;
};
struct nsm_res {
@@ -51,7 +51,6 @@ struct nsm_res {
};
static const struct rpc_program nsm_program;
-static LIST_HEAD(nsm_handles);
static DEFINE_SPINLOCK(nsm_lock);
/*
@@ -87,69 +86,18 @@ static struct rpc_clnt *nsm_create(struct net *net, const char *nodename)
return rpc_create(&args);
}
-static struct rpc_clnt *nsm_client_set(struct lockd_net *ln,
- struct rpc_clnt *clnt)
-{
- spin_lock(&ln->nsm_clnt_lock);
- if (ln->nsm_users == 0) {
- if (clnt == NULL)
- goto out;
- ln->nsm_clnt = clnt;
- }
- clnt = ln->nsm_clnt;
- ln->nsm_users++;
-out:
- spin_unlock(&ln->nsm_clnt_lock);
- return clnt;
-}
-
-static struct rpc_clnt *nsm_client_get(struct net *net, const char *nodename)
-{
- struct rpc_clnt *clnt, *new;
- struct lockd_net *ln = net_generic(net, lockd_net_id);
-
- clnt = nsm_client_set(ln, NULL);
- if (clnt != NULL)
- goto out;
-
- clnt = new = nsm_create(net, nodename);
- if (IS_ERR(clnt))
- goto out;
-
- clnt = nsm_client_set(ln, new);
- if (clnt != new)
- rpc_shutdown_client(new);
-out:
- return clnt;
-}
-
-static void nsm_client_put(struct net *net)
-{
- struct lockd_net *ln = net_generic(net, lockd_net_id);
- struct rpc_clnt *clnt = NULL;
-
- spin_lock(&ln->nsm_clnt_lock);
- ln->nsm_users--;
- if (ln->nsm_users == 0) {
- clnt = ln->nsm_clnt;
- ln->nsm_clnt = NULL;
- }
- spin_unlock(&ln->nsm_clnt_lock);
- if (clnt != NULL)
- rpc_shutdown_client(clnt);
-}
-
static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
- struct rpc_clnt *clnt)
+ const struct nlm_host *host)
{
int status;
+ struct rpc_clnt *clnt;
struct nsm_args args = {
.priv = &nsm->sm_priv,
.prog = NLM_PROGRAM,
.vers = 3,
.proc = NLMPROC_NSM_NOTIFY,
.mon_name = nsm->sm_mon_name,
- .nodename = clnt->cl_nodename,
+ .nodename = host->nodename,
};
struct rpc_message msg = {
.rpc_argp = &args,
@@ -158,6 +106,13 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
memset(res, 0, sizeof(*res));
+ clnt = nsm_create(host->net, host->nodename);
+ if (IS_ERR(clnt)) {
+ dprintk("lockd: failed to create NSM upcall transport, "
+ "status=%ld, net=%p\n", PTR_ERR(clnt), host->net);
+ return PTR_ERR(clnt);
+ }
+
msg.rpc_proc = &clnt->cl_procinfo[proc];
status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN);
if (status == -ECONNREFUSED) {
@@ -171,6 +126,8 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
status);
else
status = 0;
+
+ rpc_shutdown_client(clnt);
return status;
}
@@ -190,32 +147,19 @@ int nsm_monitor(const struct nlm_host *host)
struct nsm_handle *nsm = host->h_nsmhandle;
struct nsm_res res;
int status;
- struct rpc_clnt *clnt;
- const char *nodename = NULL;
dprintk("lockd: nsm_monitor(%s)\n", nsm->sm_name);
if (nsm->sm_monitored)
return 0;
- if (host->h_rpcclnt)
- nodename = host->h_rpcclnt->cl_nodename;
-
/*
* Choose whether to record the caller_name or IP address of
* this peer in the local rpc.statd's database.
*/
nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf;
- clnt = nsm_client_get(host->net, nodename);
- if (IS_ERR(clnt)) {
- status = PTR_ERR(clnt);
- dprintk("lockd: failed to create NSM upcall transport, "
- "status=%d, net=%p\n", status, host->net);
- return status;
- }
-
- status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, clnt);
+ status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, host);
if (unlikely(res.status != 0))
status = -EIO;
if (unlikely(status < 0)) {
@@ -247,11 +191,9 @@ void nsm_unmonitor(const struct nlm_host *host)
if (atomic_read(&nsm->sm_count) == 1
&& nsm->sm_monitored && !nsm->sm_sticky) {
- struct lockd_net *ln = net_generic(host->net, lockd_net_id);
-
dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name);
- status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, ln->nsm_clnt);
+ status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, host);
if (res.status != 0)
status = -EIO;
if (status < 0)
@@ -259,38 +201,38 @@ void nsm_unmonitor(const struct nlm_host *host)
nsm->sm_name);
else
nsm->sm_monitored = 0;
-
- nsm_client_put(host->net);
}
}
-static struct nsm_handle *nsm_lookup_hostname(const char *hostname,
- const size_t len)
+static struct nsm_handle *nsm_lookup_hostname(const struct list_head *nsm_handles,
+ const char *hostname, const size_t len)
{
struct nsm_handle *nsm;
- list_for_each_entry(nsm, &nsm_handles, sm_link)
+ list_for_each_entry(nsm, nsm_handles, sm_link)
if (strlen(nsm->sm_name) == len &&
memcmp(nsm->sm_name, hostname, len) == 0)
return nsm;
return NULL;
}
-static struct nsm_handle *nsm_lookup_addr(const struct sockaddr *sap)
+static struct nsm_handle *nsm_lookup_addr(const struct list_head *nsm_handles,
+ const struct sockaddr *sap)
{
struct nsm_handle *nsm;
- list_for_each_entry(nsm, &nsm_handles, sm_link)
+ list_for_each_entry(nsm, nsm_handles, sm_link)
if (rpc_cmp_addr(nsm_addr(nsm), sap))
return nsm;
return NULL;
}
-static struct nsm_handle *nsm_lookup_priv(const struct nsm_private *priv)
+static struct nsm_handle *nsm_lookup_priv(const struct list_head *nsm_handles,
+ const struct nsm_private *priv)
{
struct nsm_handle *nsm;
- list_for_each_entry(nsm, &nsm_handles, sm_link)
+ list_for_each_entry(nsm, nsm_handles, sm_link)
if (memcmp(nsm->sm_priv.data, priv->data,
sizeof(priv->data)) == 0)
return nsm;
@@ -353,6 +295,7 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
/**
* nsm_get_handle - Find or create a cached nsm_handle
+ * @net: network namespace
* @sap: pointer to socket address of handle to find
* @salen: length of socket address
* @hostname: pointer to C string containing hostname to find
@@ -365,11 +308,13 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
* @hostname cannot be found in the handle cache. Returns NULL if
* an error occurs.
*/
-struct nsm_handle *nsm_get_handle(const struct sockaddr *sap,
+struct nsm_handle *nsm_get_handle(const struct net *net,
+ const struct sockaddr *sap,
const size_t salen, const char *hostname,
const size_t hostname_len)
{
struct nsm_handle *cached, *new = NULL;
+ struct lockd_net *ln = net_generic(net, lockd_net_id);
if (hostname && memchr(hostname, '/', hostname_len) != NULL) {
if (printk_ratelimit()) {
@@ -384,9 +329,10 @@ retry:
spin_lock(&nsm_lock);
if (nsm_use_hostnames && hostname != NULL)
- cached = nsm_lookup_hostname(hostname, hostname_len);
+ cached = nsm_lookup_hostname(&ln->nsm_handles,
+ hostname, hostname_len);
else
- cached = nsm_lookup_addr(sap);
+ cached = nsm_lookup_addr(&ln->nsm_handles, sap);
if (cached != NULL) {
atomic_inc(&cached->sm_count);
@@ -400,7 +346,7 @@ retry:
}
if (new != NULL) {
- list_add(&new->sm_link, &nsm_handles);
+ list_add(&new->sm_link, &ln->nsm_handles);
spin_unlock(&nsm_lock);
dprintk("lockd: created nsm_handle for %s (%s)\n",
new->sm_name, new->sm_addrbuf);
@@ -417,19 +363,22 @@ retry:
/**
* nsm_reboot_lookup - match NLMPROC_SM_NOTIFY arguments to an nsm_handle
+ * @net: network namespace
* @info: pointer to NLMPROC_SM_NOTIFY arguments
*
* Returns a matching nsm_handle if found in the nsm cache. The returned
* nsm_handle's reference count is bumped. Otherwise returns NULL if some
* error occurred.
*/
-struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info)
+struct nsm_handle *nsm_reboot_lookup(const struct net *net,
+ const struct nlm_reboot *info)
{
struct nsm_handle *cached;
+ struct lockd_net *ln = net_generic(net, lockd_net_id);
spin_lock(&nsm_lock);
- cached = nsm_lookup_priv(&info->priv);
+ cached = nsm_lookup_priv(&ln->nsm_handles, &info->priv);
if (unlikely(cached == NULL)) {
spin_unlock(&nsm_lock);
dprintk("lockd: never saw rebooted peer '%.*s' before\n",
diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h
index 097bfa3adb1c..5426189406c1 100644
--- a/fs/lockd/netns.h
+++ b/fs/lockd/netns.h
@@ -12,9 +12,7 @@ struct lockd_net {
struct delayed_work grace_period_end;
struct lock_manager lockd_manager;
- spinlock_t nsm_clnt_lock;
- unsigned int nsm_users;
- struct rpc_clnt *nsm_clnt;
+ struct list_head nsm_handles;
};
extern int lockd_net_id;
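
Keying the nsm_handles list (and dropping the cached NSM client) off struct lockd_net means every lookup first resolves the namespace-local state via net_generic(). A sketch of the pattern used in the mon.c hunks above (lookup_in_net is an illustrative name, not a real helper):

    static struct nsm_handle *lookup_in_net(const struct net *net,
                                            const struct sockaddr *sap)
    {
            struct lockd_net *ln = net_generic(net, lockd_net_id);
            struct nsm_handle *nsm;

            spin_lock(&nsm_lock);
            nsm = nsm_lookup_addr(&ln->nsm_handles, sap);
            if (nsm)
                    atomic_inc(&nsm->sm_count);   /* bump before unlock */
            spin_unlock(&nsm_lock);
            return nsm;
    }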
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index d678bcc3cbcb..5f31ebd96c06 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -592,7 +592,7 @@ static int lockd_init_net(struct net *net)
INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender);
INIT_LIST_HEAD(&ln->lockd_manager.list);
ln->lockd_manager.block_opens = false;
- spin_lock_init(&ln->nsm_clnt_lock);
+ INIT_LIST_HEAD(&ln->nsm_handles);
return 0;
}
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index b147d1ae71fd..09c576f26c7b 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -421,7 +421,7 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
return rpc_system_err;
}
- nlm_host_rebooted(argp);
+ nlm_host_rebooted(SVC_NET(rqstp), argp);
return rpc_success;
}
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 21171f0c6477..fb26b9f522e7 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -464,7 +464,7 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
return rpc_system_err;
}
- nlm_host_rebooted(argp);
+ nlm_host_rebooted(SVC_NET(rqstp), argp);
return rpc_success;
}
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index f6e7cbabac5a..00575d776d91 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -262,11 +262,11 @@ void fill_post_wcc(struct svc_fh *fhp)
err = fh_getattr(fhp, &fhp->fh_post_attr);
fhp->fh_post_change = d_inode(fhp->fh_dentry)->i_version;
if (err) {
- fhp->fh_post_saved = 0;
+ fhp->fh_post_saved = false;
/* Grab the ctime anyway - set_change_info might use it */
fhp->fh_post_attr.ctime = d_inode(fhp->fh_dentry)->i_ctime;
} else
- fhp->fh_post_saved = 1;
+ fhp->fh_post_saved = true;
}
/*
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index ebf90e487c75..9ffef06b30d5 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -201,6 +201,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
INIT_LIST_HEAD(&ls->ls_perfile);
spin_lock_init(&ls->ls_lock);
INIT_LIST_HEAD(&ls->ls_layouts);
+ mutex_init(&ls->ls_mutex);
ls->ls_layout_type = layout_type;
nfsd4_init_cb(&ls->ls_recall, clp, &nfsd4_cb_layout_ops,
NFSPROC4_CLNT_CB_LAYOUT);
@@ -262,19 +263,23 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
status = nfserr_jukebox;
if (!ls)
goto out;
+ mutex_lock(&ls->ls_mutex);
} else {
ls = container_of(stid, struct nfs4_layout_stateid, ls_stid);
status = nfserr_bad_stateid;
+ mutex_lock(&ls->ls_mutex);
if (stateid->si_generation > stid->sc_stateid.si_generation)
- goto out_put_stid;
+ goto out_unlock_stid;
if (layout_type != ls->ls_layout_type)
- goto out_put_stid;
+ goto out_unlock_stid;
}
*lsp = ls;
return 0;
+out_unlock_stid:
+ mutex_unlock(&ls->ls_mutex);
out_put_stid:
nfs4_put_stid(stid);
out:
@@ -296,8 +301,6 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls)
trace_layout_recall(&ls->ls_stid.sc_stateid);
atomic_inc(&ls->ls_stid.sc_count);
- update_stateid(&ls->ls_stid.sc_stateid);
- memcpy(&ls->ls_recall_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t));
nfsd4_run_cb(&ls->ls_recall);
out_unlock:
@@ -406,8 +409,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls)
list_add_tail(&new->lo_perstate, &ls->ls_layouts);
new = NULL;
done:
- update_stateid(&ls->ls_stid.sc_stateid);
- memcpy(&lgp->lg_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t));
+ nfs4_inc_and_copy_stateid(&lgp->lg_sid, &ls->ls_stid);
spin_unlock(&ls->ls_lock);
out:
spin_unlock(&fp->fi_lock);
@@ -481,11 +483,8 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp,
}
}
if (!list_empty(&ls->ls_layouts)) {
- if (found) {
- update_stateid(&ls->ls_stid.sc_stateid);
- memcpy(&lrp->lr_sid, &ls->ls_stid.sc_stateid,
- sizeof(stateid_t));
- }
+ if (found)
+ nfs4_inc_and_copy_stateid(&lrp->lr_sid, &ls->ls_stid);
lrp->lrs_present = 1;
} else {
trace_layoutstate_unhash(&ls->ls_stid.sc_stateid);
@@ -494,6 +493,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp,
}
spin_unlock(&ls->ls_lock);
+ mutex_unlock(&ls->ls_mutex);
nfs4_put_stid(&ls->ls_stid);
nfsd4_free_layouts(&reaplist);
return nfs_ok;
@@ -608,6 +608,16 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
}
}
+static void
+nfsd4_cb_layout_prepare(struct nfsd4_callback *cb)
+{
+ struct nfs4_layout_stateid *ls =
+ container_of(cb, struct nfs4_layout_stateid, ls_recall);
+
+ mutex_lock(&ls->ls_mutex);
+ nfs4_inc_and_copy_stateid(&ls->ls_recall_sid, &ls->ls_stid);
+}
+
static int
nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
@@ -649,12 +659,14 @@ nfsd4_cb_layout_release(struct nfsd4_callback *cb)
trace_layout_recall_release(&ls->ls_stid.sc_stateid);
+ mutex_unlock(&ls->ls_mutex);
nfsd4_return_all_layouts(ls, &reaplist);
nfsd4_free_layouts(&reaplist);
nfs4_put_stid(&ls->ls_stid);
}
static struct nfsd4_callback_ops nfsd4_cb_layout_ops = {
+ .prepare = nfsd4_cb_layout_prepare,
.done = nfsd4_cb_layout_done,
.release = nfsd4_cb_layout_release,
};
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 4ce6b97b31ad..a9f096c7e99f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1309,6 +1309,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
nfserr = nfsd4_insert_layout(lgp, ls);
out_put_stid:
+ mutex_unlock(&ls->ls_mutex);
nfs4_put_stid(&ls->ls_stid);
out:
return nfserr;
@@ -1362,6 +1363,9 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp,
goto out;
}
+ /* LAYOUTCOMMIT does not require any serialization */
+ mutex_unlock(&ls->ls_mutex);
+
if (new_size > i_size_read(inode)) {
lcp->lc_size_chg = 1;
lcp->lc_newsize = new_size;
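
Taken together with the nfs4layouts.c hunks above, the new ls_mutex serializes all updates to a layout stateid. Its lifetime, summarized from the code in this series (not new behaviour):

    /*
     * nfsd4_preprocess_layout_stateid()   -> mutex_lock(&ls->ls_mutex)
     *   LAYOUTGET:    unlocked in nfsd4_layoutget() once the layout
     *                 is inserted
     *   LAYOUTCOMMIT: unlocked immediately (needs no serialization)
     *   LAYOUTRETURN: unlocked in nfsd4_return_file_layouts()
     * recall callback:
     *   nfsd4_cb_layout_prepare()         -> mutex_lock(), then bump
     *                                        and copy the recall stateid
     *   nfsd4_cb_layout_release()         -> mutex_unlock()
     */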
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0f1d5691b795..6b800b5b8fed 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -575,6 +575,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid;
/* Will be incremented before return to client: */
atomic_set(&stid->sc_count, 1);
+ spin_lock_init(&stid->sc_lock);
/*
* It shouldn't be a problem to reuse an opaque stateid value.
@@ -745,6 +746,18 @@ nfs4_put_stid(struct nfs4_stid *s)
put_nfs4_file(fp);
}
+void
+nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid)
+{
+ stateid_t *src = &stid->sc_stateid;
+
+ spin_lock(&stid->sc_lock);
+ if (unlikely(++src->si_generation == 0))
+ src->si_generation = 1;
+ memcpy(dst, src, sizeof(*dst));
+ spin_unlock(&stid->sc_lock);
+}
+
static void nfs4_put_deleg_lease(struct nfs4_file *fp)
{
struct file *filp = NULL;
@@ -765,16 +778,68 @@ void nfs4_unhash_stid(struct nfs4_stid *s)
s->sc_type = 0;
}
-static void
+/**
+ * nfs4_get_existing_delegation - Discover if this delegation already exists
+ * @clp: a pointer to the nfs4_client we're granting a delegation to
+ * @fp: a pointer to the nfs4_file we're granting a delegation on
+ *
+ * Return:
+ * On success: 0 if an existing delegation was not found.
+ *
+ * On error: -EAGAIN if one was previously granted to this nfs4_client
+ * for this nfs4_file.
+ *
+ */
+
+static int
+nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp)
+{
+ struct nfs4_delegation *searchdp = NULL;
+ struct nfs4_client *searchclp = NULL;
+
+ lockdep_assert_held(&state_lock);
+ lockdep_assert_held(&fp->fi_lock);
+
+ list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) {
+ searchclp = searchdp->dl_stid.sc_client;
+ if (clp == searchclp) {
+ return -EAGAIN;
+ }
+ }
+ return 0;
+}
+
+/**
+ * hash_delegation_locked - Add a delegation to the appropriate lists
+ * @dp: a pointer to the nfs4_delegation we are adding.
+ * @fp: a pointer to the nfs4_file we're granting a delegation on
+ *
+ * Return:
+ * On success: 0 if the delegation was successfully hashed.
+ *
+ * On error: -EAGAIN if one was previously granted to this
+ * nfs4_client for this nfs4_file. Delegation is not hashed.
+ *
+ */
+
+static int
hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
{
+ int status;
+ struct nfs4_client *clp = dp->dl_stid.sc_client;
+
lockdep_assert_held(&state_lock);
lockdep_assert_held(&fp->fi_lock);
+ status = nfs4_get_existing_delegation(clp, fp);
+ if (status)
+ return status;
+ ++fp->fi_delegees;
atomic_inc(&dp->dl_stid.sc_count);
dp->dl_stid.sc_type = NFS4_DELEG_STID;
list_add(&dp->dl_perfile, &fp->fi_delegations);
- list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
+ list_add(&dp->dl_perclnt, &clp->cl_delegations);
+ return 0;
}
static bool
@@ -2256,15 +2321,20 @@ nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
clid->flags = new->cl_exchange_flags;
}
+static bool client_has_openowners(struct nfs4_client *clp)
+{
+ struct nfs4_openowner *oo;
+
+ list_for_each_entry(oo, &clp->cl_openowners, oo_perclient) {
+ if (!list_empty(&oo->oo_owner.so_stateids))
+ return true;
+ }
+ return false;
+}
+
static bool client_has_state(struct nfs4_client *clp)
{
- /*
- * Note clp->cl_openowners check isn't quite right: there's no
- * need to count owners without stateid's.
- *
- * Also note we should probably be using this in 4.0 case too.
- */
- return !list_empty(&clp->cl_openowners)
+ return client_has_openowners(clp)
#ifdef CONFIG_NFSD_PNFS
|| !list_empty(&clp->cl_lo_states)
#endif
@@ -3049,7 +3119,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
/* Cases below refer to rfc 3530 section 14.2.33: */
spin_lock(&nn->client_lock);
conf = find_confirmed_client_by_name(&clname, nn);
- if (conf) {
+ if (conf && client_has_state(conf)) {
/* case 0: */
status = nfserr_clid_inuse;
if (clp_used_exchangeid(conf))
@@ -3136,6 +3206,11 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
} else { /* case 3: normal case; new or rebooted client */
old = find_confirmed_client_by_name(&unconf->cl_name, nn);
if (old) {
+ status = nfserr_clid_inuse;
+ if (client_has_state(old)
+ && !same_creds(&unconf->cl_cred,
+ &old->cl_cred))
+ goto out;
status = mark_client_expired_locked(old);
if (status) {
old = NULL;
@@ -3317,6 +3392,27 @@ static const struct nfs4_stateowner_operations openowner_ops = {
.so_free = nfs4_free_openowner,
};
+static struct nfs4_ol_stateid *
+nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
+{
+ struct nfs4_ol_stateid *local, *ret = NULL;
+ struct nfs4_openowner *oo = open->op_openowner;
+
+ lockdep_assert_held(&fp->fi_lock);
+
+ list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
+ /* ignore lock owners */
+ if (local->st_stateowner->so_is_open_owner == 0)
+ continue;
+ if (local->st_stateowner == &oo->oo_owner) {
+ ret = local;
+ atomic_inc(&ret->st_stid.sc_count);
+ break;
+ }
+ }
+ return ret;
+}
+
static struct nfs4_openowner *
alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
struct nfsd4_compound_state *cstate)
@@ -3348,9 +3444,20 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
return ret;
}
-static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) {
+static struct nfs4_ol_stateid *
+init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
+ struct nfsd4_open *open)
+{
+
struct nfs4_openowner *oo = open->op_openowner;
+ struct nfs4_ol_stateid *retstp = NULL;
+ spin_lock(&oo->oo_owner.so_client->cl_lock);
+ spin_lock(&fp->fi_lock);
+
+ retstp = nfsd4_find_existing_open(fp, open);
+ if (retstp)
+ goto out_unlock;
atomic_inc(&stp->st_stid.sc_count);
stp->st_stid.sc_type = NFS4_OPEN_STID;
INIT_LIST_HEAD(&stp->st_locks);
@@ -3360,12 +3467,14 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
stp->st_access_bmap = 0;
stp->st_deny_bmap = 0;
stp->st_openstp = NULL;
- spin_lock(&oo->oo_owner.so_client->cl_lock);
+ init_rwsem(&stp->st_rwsem);
list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
- spin_lock(&fp->fi_lock);
list_add(&stp->st_perfile, &fp->fi_stateids);
+
+out_unlock:
spin_unlock(&fp->fi_lock);
spin_unlock(&oo->oo_owner.so_client->cl_lock);
+ return retstp;
}
/*
@@ -3776,27 +3885,6 @@ out:
return nfs_ok;
}
-static struct nfs4_ol_stateid *
-nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
-{
- struct nfs4_ol_stateid *local, *ret = NULL;
- struct nfs4_openowner *oo = open->op_openowner;
-
- spin_lock(&fp->fi_lock);
- list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
- /* ignore lock owners */
- if (local->st_stateowner->so_is_open_owner == 0)
- continue;
- if (local->st_stateowner == &oo->oo_owner) {
- ret = local;
- atomic_inc(&ret->st_stid.sc_count);
- break;
- }
- }
- spin_unlock(&fp->fi_lock);
- return ret;
-}
-
static inline int nfs4_access_to_access(u32 nfs4_access)
{
int flags = 0;
@@ -3945,6 +4033,18 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag)
return fl;
}
+/**
+ * nfs4_setlease - Obtain a delegation by requesting lease from vfs layer
+ * @dp: a pointer to the nfs4_delegation we're adding.
+ *
+ * Return:
+ * On success: 0.
+ *
+ * On error: -EAGAIN if there was an existing delegation;
+ * another nonzero error code in other cases.
+ *
+ */
+
static int nfs4_setlease(struct nfs4_delegation *dp)
{
struct nfs4_file *fp = dp->dl_stid.sc_file;
@@ -3976,16 +4076,19 @@ static int nfs4_setlease(struct nfs4_delegation *dp)
goto out_unlock;
/* Race breaker */
if (fp->fi_deleg_file) {
- status = 0;
- ++fp->fi_delegees;
- hash_delegation_locked(dp, fp);
+ status = hash_delegation_locked(dp, fp);
goto out_unlock;
}
fp->fi_deleg_file = filp;
- fp->fi_delegees = 1;
- hash_delegation_locked(dp, fp);
+ fp->fi_delegees = 0;
+ status = hash_delegation_locked(dp, fp);
spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock);
+ if (status) {
+ /* Should never happen, this is a new fi_deleg_file */
+ WARN_ON_ONCE(1);
+ goto out_fput;
+ }
return 0;
out_unlock:
spin_unlock(&fp->fi_lock);
@@ -4005,6 +4108,15 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
if (fp->fi_had_conflict)
return ERR_PTR(-EAGAIN);
+ spin_lock(&state_lock);
+ spin_lock(&fp->fi_lock);
+ status = nfs4_get_existing_delegation(clp, fp);
+ spin_unlock(&fp->fi_lock);
+ spin_unlock(&state_lock);
+
+ if (status)
+ return ERR_PTR(status);
+
dp = alloc_init_deleg(clp, fh, odstate);
if (!dp)
return ERR_PTR(-ENOMEM);
@@ -4023,9 +4135,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
status = -EAGAIN;
goto out_unlock;
}
- ++fp->fi_delegees;
- hash_delegation_locked(dp, fp);
- status = 0;
+ status = hash_delegation_locked(dp, fp);
out_unlock:
spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock);
@@ -4160,6 +4270,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
struct nfs4_client *cl = open->op_openowner->oo_owner.so_client;
struct nfs4_file *fp = NULL;
struct nfs4_ol_stateid *stp = NULL;
+ struct nfs4_ol_stateid *swapstp = NULL;
struct nfs4_delegation *dp = NULL;
__be32 status;
@@ -4173,7 +4284,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
status = nfs4_check_deleg(cl, open, &dp);
if (status)
goto out;
+ spin_lock(&fp->fi_lock);
stp = nfsd4_find_existing_open(fp, open);
+ spin_unlock(&fp->fi_lock);
} else {
open->op_file = NULL;
status = nfserr_bad_stateid;
@@ -4187,15 +4300,32 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
*/
if (stp) {
/* Stateid was found, this is an OPEN upgrade */
+ down_read(&stp->st_rwsem);
status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
- if (status)
+ if (status) {
+ up_read(&stp->st_rwsem);
goto out;
+ }
} else {
stp = open->op_stp;
open->op_stp = NULL;
- init_open_stateid(stp, fp, open);
+ swapstp = init_open_stateid(stp, fp, open);
+ if (swapstp) {
+ nfs4_put_stid(&stp->st_stid);
+ stp = swapstp;
+ down_read(&stp->st_rwsem);
+ status = nfs4_upgrade_open(rqstp, fp, current_fh,
+ stp, open);
+ if (status) {
+ up_read(&stp->st_rwsem);
+ goto out;
+ }
+ goto upgrade_out;
+ }
+ down_read(&stp->st_rwsem);
status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
if (status) {
+ up_read(&stp->st_rwsem);
release_open_stateid(stp);
goto out;
}
@@ -4205,8 +4335,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
if (stp->st_clnt_odstate == open->op_odstate)
open->op_odstate = NULL;
}
- update_stateid(&stp->st_stid.sc_stateid);
- memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+upgrade_out:
+ nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid);
+ up_read(&stp->st_rwsem);
if (nfsd4_has_session(&resp->cstate)) {
if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
@@ -4819,10 +4950,13 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
* revoked delegations are kept only for free_stateid.
*/
return nfserr_bad_stateid;
+ down_write(&stp->st_rwsem);
status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));
- if (status)
- return status;
- return nfs4_check_fh(current_fh, &stp->st_stid);
+ if (status == nfs_ok)
+ status = nfs4_check_fh(current_fh, &stp->st_stid);
+ if (status != nfs_ok)
+ up_write(&stp->st_rwsem);
+ return status;
}
/*
@@ -4869,6 +5003,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
return status;
oo = openowner(stp->st_stateowner);
if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
+ up_write(&stp->st_rwsem);
nfs4_put_stid(&stp->st_stid);
return nfserr_bad_stateid;
}
@@ -4899,11 +5034,13 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
oo = openowner(stp->st_stateowner);
status = nfserr_bad_stateid;
- if (oo->oo_flags & NFS4_OO_CONFIRMED)
+ if (oo->oo_flags & NFS4_OO_CONFIRMED) {
+ up_write(&stp->st_rwsem);
goto put_stateid;
+ }
oo->oo_flags |= NFS4_OO_CONFIRMED;
- update_stateid(&stp->st_stid.sc_stateid);
- memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+ nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid);
+ up_write(&stp->st_rwsem);
dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
__func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
@@ -4975,13 +5112,11 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
goto put_stateid;
}
nfs4_stateid_downgrade(stp, od->od_share_access);
-
reset_union_bmap_deny(od->od_share_deny, stp);
-
- update_stateid(&stp->st_stid.sc_stateid);
- memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+ nfs4_inc_and_copy_stateid(&od->od_stateid, &stp->st_stid);
status = nfs_ok;
put_stateid:
+ up_write(&stp->st_rwsem);
nfs4_put_stid(&stp->st_stid);
out:
nfsd4_bump_seqid(cstate, status);
@@ -5033,8 +5168,8 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfsd4_bump_seqid(cstate, status);
if (status)
goto out;
- update_stateid(&stp->st_stid.sc_stateid);
- memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+ nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
+ up_write(&stp->st_rwsem);
nfsd4_close_open_stateid(stp);
@@ -5260,6 +5395,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
stp->st_access_bmap = 0;
stp->st_deny_bmap = open_stp->st_deny_bmap;
stp->st_openstp = open_stp;
+ init_rwsem(&stp->st_rwsem);
list_add(&stp->st_locks, &open_stp->st_locks);
list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
spin_lock(&fp->fi_lock);
@@ -5428,6 +5564,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&open_stp, nn);
if (status)
goto out;
+ up_write(&open_stp->st_rwsem);
open_sop = openowner(open_stp->st_stateowner);
status = nfserr_bad_stateid;
if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
@@ -5435,6 +5572,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
status = lookup_or_create_lock_state(cstate, open_stp, lock,
&lock_stp, &new);
+ if (status == nfs_ok)
+ down_write(&lock_stp->st_rwsem);
} else {
status = nfs4_preprocess_seqid_op(cstate,
lock->lk_old_lock_seqid,
@@ -5512,9 +5651,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
switch (-err) {
case 0: /* success! */
- update_stateid(&lock_stp->st_stid.sc_stateid);
- memcpy(&lock->lk_resp_stateid, &lock_stp->st_stid.sc_stateid,
- sizeof(stateid_t));
+ nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
status = 0;
break;
case (EAGAIN): /* conflock holds conflicting lock */
@@ -5540,6 +5677,8 @@ out:
seqid_mutating_err(ntohl(status)))
lock_sop->lo_owner.so_seqid++;
+ up_write(&lock_stp->st_rwsem);
+
/*
* If this is a new, never-before-used stateid, and we are
* returning an error, then just go ahead and release it.
@@ -5704,11 +5843,11 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n");
goto out_nfserr;
}
- update_stateid(&stp->st_stid.sc_stateid);
- memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+ nfs4_inc_and_copy_stateid(&locku->lu_stateid, &stp->st_stid);
fput:
fput(filp);
put_stateid:
+ up_write(&stp->st_rwsem);
nfs4_put_stid(&stp->st_stid);
out:
nfsd4_bump_seqid(cstate, status);
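
A note on the delegation hunks above: the existence check must run under state_lock and fp->fi_lock, but allocating a delegation can sleep, so nfs4_set_delegation() probes first, drops the locks to allocate, and hash_delegation_locked() re-checks before hashing. The shape of the pattern, condensed from the code above:

    /* 1. cheap probe under both locks */
    spin_lock(&state_lock);
    spin_lock(&fp->fi_lock);
    status = nfs4_get_existing_delegation(clp, fp);
    spin_unlock(&fp->fi_lock);
    spin_unlock(&state_lock);
    if (status)
            return ERR_PTR(status);

    /* 2. allocation may sleep; no spinlocks held */
    dp = alloc_init_deleg(clp, fh, odstate);

    /* 3. retake the locks; hash_delegation_locked() re-runs the
     *    existence check, closing the race with a concurrent grant */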
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 46ec934f5dee..54cde9a5864e 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -63,7 +63,6 @@ static unsigned int longest_chain;
static unsigned int longest_chain_cachesize;
static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
-static void cache_cleaner_func(struct work_struct *unused);
static unsigned long nfsd_reply_cache_count(struct shrinker *shrink,
struct shrink_control *sc);
static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink,
@@ -76,13 +75,6 @@ static struct shrinker nfsd_reply_cache_shrinker = {
};
/*
- * locking for the reply cache:
- * A cache entry is "single use" if c_state == RC_INPROG
- * Otherwise, it when accessing _prev or _next, the lock must be held.
- */
-static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func);
-
-/*
* Put a cap on the size of the DRC based on the amount of available
* low memory in the machine.
*
@@ -203,7 +195,6 @@ void nfsd_reply_cache_shutdown(void)
unsigned int i;
unregister_shrinker(&nfsd_reply_cache_shrinker);
- cancel_delayed_work_sync(&cache_cleaner);
for (i = 0; i < drc_hashsize; i++) {
struct list_head *head = &drc_hashtbl[i].lru_head;
@@ -217,10 +208,8 @@ void nfsd_reply_cache_shutdown(void)
drc_hashtbl = NULL;
drc_hashsize = 0;
- if (drc_slab) {
- kmem_cache_destroy(drc_slab);
- drc_slab = NULL;
- }
+ kmem_cache_destroy(drc_slab);
+ drc_slab = NULL;
}
/*
@@ -232,7 +221,6 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
{
rp->c_timestamp = jiffies;
list_move_tail(&rp->c_lru, &b->lru_head);
- schedule_delayed_work(&cache_cleaner, RC_EXPIRE);
}
static long
@@ -266,7 +254,6 @@ prune_cache_entries(void)
{
unsigned int i;
long freed = 0;
- bool cancel = true;
for (i = 0; i < drc_hashsize; i++) {
struct nfsd_drc_bucket *b = &drc_hashtbl[i];
@@ -275,26 +262,11 @@ prune_cache_entries(void)
continue;
spin_lock(&b->cache_lock);
freed += prune_bucket(b);
- if (!list_empty(&b->lru_head))
- cancel = false;
spin_unlock(&b->cache_lock);
}
-
- /*
- * Conditionally rearm the job to run in RC_EXPIRE since we just
- * ran the pruner.
- */
- if (!cancel)
- mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE);
return freed;
}
-static void
-cache_cleaner_func(struct work_struct *unused)
-{
- prune_cache_entries();
-}
-
static unsigned long
nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 350041a40fe5..c1681ce894c5 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -631,10 +631,7 @@ fh_put(struct svc_fh *fhp)
fh_unlock(fhp);
fhp->fh_dentry = NULL;
dput(dentry);
-#ifdef CONFIG_NFSD_V3
- fhp->fh_pre_saved = 0;
- fhp->fh_post_saved = 0;
-#endif
+ fh_clear_wcc(fhp);
}
fh_drop_write(fhp);
if (exp) {
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 1e90dad4926b..2087bae17582 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -26,16 +26,16 @@ static inline ino_t u32_to_ino_t(__u32 uino)
*/
typedef struct svc_fh {
struct knfsd_fh fh_handle; /* FH data */
+ int fh_maxsize; /* max size for fh_handle */
struct dentry * fh_dentry; /* validated dentry */
struct svc_export * fh_export; /* export pointer */
- int fh_maxsize; /* max size for fh_handle */
- unsigned char fh_locked; /* inode locked by us */
- unsigned char fh_want_write; /* remount protection taken */
+ bool fh_locked; /* inode locked by us */
+ bool fh_want_write; /* remount protection taken */
#ifdef CONFIG_NFSD_V3
- unsigned char fh_post_saved; /* post-op attrs saved */
- unsigned char fh_pre_saved; /* pre-op attrs saved */
+ bool fh_post_saved; /* post-op attrs saved */
+ bool fh_pre_saved; /* pre-op attrs saved */
/* Pre-op attributes saved during fh_lock */
__u64 fh_pre_size; /* size before operation */
@@ -213,8 +213,8 @@ static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
static inline void
fh_clear_wcc(struct svc_fh *fhp)
{
- fhp->fh_post_saved = 0;
- fhp->fh_pre_saved = 0;
+ fhp->fh_post_saved = false;
+ fhp->fh_pre_saved = false;
}
/*
@@ -231,7 +231,7 @@ fill_pre_wcc(struct svc_fh *fhp)
fhp->fh_pre_ctime = inode->i_ctime;
fhp->fh_pre_size = inode->i_size;
fhp->fh_pre_change = inode->i_version;
- fhp->fh_pre_saved = 1;
+ fhp->fh_pre_saved = true;
}
}
@@ -267,7 +267,7 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
inode = d_inode(dentry);
mutex_lock_nested(&inode->i_mutex, subclass);
fill_pre_wcc(fhp);
- fhp->fh_locked = 1;
+ fhp->fh_locked = true;
}
static inline void
@@ -285,7 +285,7 @@ fh_unlock(struct svc_fh *fhp)
if (fhp->fh_locked) {
fill_post_wcc(fhp);
mutex_unlock(&d_inode(fhp->fh_dentry)->i_mutex);
- fhp->fh_locked = 0;
+ fhp->fh_locked = false;
}
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 583ffc13cae2..77fdf4de91ba 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -84,7 +84,7 @@ struct nfsd4_callback_ops {
* fields that are of general use to any stateid.
*/
struct nfs4_stid {
- atomic_t sc_count;
+ atomic_t sc_count;
#define NFS4_OPEN_STID 1
#define NFS4_LOCK_STID 2
#define NFS4_DELEG_STID 4
@@ -94,11 +94,12 @@ struct nfs4_stid {
#define NFS4_REVOKED_DELEG_STID 16
#define NFS4_CLOSED_DELEG_STID 32
#define NFS4_LAYOUT_STID 64
- unsigned char sc_type;
- stateid_t sc_stateid;
- struct nfs4_client *sc_client;
- struct nfs4_file *sc_file;
- void (*sc_free)(struct nfs4_stid *);
+ unsigned char sc_type;
+ stateid_t sc_stateid;
+ spinlock_t sc_lock;
+ struct nfs4_client *sc_client;
+ struct nfs4_file *sc_file;
+ void (*sc_free)(struct nfs4_stid *);
};
/*
@@ -364,15 +365,6 @@ struct nfs4_client_reclaim {
char cr_recdir[HEXDIR_LEN]; /* recover dir */
};
-static inline void
-update_stateid(stateid_t *stateid)
-{
- stateid->si_generation++;
- /* Wraparound recommendation from 3530bis-13 9.1.3.2: */
- if (stateid->si_generation == 0)
- stateid->si_generation = 1;
-}
-
/* A reasonable value for REPLAY_ISIZE was estimated as follows:
* The OPEN response, typically the largest, requires
* 4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) + 8(verifier) +
@@ -534,15 +526,16 @@ struct nfs4_file {
* Better suggestions welcome.
*/
struct nfs4_ol_stateid {
- struct nfs4_stid st_stid; /* must be first field */
- struct list_head st_perfile;
- struct list_head st_perstateowner;
- struct list_head st_locks;
- struct nfs4_stateowner * st_stateowner;
- struct nfs4_clnt_odstate * st_clnt_odstate;
- unsigned char st_access_bmap;
- unsigned char st_deny_bmap;
- struct nfs4_ol_stateid * st_openstp;
+ struct nfs4_stid st_stid;
+ struct list_head st_perfile;
+ struct list_head st_perstateowner;
+ struct list_head st_locks;
+ struct nfs4_stateowner *st_stateowner;
+ struct nfs4_clnt_odstate *st_clnt_odstate;
+ unsigned char st_access_bmap;
+ unsigned char st_deny_bmap;
+ struct nfs4_ol_stateid *st_openstp;
+ struct rw_semaphore st_rwsem;
};
static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)
@@ -561,6 +554,7 @@ struct nfs4_layout_stateid {
struct nfsd4_callback ls_recall;
stateid_t ls_recall_sid;
bool ls_recalled;
+ struct mutex ls_mutex;
};
static inline struct nfs4_layout_stateid *layoutstateid(struct nfs4_stid *s)
@@ -593,6 +587,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
struct kmem_cache *slab);
void nfs4_unhash_stid(struct nfs4_stid *s);
void nfs4_put_stid(struct nfs4_stid *s);
+void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *);
extern void nfs4_release_reclaim(struct nfsd_net *);
extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
diff --git a/fs/nfsd/trace.c b/fs/nfsd/trace.c
index 82f89070594c..90967466a1e5 100644
--- a/fs/nfsd/trace.c
+++ b/fs/nfsd/trace.c
@@ -1,5 +1,3 @@
-#include "state.h"
-
#define CREATE_TRACE_POINTS
#include "trace.h"
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index c668520c344b..0befe762762b 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -9,6 +9,8 @@
#include <linux/tracepoint.h>
+#include "state.h"
+
DECLARE_EVENT_CLASS(nfsd_stateid_class,
TP_PROTO(stateid_t *stp),
TP_ARGS(stp),
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 45c04979e7b3..994d66fbb446 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1631,7 +1631,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
/* cannot use fh_lock as we need deadlock protective ordering
* so do it by hand */
trap = lock_rename(tdentry, fdentry);
- ffhp->fh_locked = tfhp->fh_locked = 1;
+ ffhp->fh_locked = tfhp->fh_locked = true;
fill_pre_wcc(ffhp);
fill_pre_wcc(tfhp);
@@ -1681,7 +1681,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
fill_post_wcc(ffhp);
fill_post_wcc(tfhp);
unlock_rename(tdentry, fdentry);
- ffhp->fh_locked = tfhp->fh_locked = 0;
+ ffhp->fh_locked = tfhp->fh_locked = false;
fh_drop_write(ffhp);
out:
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index fee2451ae248..fcfc48cbe136 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -112,14 +112,14 @@ static inline int fh_want_write(struct svc_fh *fh)
int ret = mnt_want_write(fh->fh_export->ex_path.mnt);
if (!ret)
- fh->fh_want_write = 1;
+ fh->fh_want_write = true;
return ret;
}
static inline void fh_drop_write(struct svc_fh *fh)
{
if (fh->fh_want_write) {
- fh->fh_want_write = 0;
+ fh->fh_want_write = false;
mnt_drop_write(fh->fh_export->ex_path.mnt);
}
}
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 9f991007a578..ce7362c88b48 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -632,7 +632,7 @@ static inline void
set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
{
BUG_ON(!fhp->fh_pre_saved);
- cinfo->atomic = fhp->fh_post_saved;
+ cinfo->atomic = (u32)fhp->fh_post_saved;
cinfo->change_supported = IS_I_VERSION(d_inode(fhp->fh_dentry));
cinfo->before_change = fhp->fh_pre_change;
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 54575e3cc1a2..088ba001c6ef 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -109,7 +109,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
goto out;
file_update_time(vma->vm_file);
- ret = __block_page_mkwrite(vma, vmf, nilfs_get_block);
+ ret = block_page_mkwrite(vma, vmf, nilfs_get_block);
if (ret) {
nilfs_transaction_abort(inode->i_sb);
goto out;
diff --git a/fs/pipe.c b/fs/pipe.c
index 8865f7963700..42cf8ddf0e55 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -366,18 +366,17 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
int offset = buf->offset + buf->len;
if (ops->can_merge && offset + chars <= PAGE_SIZE) {
- int error = ops->confirm(pipe, buf);
- if (error)
+ ret = ops->confirm(pipe, buf);
+ if (ret)
goto out;
ret = copy_page_from_iter(buf->page, offset, chars, from);
if (unlikely(ret < chars)) {
- error = -EFAULT;
+ ret = -EFAULT;
goto out;
}
do_wakeup = 1;
- buf->len += chars;
- ret = chars;
+ buf->len += ret;
if (!iov_iter_count(from))
goto out;
}
@@ -693,17 +692,20 @@ int create_pipe_files(struct file **res, int flags)
d_instantiate(path.dentry, inode);
- err = -ENFILE;
f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops);
- if (IS_ERR(f))
+ if (IS_ERR(f)) {
+ err = PTR_ERR(f);
goto err_dentry;
+ }
f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
f->private_data = inode->i_pipe;
res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops);
- if (IS_ERR(res[0]))
+ if (IS_ERR(res[0])) {
+ err = PTR_ERR(res[0]);
goto err_file;
+ }
path_get(&path);
res[0]->private_data = inode->i_pipe;
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index a096841bd06c..f64639176670 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -84,6 +84,7 @@ xfs-y += xfs_aops.o \
xfs_message.o \
xfs_mount.o \
xfs_mru_cache.o \
+ xfs_stats.o \
xfs_super.o \
xfs_symlink.o \
xfs_sysfs.o \
@@ -118,7 +119,6 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
-xfs-$(CONFIG_PROC_FS) += xfs_stats.o
xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o
xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o
xfs-$(CONFIG_NFSD_PNFS) += xfs_pnfs.o
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index a7a3a63bb360..686ba6fb20dd 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -55,8 +55,9 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
return ptr;
if (!(++retries % 100))
xfs_err(NULL,
- "possible memory allocation deadlock in %s (mode:0x%x)",
- __func__, lflags);
+ "%s(%u) possible memory allocation deadlock size %u in %s (mode:0x%x)",
+ current->comm, current->pid,
+ (unsigned int)size, __func__, lflags);
congestion_wait(BLK_RW_ASYNC, HZ/50);
} while (1);
}
@@ -120,8 +121,9 @@ kmem_zone_alloc(kmem_zone_t *zone, xfs_km_flags_t flags)
return ptr;
if (!(++retries % 100))
xfs_err(NULL,
- "possible memory allocation deadlock in %s (mode:0x%x)",
- __func__, lflags);
+ "%s(%u) possible memory allocation deadlock in %s (mode:0x%x)",
+ current->comm, current->pid,
+ __func__, lflags);
congestion_wait(BLK_RW_ASYNC, HZ/50);
} while (1);
}
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index ffad7f20342f..3479294c1d58 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -482,7 +482,9 @@ xfs_agfl_verify(
be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
return false;
}
- return true;
+
+ return xfs_log_check_lsn(mp,
+ be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn));
}
static void
@@ -651,8 +653,8 @@ xfs_alloc_ag_vextent(
-((long)(args->len)));
}
- XFS_STATS_INC(xs_allocx);
- XFS_STATS_ADD(xs_allocb, args->len);
+ XFS_STATS_INC(args->mp, xs_allocx);
+ XFS_STATS_ADD(args->mp, xs_allocb, args->len);
return error;
}
@@ -1808,8 +1810,8 @@ xfs_free_ag_extent(
if (!isfl)
xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
- XFS_STATS_INC(xs_freex);
- XFS_STATS_ADD(xs_freeb, len);
+ XFS_STATS_INC(mp, xs_freex);
+ XFS_STATS_ADD(mp, xs_freeb, len);
trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
@@ -2259,9 +2261,13 @@ xfs_agf_verify(
{
struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
- if (xfs_sb_version_hascrc(&mp->m_sb) &&
- !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
return false;
+ if (!xfs_log_check_lsn(mp,
+ be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
+ return false;
+ }
if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
@@ -2503,7 +2509,7 @@ xfs_alloc_vextent(
* Try near allocation first, then anywhere-in-ag after
* the first a.g. fails.
*/
- if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) &&
+ if ((args->userdata & XFS_ALLOC_INITIAL_USER_DATA) &&
(mp->m_flags & XFS_MOUNT_32BITINODES)) {
args->fsbno = XFS_AGB_TO_FSB(mp,
((mp->m_agfrotor / rotorstep) %
@@ -2634,6 +2640,14 @@ xfs_alloc_vextent(
XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
args->len);
#endif
+
+ /* Zero the extent if we were asked to do so */
+ if (args->userdata & XFS_ALLOC_USERDATA_ZERO) {
+ error = xfs_zero_extent(args->ip, args->fsbno, args->len);
+ if (error)
+ goto error0;
+ }
+
}
xfs_perag_put(args->pag);
return 0;
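
Several verifiers in this patch (AGFL and AGF above, AGI, directory, attr and symlink blocks below) now also call xfs_log_check_lsn(), rejecting v5 metadata whose LSN points beyond the current log head. The helper's body is outside this excerpt; a hedged sketch of the expected check, using existing XFS definitions:

	/*
	 * Sketch only: a metadata LSN is plausible if it does not point
	 * past the current head of the active log. CYCLE_LSN()/BLOCK_LSN()
	 * and struct xlog's l_curr_cycle/l_curr_block are existing XFS
	 * definitions; the real xfs_log_check_lsn() also has to handle
	 * norecovery mounts and NULLCOMMITLSN.
	 */
	static bool example_lsn_valid(struct xlog *log, xfs_lsn_t lsn)
	{
		if (CYCLE_LSN(lsn) > log->l_curr_cycle)
			return false;	/* written by a future log: corrupt */
		if (CYCLE_LSN(lsn) == log->l_curr_cycle &&
		    BLOCK_LSN(lsn) > log->l_curr_block)
			return false;
		return true;
	}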
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index ca1c8168373a..0ecde4d5cac8 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -101,6 +101,7 @@ typedef struct xfs_alloc_arg {
struct xfs_mount *mp; /* file system mount point */
struct xfs_buf *agbp; /* buffer for a.g. freelist header */
struct xfs_perag *pag; /* per-ag struct for this agno */
+ struct xfs_inode *ip; /* for userdata zeroing method */
xfs_fsblock_t fsbno; /* file system block number */
xfs_agnumber_t agno; /* allocation group number */
xfs_agblock_t agbno; /* allocation group-relative block # */
@@ -120,15 +121,16 @@ typedef struct xfs_alloc_arg {
char wasdel; /* set if allocation was prev delayed */
char wasfromfl; /* set if allocation is from freelist */
char isfl; /* set if is freelist blocks - !acctg */
- char userdata; /* set if this is user data */
+ char userdata; /* mask defining userdata treatment */
xfs_fsblock_t firstblock; /* io first block allocated */
} xfs_alloc_arg_t;
/*
* Defines for userdata
*/
-#define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/
-#define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */
+#define XFS_ALLOC_USERDATA (1 << 0)/* allocation is for user data*/
+#define XFS_ALLOC_INITIAL_USER_DATA (1 << 1)/* special case start of file */
+#define XFS_ALLOC_USERDATA_ZERO (1 << 2)/* zero extent on allocation */
xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp,
struct xfs_perag *pag, xfs_extlen_t need);
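
With userdata now a bit mask rather than an enumeration, treatments combine and are tested independently; note the matching == to & change in xfs_alloc_vextent() above. Illustrative caller, using only names from this header and this patch:

	/* request a user-data allocation whose blocks are zeroed on disk */
	args.userdata = XFS_ALLOC_INITIAL_USER_DATA | XFS_ALLOC_USERDATA_ZERO;

	/* each treatment is then checked with a mask test, not equality */
	if (args.userdata & XFS_ALLOC_USERDATA_ZERO)
		error = xfs_zero_extent(args.ip, args.fsbno, args.len);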
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index ff065578969f..f949818fa1c7 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -125,7 +125,7 @@ xfs_attr_get(
uint lock_mode;
int error;
- XFS_STATS_INC(xs_attr_get);
+ XFS_STATS_INC(ip->i_mount, xs_attr_get);
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
@@ -209,7 +209,7 @@ xfs_attr_set(
int rsvd = (flags & ATTR_ROOT) != 0;
int error, err2, committed, local;
- XFS_STATS_INC(xs_attr_set);
+ XFS_STATS_INC(mp, xs_attr_set);
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
return -EIO;
@@ -412,7 +412,7 @@ xfs_attr_remove(
xfs_fsblock_t firstblock;
int error;
- XFS_STATS_INC(xs_attr_remove);
+ XFS_STATS_INC(mp, xs_attr_remove);
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
return -EIO;
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 33df52d97ec7..aa187f7ba2dd 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -41,6 +41,7 @@
#include "xfs_buf_item.h"
#include "xfs_cksum.h"
#include "xfs_dir2.h"
+#include "xfs_log.h"
/*
@@ -266,6 +267,8 @@ xfs_attr3_leaf_verify(
return false;
if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
return false;
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn)))
+ return false;
} else {
if (ichdr.magic != XFS_ATTR_LEAF_MAGIC)
return false;
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index f38f9bd81557..5ab95ffa4ae9 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -107,7 +107,7 @@ xfs_attr3_rmt_verify(
if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
return false;
if (be32_to_cpu(rmt->rm_offset) +
- be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX)
+ be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX)
return false;
if (rmt->rm_owner == 0)
return false;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 8e2010d53b07..119c2422aac7 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -948,14 +948,16 @@ xfs_bmap_local_to_extents(
bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
/*
- * Initialise the block and copy the data
+ * Initialize the block, copy the data and log the remote buffer.
*
- * Note: init_fn must set the buffer log item type correctly!
+ * The callout is responsible for logging because the remote format
+ * might differ from the local format and thus we don't know how much to
+ * log here. Note that init_fn must also set the buffer log item type
+ * correctly.
*/
init_fn(tp, bp, ip, ifp);
- /* account for the change in fork size and log everything */
- xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
+ /* account for the change in fork size */
xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
xfs_bmap_local_to_extents_empty(ip, whichfork);
flags |= XFS_ILOG_CORE;
@@ -1435,7 +1437,7 @@ xfs_bmap_search_extents(
xfs_ifork_t *ifp; /* inode fork pointer */
xfs_bmbt_rec_host_t *ep; /* extent record pointer */
- XFS_STATS_INC(xs_look_exlist);
+ XFS_STATS_INC(ip->i_mount, xs_look_exlist);
ifp = XFS_IFORK_PTR(ip, fork);
ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);
@@ -1732,7 +1734,7 @@ xfs_bmap_add_extent_delay_real(
ASSERT(!bma->cur ||
(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
- XFS_STATS_INC(xs_add_exlist);
+ XFS_STATS_INC(mp, xs_add_exlist);
#define LEFT r[0]
#define RIGHT r[1]
@@ -2286,7 +2288,7 @@ xfs_bmap_add_extent_unwritten_real(
ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
ASSERT(!isnullstartblock(new->br_startblock));
- XFS_STATS_INC(xs_add_exlist);
+ XFS_STATS_INC(mp, xs_add_exlist);
#define LEFT r[0]
#define RIGHT r[1]
@@ -2946,7 +2948,7 @@ xfs_bmap_add_extent_hole_real(
ASSERT(!bma->cur ||
!(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
- XFS_STATS_INC(xs_add_exlist);
+ XFS_STATS_INC(mp, xs_add_exlist);
state = 0;
if (whichfork == XFS_ATTR_FORK)
@@ -3800,8 +3802,13 @@ xfs_bmap_btalloc(
args.wasdel = ap->wasdel;
args.isfl = 0;
args.userdata = ap->userdata;
- if ((error = xfs_alloc_vextent(&args)))
+ if (ap->userdata & XFS_ALLOC_USERDATA_ZERO)
+ args.ip = ap->ip;
+
+ error = xfs_alloc_vextent(&args);
+ if (error)
return error;
+
if (tryagain && args.fsbno == NULLFSBLOCK) {
/*
* Exact allocation failed. Now try with alignment
@@ -4036,7 +4043,7 @@ xfs_bmapi_read(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
- XFS_STATS_INC(xs_blk_mapr);
+ XFS_STATS_INC(mp, xs_blk_mapr);
ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -4221,7 +4228,7 @@ xfs_bmapi_delay(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
- XFS_STATS_INC(xs_blk_mapw);
+ XFS_STATS_INC(mp, xs_blk_mapw);
if (!(ifp->if_flags & XFS_IFEXTENTS)) {
error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
@@ -4300,11 +4307,14 @@ xfs_bmapi_allocate(
/*
* Indicate if this is the first user data in the file, or just any
- * user data.
+ * user data. And if it is userdata, indicate whether it needs to
+ * be initialised to zero during allocation.
*/
if (!(bma->flags & XFS_BMAPI_METADATA)) {
bma->userdata = (bma->offset == 0) ?
XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
+ if (bma->flags & XFS_BMAPI_ZERO)
+ bma->userdata |= XFS_ALLOC_USERDATA_ZERO;
}
bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
@@ -4419,6 +4429,17 @@ xfs_bmapi_convert_unwritten(
mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
+ /*
+ * Before insertion into the bmbt, zero the range being converted
+ * if required.
+ */
+ if (flags & XFS_BMAPI_ZERO) {
+ error = xfs_zero_extent(bma->ip, mval->br_startblock,
+ mval->br_blockcount);
+ if (error)
+ return error;
+ }
+
error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
&bma->cur, mval, bma->firstblock, bma->flist,
&tmp_logflags);
@@ -4512,6 +4533,18 @@ xfs_bmapi_write(
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ /* zeroing is currently only for data extents, not metadata */
+ ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
+ (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
+ /*
+ * We can allocate unwritten extents or pre-zero allocated blocks,
+ * but it makes no sense to do both at once. This would result in
+ * the unwritten extent being zeroed twice while still remaining an
+ * unwritten extent.
+ */
+ ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
+ (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
+
if (unlikely(XFS_TEST_ERROR(
(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
@@ -4525,7 +4558,7 @@ xfs_bmapi_write(
ifp = XFS_IFORK_PTR(ip, whichfork);
- XFS_STATS_INC(xs_blk_mapw);
+ XFS_STATS_INC(mp, xs_blk_mapw);
if (*firstblock == NULLFSBLOCK) {
if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
@@ -4718,12 +4751,12 @@ xfs_bmap_del_extent(
xfs_filblks_t temp2; /* for indirect length calculations */
int state = 0;
- XFS_STATS_INC(xs_del_exlist);
+ mp = ip->i_mount;
+ XFS_STATS_INC(mp, xs_del_exlist);
if (whichfork == XFS_ATTR_FORK)
state |= BMAP_ATTRFORK;
- mp = ip->i_mount;
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
(uint)sizeof(xfs_bmbt_rec_t)));
@@ -5070,7 +5103,7 @@ xfs_bunmapi(
*done = 1;
return 0;
}
- XFS_STATS_INC(xs_blk_unmap);
+ XFS_STATS_INC(mp, xs_blk_unmap);
isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
start = bno;
bno = start + len - 1;
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 6aaa0c1c7200..a160f8a5a3fc 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -52,9 +52,9 @@ struct xfs_bmalloca {
xfs_extlen_t minleft; /* amount must be left after alloc */
bool eof; /* set if allocating past last extent */
bool wasdel; /* replacing a delayed allocation */
- bool userdata;/* set if is user data */
bool aeof; /* allocated space at eof */
bool conv; /* overwriting unwritten extents */
+ char userdata;/* userdata mask */
int flags;
};
@@ -109,6 +109,14 @@ typedef struct xfs_bmap_free
*/
#define XFS_BMAPI_CONVERT 0x040
+/*
+ * allocate zeroed extents - this requires all newly allocated user data extents
+ * to be initialised to zero. It will be ignored if XFS_BMAPI_METADATA is set.
+ * Use in conjunction with XFS_BMAPI_CONVERT to convert unwritten extents found
+ * during the allocation range to zeroed written extents.
+ */
+#define XFS_BMAPI_ZERO 0x080
+
#define XFS_BMAPI_FLAGS \
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
{ XFS_BMAPI_METADATA, "METADATA" }, \
@@ -116,7 +124,8 @@ typedef struct xfs_bmap_free
{ XFS_BMAPI_PREALLOC, "PREALLOC" }, \
{ XFS_BMAPI_IGSTATE, "IGSTATE" }, \
{ XFS_BMAPI_CONTIG, "CONTIG" }, \
- { XFS_BMAPI_CONVERT, "CONVERT" }
+ { XFS_BMAPI_CONVERT, "CONVERT" }, \
+ { XFS_BMAPI_ZERO, "ZERO" }
static inline int xfs_bmapi_aflag(int w)
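
XFS_BMAPI_ZERO is consumed by xfs_bmapi_write(); the xfs_iomap_write_direct() hunk later in this patch shows the intended DAX use. A condensed sketch of that call:

	/*
	 * Convert unwritten extents in the range and zero the blocks so
	 * the mapping comes back as plain written extents. Assertions in
	 * xfs_bmapi_write() reject ZERO combined with PREALLOC or
	 * METADATA, as documented above.
	 */
	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
				XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO,
				&firstfsb, resblks, imap, &nimaps, &free_list);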
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index f7d7ee7a2607..af1bbee5586e 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -32,6 +32,7 @@
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_alloc.h"
+#include "xfs_log.h"
/*
* Cursor allocation zone.
@@ -222,7 +223,7 @@ xfs_btree_check_ptr(
* long-form btree header.
*
* Prior to calculating the CRC, pull the LSN out of the buffer log item and put
- * it into the buffer so recovery knows what the last modifcation was that made
+ * it into the buffer so recovery knows what the last modification was that made
* it to disk.
*/
void
@@ -243,8 +244,14 @@ bool
xfs_btree_lblock_verify_crc(
struct xfs_buf *bp)
{
- if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
+ return false;
return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
+ }
return true;
}
@@ -254,7 +261,7 @@ xfs_btree_lblock_verify_crc(
* short-form btree header.
*
* Prior to calculating the CRC, pull the LSN out of the buffer log item and put
- * it into the buffer so recovery knows what the last modifcation was that made
+ * it into the buffer so recovery knows what the last modification was that made
* it to disk.
*/
void
@@ -275,8 +282,14 @@ bool
xfs_btree_sblock_verify_crc(
struct xfs_buf *bp)
{
- if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
+ return false;
return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
+ }
return true;
}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 8f18bab73ea5..992dec0638f3 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -84,31 +84,38 @@ union xfs_btree_rec {
/*
* Generic stats interface
*/
-#define __XFS_BTREE_STATS_INC(type, stat) \
- XFS_STATS_INC(xs_ ## type ## _2_ ## stat)
-#define XFS_BTREE_STATS_INC(cur, stat) \
+#define __XFS_BTREE_STATS_INC(mp, type, stat) \
+ XFS_STATS_INC(mp, xs_ ## type ## _2_ ## stat)
+#define XFS_BTREE_STATS_INC(cur, stat) \
do { \
+ struct xfs_mount *__mp = cur->bc_mp; \
switch (cur->bc_btnum) { \
- case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(abtb, stat); break; \
- case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \
- case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \
- case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \
- case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break; \
+ case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(__mp, abtb, stat); break; \
+ case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(__mp, abtc, stat); break; \
+ case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(__mp, bmbt, stat); break; \
+ case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(__mp, ibt, stat); break; \
+ case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(__mp, fibt, stat); break; \
case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
} \
} while (0)
-#define __XFS_BTREE_STATS_ADD(type, stat, val) \
- XFS_STATS_ADD(xs_ ## type ## _2_ ## stat, val)
+#define __XFS_BTREE_STATS_ADD(mp, type, stat, val) \
+ XFS_STATS_ADD(mp, xs_ ## type ## _2_ ## stat, val)
#define XFS_BTREE_STATS_ADD(cur, stat, val) \
do { \
+ struct xfs_mount *__mp = cur->bc_mp; \
switch (cur->bc_btnum) { \
- case XFS_BTNUM_BNO: __XFS_BTREE_STATS_ADD(abtb, stat, val); break; \
- case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \
- case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \
- case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \
- case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \
- case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
+ case XFS_BTNUM_BNO: \
+ __XFS_BTREE_STATS_ADD(__mp, abtb, stat, val); break; \
+ case XFS_BTNUM_CNT: \
+ __XFS_BTREE_STATS_ADD(__mp, abtc, stat, val); break; \
+ case XFS_BTNUM_BMAP: \
+ __XFS_BTREE_STATS_ADD(__mp, bmbt, stat, val); break; \
+ case XFS_BTNUM_INO: \
+ __XFS_BTREE_STATS_ADD(__mp, ibt, stat, val); break; \
+ case XFS_BTNUM_FINO: \
+ __XFS_BTREE_STATS_ADD(__mp, fibt, stat, val); break; \
+ case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
} \
} while (0)
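
The reworked macros resolve the mount from the btree cursor, so the many existing call sites keep their shape while also feeding the new per-filesystem counters, e.g.:

	XFS_BTREE_STATS_INC(cur, lookup);	/* one lookup on cur's btree */
	XFS_BTREE_STATS_ADD(cur, moves, nrecs);	/* bulk record moves */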
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index be43248a5822..e89a0f8f827c 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -39,6 +39,7 @@
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_buf_item.h"
+#include "xfs_log.h"
/*
* xfs_da_btree.c
@@ -150,6 +151,8 @@ xfs_da3_node_verify(
return false;
if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
return false;
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn)))
+ return false;
} else {
if (ichdr.magic != XFS_DA_NODE_MAGIC)
return false;
@@ -322,6 +325,7 @@ xfs_da3_node_create(
if (xfs_sb_version_hascrc(&mp->m_sb)) {
struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
+ memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr));
ichdr.magic = XFS_DA3_NODE_MAGIC;
hdr3->info.blkno = cpu_to_be64(bp->b_bn);
hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 9de401d297e5..2fb53a5c0a74 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -271,7 +271,7 @@ xfs_dir_createname(
rval = xfs_dir_ino_validate(tp->t_mountp, inum);
if (rval)
return rval;
- XFS_STATS_INC(xs_dir_create);
+ XFS_STATS_INC(dp->i_mount, xs_dir_create);
}
args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
@@ -365,7 +365,7 @@ xfs_dir_lookup(
int lock_mode;
ASSERT(S_ISDIR(dp->i_d.di_mode));
- XFS_STATS_INC(xs_dir_lookup);
+ XFS_STATS_INC(dp->i_mount, xs_dir_lookup);
/*
* We need to use KM_NOFS here so that lockdep will not throw false
@@ -444,7 +444,7 @@ xfs_dir_removename(
int v; /* type-checking value */
ASSERT(S_ISDIR(dp->i_d.di_mode));
- XFS_STATS_INC(xs_dir_remove);
+ XFS_STATS_INC(dp->i_mount, xs_dir_remove);
args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
if (!args)
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index 4778d1dd511a..9c10e2b8cfcb 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -34,6 +34,7 @@
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
+#include "xfs_log.h"
/*
* Local function prototypes.
@@ -71,6 +72,8 @@ xfs_dir3_block_verify(
return false;
if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
return false;
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
+ return false;
} else {
if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
return false;
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index 824131e71bc5..af71a84f343c 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -31,6 +31,7 @@
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_cksum.h"
+#include "xfs_log.h"
/*
* Check the consistency of the data block.
@@ -224,6 +225,8 @@ xfs_dir3_data_verify(
return false;
if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
return false;
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
+ return false;
} else {
if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC))
return false;
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index f300240ebb8d..3923e1f94697 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -33,6 +33,7 @@
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_cksum.h"
+#include "xfs_log.h"
/*
* Local function declarations.
@@ -164,6 +165,8 @@ xfs_dir3_leaf_verify(
return false;
if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
return false;
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn)))
+ return false;
} else {
if (leaf->hdr.info.magic != cpu_to_be16(magic))
return false;
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index cc28e924545b..70b0cb2fd556 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -33,6 +33,7 @@
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_cksum.h"
+#include "xfs_log.h"
/*
* Function declarations.
@@ -97,6 +98,8 @@ xfs_dir3_free_verify(
return false;
if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
return false;
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
+ return false;
} else {
if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC))
return false;
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 9590a069e556..8774498ce0ff 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -60,6 +60,14 @@ struct xfs_ifork;
#define XFS_SB_VERSION_MOREBITSBIT 0x8000
/*
+ * The size of a single extended attribute on disk is limited by
+ * the size of index values within the attribute entries themselves.
+ * These are be16 fields, so we can only support attribute data
+ * sizes up to 2^16 bytes in length.
+ */
+#define XFS_XATTR_SIZE_MAX (1 << 16)
+
+/*
* Supported feature bit list is just all bits in the versionnum field because
* we've used them all up and understand them all. Except, of course, for the
* shared superblock bit, which nobody knows what it does and so is unsupported.
@@ -1483,13 +1491,17 @@ struct xfs_acl {
*/
#define XFS_ACL_MAX_ENTRIES(mp) \
(xfs_sb_version_hascrc(&mp->m_sb) \
- ? (XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \
+ ? (XFS_XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \
sizeof(struct xfs_acl_entry) \
: 25)
-#define XFS_ACL_MAX_SIZE(mp) \
+#define XFS_ACL_SIZE(cnt) \
(sizeof(struct xfs_acl) + \
- sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp)))
+ sizeof(struct xfs_acl_entry) * cnt)
+
+#define XFS_ACL_MAX_SIZE(mp) \
+ XFS_ACL_SIZE(XFS_ACL_MAX_ENTRIES((mp)))
+
/* On-disk XFS extended attribute names */
#define SGI_ACL_FILE "SGI_ACL_FILE"
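
For reference, the arithmetic behind the new macros, given the on-disk layouts in this header (4-byte struct xfs_acl header, 12-byte struct xfs_acl_entry):

	/*
	 * XFS_ACL_SIZE(25)         = 4 + 25 * 12      = 304 bytes (v4 cap)
	 * XFS_ACL_MAX_ENTRIES (v5) = (65536 - 4) / 12 = 5461 entries
	 *
	 * xfs_acl_from_disk() further down uses XFS_ACL_SIZE(count) != len
	 * to reject buffers whose length disagrees with their entry count.
	 */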
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 89689c6a43e2..b2b73a998d42 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -490,6 +490,16 @@ typedef struct xfs_swapext
#define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
/*
+ * ioctl limits
+ */
+#ifdef XATTR_LIST_MAX
+# define XFS_XATTR_LIST_MAX XATTR_LIST_MAX
+#else
+# define XFS_XATTR_LIST_MAX 65536
+#endif
+
+
+/*
* ioctl commands that are used by Linux filesystems
*/
#define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 54deb2d12ac6..70c1db99f6a7 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -38,6 +38,7 @@
#include "xfs_icreate_item.h"
#include "xfs_icache.h"
#include "xfs_trace.h"
+#include "xfs_log.h"
/*
@@ -2500,9 +2501,14 @@ xfs_agi_verify(
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_agi *agi = XFS_BUF_TO_AGI(bp);
- if (xfs_sb_version_hascrc(&mp->m_sb) &&
- !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid))
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid))
+ return false;
+ if (!xfs_log_check_lsn(mp,
+ be64_to_cpu(XFS_BUF_TO_AGI(bp)->agi_lsn)))
return false;
+ }
+
/*
* Validate the magic number of the agi block.
*/
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 47425140f343..a0b071d881a0 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -35,6 +35,7 @@
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
+#include "xfs_log.h"
/*
* Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -163,6 +164,15 @@ xfs_mount_validate_sb(
"Filesystem can not be safely mounted by this kernel.");
return -EINVAL;
}
+ } else if (xfs_sb_version_hascrc(sbp)) {
+ /*
+ * We can't read verify the sb LSN because the read verifier is
+ * called before the log is allocated and processed. We know the
+ * log is set up before write verifier (!check_version) calls,
+ * so just check it here.
+ */
+ if (!xfs_log_check_lsn(mp, sbp->sb_lsn))
+ return -EFSCORRUPTED;
}
if (xfs_sb_version_has_pquotino(sbp)) {
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index 8f8af05b3f13..cb6fd20a4d3d 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -31,6 +31,7 @@
#include "xfs_cksum.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
+#include "xfs_log.h"
/*
@@ -60,6 +61,7 @@ xfs_symlink_hdr_set(
if (!xfs_sb_version_hascrc(&mp->m_sb))
return 0;
+ memset(dsl, 0, sizeof(struct xfs_dsymlink_hdr));
dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC);
dsl->sl_offset = cpu_to_be32(offset);
dsl->sl_bytes = cpu_to_be32(size);
@@ -116,6 +118,8 @@ xfs_symlink_verify(
return false;
if (dsl->sl_owner == 0)
return false;
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(dsl->sl_lsn)))
+ return false;
return true;
}
@@ -183,6 +187,7 @@ xfs_symlink_local_to_remote(
if (!xfs_sb_version_hascrc(&mp->m_sb)) {
bp->b_ops = NULL;
memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
+ xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
return;
}
@@ -198,4 +203,6 @@ xfs_symlink_local_to_remote(
buf = bp->b_addr;
buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp);
memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes);
+ xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) +
+ ifp->if_bytes - 1);
}
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 4b641676f258..6bb470fbb8e8 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -37,16 +37,19 @@
STATIC struct posix_acl *
xfs_acl_from_disk(
- struct xfs_acl *aclp,
- int max_entries)
+ const struct xfs_acl *aclp,
+ int len,
+ int max_entries)
{
struct posix_acl_entry *acl_e;
struct posix_acl *acl;
- struct xfs_acl_entry *ace;
+ const struct xfs_acl_entry *ace;
unsigned int count, i;
+ if (len < sizeof(*aclp))
+ return ERR_PTR(-EFSCORRUPTED);
count = be32_to_cpu(aclp->acl_cnt);
- if (count > max_entries)
+ if (count > max_entries || XFS_ACL_SIZE(count) != len)
return ERR_PTR(-EFSCORRUPTED);
acl = posix_acl_alloc(count, GFP_KERNEL);
@@ -160,10 +163,11 @@ xfs_get_acl(struct inode *inode, int type)
*/
if (error == -ENOATTR)
goto out_update_cache;
+ acl = ERR_PTR(error);
goto out;
}
- acl = xfs_acl_from_disk(xfs_acl, XFS_ACL_MAX_ENTRIES(ip->i_mount));
+ acl = xfs_acl_from_disk(xfs_acl, len, XFS_ACL_MAX_ENTRIES(ip->i_mount));
if (IS_ERR(acl))
goto out;
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 3841b07f27bf..52f8255d6bdf 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -20,7 +20,6 @@
struct inode;
struct posix_acl;
-struct xfs_inode;
#ifdef CONFIG_XFS_POSIX_ACL
extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
@@ -36,4 +35,7 @@ static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type)
# define posix_acl_access_exists(inode) 0
# define posix_acl_default_exists(inode) 0
#endif /* CONFIG_XFS_POSIX_ACL */
+
+extern void xfs_forget_acl(struct inode *inode, const char *name, int xflags);
+
#endif /* __XFS_ACL_H__ */
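
xfs_forget_acl() is declared here and called from the attr-multi ioctl paths further down, but its body (per the diffstat, in fs/xfs/xfs_xattr.c) is outside this excerpt. A hedged sketch of what it plausibly does:

	/*
	 * Sketch, not the patch's verbatim code: when an ACL xattr is set
	 * or removed through the raw root-namespace interface, drop any
	 * cached posix_acl so the next access rebuilds it from disk.
	 */
	void
	xfs_forget_acl(
		struct inode	*inode,
		const char	*name,
		int		xflags)
	{
		if (xflags & ATTR_ROOT) {
	#ifdef CONFIG_XFS_POSIX_ACL
			if (!strcmp(name, SGI_ACL_FILE))
				forget_cached_acl(inode, ACL_TYPE_ACCESS);
			else if (!strcmp(name, SGI_ACL_DEFAULT))
				forget_cached_acl(inode, ACL_TYPE_DEFAULT);
	#endif
		}
	}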
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 50ab2879b9da..29e7e5dd5178 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -172,6 +172,12 @@ xfs_setfilesize_ioend(
current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
+ /* we abort the update if there was an IO error */
+ if (ioend->io_error) {
+ xfs_trans_cancel(tp);
+ return ioend->io_error;
+ }
+
return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
}
@@ -212,14 +218,17 @@ xfs_end_io(
ioend->io_error = -EIO;
goto done;
}
- if (ioend->io_error)
- goto done;
/*
* For unwritten extents we need to issue transactions to convert a
* range to normal written extents after the data I/O has finished.
+ * Detecting and handling completion IO errors is done individually
+ * for each case as different cleanup operations need to be performed
+ * on error.
*/
if (ioend->io_type == XFS_IO_UNWRITTEN) {
+ if (ioend->io_error)
+ goto done;
error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
ioend->io_size);
} else if (ioend->io_append_trans) {
@@ -1250,13 +1259,28 @@ xfs_vm_releasepage(
* the DIO. There is only going to be one reference to the ioend and its life
* cycle is constrained by the DIO completion code. hence we don't need
* reference counting here.
+ *
+ * Note that for DIO, an IO to the highest supported file block offset (i.e.
+ * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64
+ * bit variable. Hence if we see this overflow, we have to assume that the IO is
+ * extending the file size. We won't know for sure until IO completion is run
+ * and the actual max write offset is communicated to the IO completion
+ * routine.
+ *
+ * For DAX page faults, we are preparing to never see unwritten extents here,
+ * nor should we ever extend the inode size. Hence we will soon have nothing to
+ * do here for this case, ensuring we don't have to provide an IO completion
+ * callback to free an ioend that we don't actually need for a fault into the
+ * page at offset (2^63 - 1FSB) bytes.
*/
+
static void
xfs_map_direct(
struct inode *inode,
struct buffer_head *bh_result,
struct xfs_bmbt_irec *imap,
- xfs_off_t offset)
+ xfs_off_t offset,
+ bool dax_fault)
{
struct xfs_ioend *ioend;
xfs_off_t size = bh_result->b_size;
@@ -1269,6 +1293,13 @@ xfs_map_direct(
trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap);
+ if (dax_fault) {
+ ASSERT(type == XFS_IO_OVERWRITE);
+ trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
+ imap);
+ return;
+ }
+
if (bh_result->b_private) {
ioend = bh_result->b_private;
ASSERT(ioend->io_size > 0);
@@ -1283,7 +1314,8 @@ xfs_map_direct(
ioend->io_size, ioend->io_type,
imap);
} else if (type == XFS_IO_UNWRITTEN ||
- offset + size > i_size_read(inode)) {
+ offset + size > i_size_read(inode) ||
+ offset + size < 0) {
ioend = xfs_alloc_ioend(inode, type);
ioend->io_offset = offset;
ioend->io_size = size;
@@ -1345,7 +1377,8 @@ __xfs_get_blocks(
sector_t iblock,
struct buffer_head *bh_result,
int create,
- bool direct)
+ bool direct,
+ bool dax_fault)
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
@@ -1393,18 +1426,20 @@ __xfs_get_blocks(
if (error)
goto out_unlock;
+ /* for DAX, we convert unwritten extents directly */
if (create &&
(!nimaps ||
(imap.br_startblock == HOLESTARTBLOCK ||
- imap.br_startblock == DELAYSTARTBLOCK))) {
+ imap.br_startblock == DELAYSTARTBLOCK) ||
+ (IS_DAX(inode) && ISUNWRITTEN(&imap)))) {
if (direct || xfs_get_extsz_hint(ip)) {
/*
- * Drop the ilock in preparation for starting the block
- * allocation transaction. It will be retaken
- * exclusively inside xfs_iomap_write_direct for the
- * actual allocation.
+ * xfs_iomap_write_direct() expects the shared lock. It
+ * is unlocked on return.
*/
- xfs_iunlock(ip, lockmode);
+ if (lockmode == XFS_ILOCK_EXCL)
+ xfs_ilock_demote(ip, lockmode);
+
error = xfs_iomap_write_direct(ip, offset, size,
&imap, nimaps);
if (error)
@@ -1441,6 +1476,12 @@ __xfs_get_blocks(
goto out_unlock;
}
+ if (IS_DAX(inode) && create) {
+ ASSERT(!ISUNWRITTEN(&imap));
+ /* zeroing is not needed at a higher layer */
+ new = 0;
+ }
+
/* trim mapping down to size requested */
if (direct || size > (1 << inode->i_blkbits))
xfs_map_trim_size(inode, iblock, bh_result,
@@ -1458,7 +1499,8 @@ __xfs_get_blocks(
set_buffer_unwritten(bh_result);
/* direct IO needs special help */
if (create && direct)
- xfs_map_direct(inode, bh_result, &imap, offset);
+ xfs_map_direct(inode, bh_result, &imap, offset,
+ dax_fault);
}
/*
@@ -1505,7 +1547,7 @@ xfs_get_blocks(
struct buffer_head *bh_result,
int create)
{
- return __xfs_get_blocks(inode, iblock, bh_result, create, false);
+ return __xfs_get_blocks(inode, iblock, bh_result, create, false, false);
}
int
@@ -1515,7 +1557,17 @@ xfs_get_blocks_direct(
struct buffer_head *bh_result,
int create)
{
- return __xfs_get_blocks(inode, iblock, bh_result, create, true);
+ return __xfs_get_blocks(inode, iblock, bh_result, create, true, false);
+}
+
+int
+xfs_get_blocks_dax_fault(
+ struct inode *inode,
+ sector_t iblock,
+ struct buffer_head *bh_result,
+ int create)
+{
+ return __xfs_get_blocks(inode, iblock, bh_result, create, true, true);
}
static void
@@ -1614,45 +1666,6 @@ xfs_end_io_direct_write(
__xfs_end_io_direct_write(inode, ioend, offset, size);
}
-/*
- * For DAX we need a mapping buffer callback for unwritten extent conversion
- * when page faults allocate blocks and then zero them. Note that in this
- * case the mapping indicated by the ioend may extend beyond EOF. We most
- * definitely do not want to extend EOF here, so we trim back the ioend size to
- * EOF.
- */
-#ifdef CONFIG_FS_DAX
-void
-xfs_end_io_dax_write(
- struct buffer_head *bh,
- int uptodate)
-{
- struct xfs_ioend *ioend = bh->b_private;
- struct inode *inode = ioend->io_inode;
- ssize_t size = ioend->io_size;
-
- ASSERT(IS_DAX(ioend->io_inode));
-
- /* if there was an error zeroing, then don't convert it */
- if (!uptodate)
- ioend->io_error = -EIO;
-
- /*
- * Trim update to EOF, so we don't extend EOF during unwritten extent
- * conversion of partial EOF blocks.
- */
- spin_lock(&XFS_I(inode)->i_flags_lock);
- if (ioend->io_offset + size > i_size_read(inode))
- size = i_size_read(inode) - ioend->io_offset;
- spin_unlock(&XFS_I(inode)->i_flags_lock);
-
- __xfs_end_io_direct_write(inode, ioend, ioend->io_offset, size);
-
-}
-#else
-void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate) { }
-#endif
-
static inline ssize_t
xfs_vm_do_dio(
struct inode *inode,
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 86afd1ac7895..f6ffc9ae5ceb 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -58,7 +58,8 @@ int xfs_get_blocks(struct inode *inode, sector_t offset,
struct buffer_head *map_bh, int create);
int xfs_get_blocks_direct(struct inode *inode, sector_t offset,
struct buffer_head *map_bh, int create);
-void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate);
+int xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
+ struct buffer_head *map_bh, int create);
extern void xfs_count_page_state(struct page *, int *, int *);
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 65fb37a18e92..0ef7c2ed3f8a 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -511,7 +511,7 @@ xfs_attr_list_int(
xfs_inode_t *dp = context->dp;
uint lock_mode;
- XFS_STATS_INC(xs_attr_list);
+ XFS_STATS_INC(dp->i_mount, xs_attr_list);
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
return -EIO;
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 3bf4ad0d19e4..dbae6490a79a 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -57,6 +57,35 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
}
/*
+ * Routine to zero an extent on disk allocated to the specific inode.
+ *
+ * The VFS functions take a linearised filesystem block offset, so we have to
+ * convert the sparse xfs fsb to the right format first.
+ * VFS types are real funky, too.
+ */
+int
+xfs_zero_extent(
+ struct xfs_inode *ip,
+ xfs_fsblock_t start_fsb,
+ xfs_off_t count_fsb)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb);
+ sector_t block = XFS_BB_TO_FSBT(mp, sector);
+ ssize_t size = XFS_FSB_TO_B(mp, count_fsb);
+
+ if (IS_DAX(VFS_I(ip)))
+ return dax_clear_blocks(VFS_I(ip), block, size);
+
+ /*
+ * let the block layer decide on the fastest method of
+ * implementing the zeroing.
+ */
+ return sb_issue_zeroout(mp->m_super, block, count_fsb, GFP_NOFS);
+
+}
+
+/*
* Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
* caller. Frees all the extents that need freeing, which must be done
* last due to locking considerations. We never free any extents in
@@ -229,6 +258,13 @@ xfs_bmap_rtalloc(
xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
+
+ /* Zero the extent if we were asked to do so */
+ if (ap->userdata & XFS_ALLOC_USERDATA_ZERO) {
+ error = xfs_zero_extent(ap->ip, ap->blkno, ap->length);
+ if (error)
+ return error;
+ }
} else {
ap->length = 0;
}
@@ -1027,7 +1063,7 @@ xfs_alloc_file_space(
xfs_bmap_init(&free_list, &firstfsb);
error = xfs_bmapi_write(tp, ip, startoffset_fsb,
allocatesize_fsb, alloc_type, &firstfsb,
- 0, imapp, &nimaps, &free_list);
+ resblks, imapp, &nimaps, &free_list);
if (error) {
goto error0;
}
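
xfs_zero_extent() has to map the sparse XFS block number into the linear block space the generic helpers expect. A worked conversion under assumed 4k filesystem blocks (8 basic 512-byte blocks per fsb):

	/*
	 * sector = xfs_fsb_to_db(ip, start_fsb)  -> daddr in 512B units
	 * block  = XFS_BB_TO_FSBT(mp, sector)    -> e.g. 81920 / 8 = 10240
	 *                                           (linear fs-block units)
	 * size   = XFS_FSB_TO_B(mp, count_fsb)   -> e.g. 16 * 4096 = 65536
	 *
	 * sb_issue_zeroout() takes the count in fs-block units and, as the
	 * comment above says, lets the block layer pick the cheapest
	 * zeroing method (e.g. WRITE SAME or a zeroing discard).
	 */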
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 8ecffb35935b..3243cdf97f33 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -201,7 +201,7 @@ _xfs_buf_alloc(
atomic_set(&bp->b_pin_count, 0);
init_waitqueue_head(&bp->b_waiters);
- XFS_STATS_INC(xb_create);
+ XFS_STATS_INC(target->bt_mount, xb_create);
trace_xfs_buf_init(bp, _RET_IP_);
return bp;
@@ -354,15 +354,16 @@ retry:
*/
if (!(++retries % 100))
xfs_err(NULL,
- "possible memory allocation deadlock in %s (mode:0x%x)",
+ "%s(%u) possible memory allocation deadlock in %s (mode:0x%x)",
+ current->comm, current->pid,
__func__, gfp_mask);
- XFS_STATS_INC(xb_page_retries);
+ XFS_STATS_INC(bp->b_target->bt_mount, xb_page_retries);
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry;
}
- XFS_STATS_INC(xb_page_found);
+ XFS_STATS_INC(bp->b_target->bt_mount, xb_page_found);
nbytes = min_t(size_t, size, PAGE_SIZE - offset);
size -= nbytes;
@@ -516,7 +517,7 @@ _xfs_buf_find(
new_bp->b_pag = pag;
spin_unlock(&pag->pag_buf_lock);
} else {
- XFS_STATS_INC(xb_miss_locked);
+ XFS_STATS_INC(btp->bt_mount, xb_miss_locked);
spin_unlock(&pag->pag_buf_lock);
xfs_perag_put(pag);
}
@@ -529,11 +530,11 @@ found:
if (!xfs_buf_trylock(bp)) {
if (flags & XBF_TRYLOCK) {
xfs_buf_rele(bp);
- XFS_STATS_INC(xb_busy_locked);
+ XFS_STATS_INC(btp->bt_mount, xb_busy_locked);
return NULL;
}
xfs_buf_lock(bp);
- XFS_STATS_INC(xb_get_locked_waited);
+ XFS_STATS_INC(btp->bt_mount, xb_get_locked_waited);
}
/*
@@ -549,7 +550,7 @@ found:
}
trace_xfs_buf_find(bp, flags, _RET_IP_);
- XFS_STATS_INC(xb_get_locked);
+ XFS_STATS_INC(btp->bt_mount, xb_get_locked);
return bp;
}
@@ -603,7 +604,7 @@ found:
}
}
- XFS_STATS_INC(xb_get);
+ XFS_STATS_INC(target->bt_mount, xb_get);
trace_xfs_buf_get(bp, flags, _RET_IP_);
return bp;
}
@@ -643,7 +644,7 @@ xfs_buf_read_map(
trace_xfs_buf_read(bp, flags, _RET_IP_);
if (!XFS_BUF_ISDONE(bp)) {
- XFS_STATS_INC(xb_get_read);
+ XFS_STATS_INC(target->bt_mount, xb_get_read);
bp->b_ops = ops;
_xfs_buf_read(bp, flags);
} else if (flags & XBF_ASYNC) {
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index a989a9c7edb7..642d55d10075 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -666,7 +666,7 @@ xfs_readdir(
return -EIO;
ASSERT(S_ISDIR(dp->i_d.di_mode));
- XFS_STATS_INC(xs_dir_getdents);
+ XFS_STATS_INC(dp->i_mount, xs_dir_getdents);
args.dp = dp;
args.geo = dp->i_mount->m_dir_geo;
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 30cb3afb67f0..7ac6c5c586cb 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -75,9 +75,9 @@ xfs_qm_dqdestroy(
ASSERT(list_empty(&dqp->q_lru));
mutex_destroy(&dqp->q_qlock);
- kmem_zone_free(xfs_qm_dqzone, dqp);
- XFS_STATS_DEC(xs_qm_dquot);
+ XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot);
+ kmem_zone_free(xfs_qm_dqzone, dqp);
}
/*
@@ -605,7 +605,7 @@ xfs_qm_dqread(
break;
}
- XFS_STATS_INC(xs_qm_dquot);
+ XFS_STATS_INC(mp, xs_qm_dquot);
trace_xfs_dqread(dqp);
@@ -747,12 +747,12 @@ restart:
mutex_unlock(&qi->qi_tree_lock);
trace_xfs_dqget_hit(dqp);
- XFS_STATS_INC(xs_qm_dqcachehits);
+ XFS_STATS_INC(mp, xs_qm_dqcachehits);
*O_dqpp = dqp;
return 0;
}
mutex_unlock(&qi->qi_tree_lock);
- XFS_STATS_INC(xs_qm_dqcachemisses);
+ XFS_STATS_INC(mp, xs_qm_dqcachemisses);
/*
* Dquot cache miss. We don't want to keep the inode lock across
@@ -806,7 +806,7 @@ restart:
mutex_unlock(&qi->qi_tree_lock);
trace_xfs_dqget_dup(dqp);
xfs_qm_dqdestroy(dqp);
- XFS_STATS_INC(xs_qm_dquot_dups);
+ XFS_STATS_INC(mp, xs_qm_dquot_dups);
goto restart;
}
@@ -846,7 +846,7 @@ xfs_qm_dqput(
trace_xfs_dqput_free(dqp);
if (list_lru_add(&qi->qi_lru, &dqp->q_lru))
- XFS_STATS_INC(xs_qm_dquot_unused);
+ XFS_STATS_INC(dqp->q_mount, xs_qm_dquot_unused);
}
xfs_dqunlock(dqp);
}
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index e78feb400e22..f5392ab2def1 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -242,19 +242,30 @@ xfs_file_fsync(
}
/*
- * All metadata updates are logged, which means that we just have
- * to flush the log up to the latest LSN that touched the inode.
+ * All metadata updates are logged, which means that we just have to
+ * flush the log up to the latest LSN that touched the inode. If we have
+ * concurrent fsync/fdatasync() calls, we need them to all block on the
+ * log force before we clear the ili_fsync_fields field. This ensures
+ * that we don't get a racing sync operation that does not wait for the
+ * metadata to hit the journal before returning. If we race with
+ * clearing the ili_fsync_fields, then all that will happen is the log
+ * force will do nothing as the lsn will already be on disk. We can't
+ * race with setting ili_fsync_fields because that is done under
+ * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared
+ * until after the ili_fsync_fields is cleared.
*/
xfs_ilock(ip, XFS_ILOCK_SHARED);
if (xfs_ipincount(ip)) {
if (!datasync ||
- (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP))
+ (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
lsn = ip->i_itemp->ili_last_lsn;
}
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
- if (lsn)
+ if (lsn) {
error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
+ ip->i_itemp->ili_fsync_fields = 0;
+ }
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
/*
* If we only have a single device, and the log force above was
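
Consolidating the fsync hunk above, the resulting code reads as follows; the point is that ili_fsync_fields is cleared only after the log force, still under the shared ilock:

	xfs_ilock(ip, XFS_ILOCK_SHARED);
	if (xfs_ipincount(ip)) {
		if (!datasync ||
		    (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
			lsn = ip->i_itemp->ili_last_lsn;
	}

	if (lsn) {
		error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
		ip->i_itemp->ili_fsync_fields = 0;
	}
	xfs_iunlock(ip, XFS_ILOCK_SHARED);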
@@ -287,7 +298,7 @@ xfs_file_read_iter(
xfs_fsize_t n;
loff_t pos = iocb->ki_pos;
- XFS_STATS_INC(xs_read_calls);
+ XFS_STATS_INC(mp, xs_read_calls);
if (unlikely(iocb->ki_flags & IOCB_DIRECT))
ioflags |= XFS_IO_ISDIRECT;
@@ -365,7 +376,7 @@ xfs_file_read_iter(
ret = generic_file_read_iter(iocb, to);
if (ret > 0)
- XFS_STATS_ADD(xs_read_bytes, ret);
+ XFS_STATS_ADD(mp, xs_read_bytes, ret);
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
@@ -383,7 +394,7 @@ xfs_file_splice_read(
int ioflags = 0;
ssize_t ret;
- XFS_STATS_INC(xs_read_calls);
+ XFS_STATS_INC(ip->i_mount, xs_read_calls);
if (infilp->f_mode & FMODE_NOCMTIME)
ioflags |= XFS_IO_INVIS;
@@ -401,7 +412,7 @@ xfs_file_splice_read(
else
ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
if (ret > 0)
- XFS_STATS_ADD(xs_read_bytes, ret);
+ XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret);
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
@@ -482,6 +493,8 @@ xfs_zero_eof(
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
ASSERT(offset > isize);
+ trace_xfs_zero_eof(ip, isize, offset - isize);
+
/*
* First handle zeroing the block on which isize resides.
*
@@ -574,6 +587,7 @@ xfs_file_aio_write_checks(
struct xfs_inode *ip = XFS_I(inode);
ssize_t error = 0;
size_t count = iov_iter_count(from);
+ bool drained_dio = false;
restart:
error = generic_write_checks(iocb, from);
@@ -611,12 +625,13 @@ restart:
bool zero = false;
spin_unlock(&ip->i_flags_lock);
- if (*iolock == XFS_IOLOCK_SHARED) {
- xfs_rw_iunlock(ip, *iolock);
- *iolock = XFS_IOLOCK_EXCL;
- xfs_rw_ilock(ip, *iolock);
- iov_iter_reexpand(from, count);
-
+ if (!drained_dio) {
+ if (*iolock == XFS_IOLOCK_SHARED) {
+ xfs_rw_iunlock(ip, *iolock);
+ *iolock = XFS_IOLOCK_EXCL;
+ xfs_rw_ilock(ip, *iolock);
+ iov_iter_reexpand(from, count);
+ }
/*
* We now have an IO submission barrier in place, but
* AIO can do EOF updates during IO completion and hence
@@ -626,6 +641,7 @@ restart:
* no-op.
*/
inode_dio_wait(inode);
+ drained_dio = true;
goto restart;
}
error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
@@ -867,7 +883,7 @@ xfs_file_write_iter(
ssize_t ret;
size_t ocount = iov_iter_count(from);
- XFS_STATS_INC(xs_write_calls);
+ XFS_STATS_INC(ip->i_mount, xs_write_calls);
if (ocount == 0)
return 0;
@@ -883,7 +899,7 @@ xfs_file_write_iter(
if (ret > 0) {
ssize_t err;
- XFS_STATS_ADD(xs_write_bytes, ret);
+ XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
/* Handle various SYNC-type writes */
err = generic_write_sync(file, iocb->ki_pos - ret, ret);
@@ -1477,7 +1493,7 @@ xfs_file_llseek(
*
* mmap_sem (MM)
* sb_start_pagefault(vfs, freeze)
- * i_mmap_lock (XFS - truncate serialisation)
+ * i_mmaplock (XFS - truncate serialisation)
* page_lock (MM)
* i_lock (XFS - extent map serialisation)
*/
@@ -1503,10 +1519,9 @@ xfs_filemap_page_mkwrite(
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (IS_DAX(inode)) {
- ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_direct,
- xfs_end_io_dax_write);
+ ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault, NULL);
} else {
- ret = __block_page_mkwrite(vma, vmf, xfs_get_blocks);
+ ret = block_page_mkwrite(vma, vmf, xfs_get_blocks);
ret = block_page_mkwrite_return(ret);
}
@@ -1538,7 +1553,7 @@ xfs_filemap_fault(
* changes to xfs_get_blocks_direct() to map unwritten extent
* ioend for conversion on read-only mappings.
*/
- ret = __dax_fault(vma, vmf, xfs_get_blocks_direct, NULL);
+ ret = __dax_fault(vma, vmf, xfs_get_blocks_dax_fault, NULL);
} else
ret = filemap_fault(vma, vmf);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
@@ -1546,6 +1561,13 @@ xfs_filemap_fault(
return ret;
}
+/*
+ * Similar to xfs_filemap_fault(), the DAX fault path can call into here on
+ * both read and write faults. Hence we need to handle both cases. There is no
+ * ->pmd_mkwrite callout for huge pages, so we have a single function here
+ * to handle both cases. @flags carries the information on the type of fault
+ * occurring.
+ */
STATIC int
xfs_filemap_pmd_fault(
struct vm_area_struct *vma,
@@ -1562,15 +1584,54 @@ xfs_filemap_pmd_fault(
trace_xfs_filemap_pmd_fault(ip);
- sb_start_pagefault(inode->i_sb);
- file_update_time(vma->vm_file);
+ if (flags & FAULT_FLAG_WRITE) {
+ sb_start_pagefault(inode->i_sb);
+ file_update_time(vma->vm_file);
+ }
+
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
- ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_direct,
- xfs_end_io_dax_write);
+ ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_dax_fault,
+ NULL);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
- sb_end_pagefault(inode->i_sb);
+ if (flags & FAULT_FLAG_WRITE)
+ sb_end_pagefault(inode->i_sb);
+
+ return ret;
+}
+
+/*
+ * pfn_mkwrite was originally intended to ensure we capture time stamp
+ * updates on write faults. In reality, it's needed to serialise against
+ * truncate, similar to page_mkwrite. Hence we open-code dax_pfn_mkwrite()
+ * here and cycle the XFS_MMAPLOCK_SHARED to ensure we serialise the fault
+ * barrier in place.
+ */
+static int
+xfs_filemap_pfn_mkwrite(
+ struct vm_area_struct *vma,
+ struct vm_fault *vmf)
+{
+
+ struct inode *inode = file_inode(vma->vm_file);
+ struct xfs_inode *ip = XFS_I(inode);
+ int ret = VM_FAULT_NOPAGE;
+ loff_t size;
+
+ trace_xfs_filemap_pfn_mkwrite(ip);
+
+ sb_start_pagefault(inode->i_sb);
+ file_update_time(vma->vm_file);
+
+ /* check if the faulting page hasn't raced with truncate */
+ xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+ size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ if (vmf->pgoff >= size)
+ ret = VM_FAULT_SIGBUS;
+ xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+ sb_end_pagefault(inode->i_sb);
return ret;
+
}
static const struct vm_operations_struct xfs_file_vm_ops = {
@@ -1578,6 +1639,7 @@ static const struct vm_operations_struct xfs_file_vm_ops = {
.pmd_fault = xfs_filemap_pmd_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = xfs_filemap_page_mkwrite,
+ .pfn_mkwrite = xfs_filemap_pfn_mkwrite,
};
STATIC int
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 0a326bd64d4e..d7a490f24ead 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -63,7 +63,7 @@ xfs_inode_alloc(
return NULL;
}
- XFS_STATS_INC(vn_active);
+ XFS_STATS_INC(mp, vn_active);
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(!xfs_isiflocked(ip));
@@ -129,7 +129,7 @@ xfs_inode_free(
/* asserts to verify all state is correct here */
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!xfs_isiflocked(ip));
- XFS_STATS_DEC(vn_active);
+ XFS_STATS_DEC(ip->i_mount, vn_active);
call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
}
@@ -159,7 +159,7 @@ xfs_iget_cache_hit(
spin_lock(&ip->i_flags_lock);
if (ip->i_ino != ino) {
trace_xfs_iget_skip(ip);
- XFS_STATS_INC(xs_ig_frecycle);
+ XFS_STATS_INC(mp, xs_ig_frecycle);
error = -EAGAIN;
goto out_error;
}
@@ -177,7 +177,7 @@ xfs_iget_cache_hit(
*/
if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
trace_xfs_iget_skip(ip);
- XFS_STATS_INC(xs_ig_frecycle);
+ XFS_STATS_INC(mp, xs_ig_frecycle);
error = -EAGAIN;
goto out_error;
}
@@ -259,7 +259,7 @@ xfs_iget_cache_hit(
xfs_ilock(ip, lock_flags);
xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
- XFS_STATS_INC(xs_ig_found);
+ XFS_STATS_INC(mp, xs_ig_found);
return 0;
@@ -342,7 +342,7 @@ xfs_iget_cache_miss(
error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
if (unlikely(error)) {
WARN_ON(error != -EEXIST);
- XFS_STATS_INC(xs_ig_dup);
+ XFS_STATS_INC(mp, xs_ig_dup);
error = -EAGAIN;
goto out_preload_end;
}
@@ -412,7 +412,7 @@ xfs_iget(
if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
return -EINVAL;
- XFS_STATS_INC(xs_ig_attempts);
+ XFS_STATS_INC(mp, xs_ig_attempts);
/* get the perag structure and ensure that it's inode capable */
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
@@ -429,7 +429,7 @@ again:
goto out_error_or_again;
} else {
rcu_read_unlock();
- XFS_STATS_INC(xs_ig_missed);
+ XFS_STATS_INC(mp, xs_ig_missed);
error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
flags, lock_flags);
@@ -965,7 +965,7 @@ reclaim:
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- XFS_STATS_INC(xs_ig_reclaims);
+ XFS_STATS_INC(ip->i_mount, xs_ig_reclaims);
/*
* Remove the inode from the per-AG radix tree.
*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index dc40a6d5ae0d..8ee393996b7d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2365,6 +2365,7 @@ retry:
iip->ili_last_fields = iip->ili_fields;
iip->ili_fields = 0;
+ iip->ili_fsync_fields = 0;
iip->ili_logged = 1;
xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
&iip->ili_item.li_lsn);
@@ -3271,8 +3272,8 @@ xfs_iflush_cluster(
}
if (clcount) {
- XFS_STATS_INC(xs_icluster_flushcnt);
- XFS_STATS_ADD(xs_icluster_flushinode, clcount);
+ XFS_STATS_INC(mp, xs_icluster_flushcnt);
+ XFS_STATS_ADD(mp, xs_icluster_flushinode, clcount);
}
out_free:
@@ -3345,7 +3346,7 @@ xfs_iflush(
struct xfs_dinode *dip;
int error;
- XFS_STATS_INC(xs_iflush_count);
+ XFS_STATS_INC(mp, xs_iflush_count);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(xfs_isiflocked(ip));
@@ -3560,6 +3561,7 @@ xfs_iflush_int(
*/
iip->ili_last_fields = iip->ili_fields;
iip->ili_fields = 0;
+ iip->ili_fsync_fields = 0;
iip->ili_logged = 1;
xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 62bd80f4edd9..d14b12b8cfef 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -719,6 +719,7 @@ xfs_iflush_abort(
* attempted.
*/
iip->ili_fields = 0;
+ iip->ili_fsync_fields = 0;
}
/*
* Release the inode's flush lock since we're done with it.
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 488d81254e28..4c7722e325b3 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -34,6 +34,7 @@ typedef struct xfs_inode_log_item {
unsigned short ili_logged; /* flushed logged data */
unsigned int ili_last_fields; /* fields when flushed */
unsigned int ili_fields; /* fields to be logged */
+ unsigned int ili_fsync_fields; /* logged since last fsync */
} xfs_inode_log_item_t;
static inline int xfs_inode_clean(xfs_inode_t *ip)
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index ea7d85af5310..d42738deec6d 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -40,6 +40,7 @@
#include "xfs_symlink.h"
#include "xfs_trans.h"
#include "xfs_pnfs.h"
+#include "xfs_acl.h"
#include <linux/capability.h>
#include <linux/dcache.h>
@@ -411,7 +412,7 @@ xfs_attrlist_by_handle(
if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
return -EFAULT;
if (al_hreq.buflen < sizeof(struct attrlist) ||
- al_hreq.buflen > XATTR_LIST_MAX)
+ al_hreq.buflen > XFS_XATTR_LIST_MAX)
return -EINVAL;
/*
@@ -455,7 +456,7 @@ xfs_attrmulti_attr_get(
unsigned char *kbuf;
int error = -EFAULT;
- if (*len > XATTR_SIZE_MAX)
+ if (*len > XFS_XATTR_SIZE_MAX)
return -EINVAL;
kbuf = kmem_zalloc_large(*len, KM_SLEEP);
if (!kbuf)
@@ -482,17 +483,22 @@ xfs_attrmulti_attr_set(
__uint32_t flags)
{
unsigned char *kbuf;
+ int error;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
return -EPERM;
- if (len > XATTR_SIZE_MAX)
+ if (len > XFS_XATTR_SIZE_MAX)
return -EINVAL;
kbuf = memdup_user(ubuf, len);
if (IS_ERR(kbuf))
return PTR_ERR(kbuf);
- return xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
+ error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
+ if (!error)
+ xfs_forget_acl(inode, name, flags);
+ kfree(kbuf);
+ return error;
}
int
@@ -501,9 +507,14 @@ xfs_attrmulti_attr_remove(
unsigned char *name,
__uint32_t flags)
{
+ int error;
+
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
return -EPERM;
- return xfs_attr_remove(XFS_I(inode), name, flags);
+ error = xfs_attr_remove(XFS_I(inode), name, flags);
+ if (!error)
+ xfs_forget_acl(inode, name, flags);
+ return error;
}
STATIC int
@@ -1028,7 +1039,7 @@ xfs_ioctl_setattr_xflags(
xfs_diflags_to_linux(ip);
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- XFS_STATS_INC(xs_ig_attrchg);
+ XFS_STATS_INC(mp, xs_ig_attrchg);
return 0;
}
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index b88bdc85dd3d..1a05d8ae327d 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -356,7 +356,7 @@ xfs_compat_attrlist_by_handle(
sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
return -EFAULT;
if (al_hreq.buflen < sizeof(struct attrlist) ||
- al_hreq.buflen > XATTR_LIST_MAX)
+ al_hreq.buflen > XFS_XATTR_LIST_MAX)
return -EINVAL;
/*
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 1f86033171c8..f4f5b43cf647 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -131,20 +131,30 @@ xfs_iomap_write_direct(
uint qblocks, resblks, resrtextents;
int committed;
int error;
-
- error = xfs_qm_dqattach(ip, 0);
- if (error)
- return error;
+ int lockmode;
+ int bmapi_flags = XFS_BMAPI_PREALLOC;
rt = XFS_IS_REALTIME_INODE(ip);
extsz = xfs_get_extsz_hint(ip);
+ lockmode = XFS_ILOCK_SHARED; /* locked by caller */
+
+ ASSERT(xfs_isilocked(ip, lockmode));
offset_fsb = XFS_B_TO_FSBT(mp, offset);
last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
if ((offset + count) > XFS_ISIZE(ip)) {
+ /*
+ * Assert that the in-core extent list is present since this can
+ * call xfs_iread_extents() and we only have the ilock shared.
+ * This should be safe because the lock was held around a bmapi
+ * call in the caller and we only need it to access the in-core
+ * list.
+ */
+ ASSERT(XFS_IFORK_PTR(ip, XFS_DATA_FORK)->if_flags &
+ XFS_IFEXTENTS);
error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
if (error)
- return error;
+ goto out_unlock;
} else {
if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
last_fsb = MIN(last_fsb, (xfs_fileoff_t)
@@ -174,9 +184,35 @@ xfs_iomap_write_direct(
}
/*
+ * Drop the shared lock acquired by the caller, attach the dquot if
+ * necessary and move on to transaction setup.
+ */
+ xfs_iunlock(ip, lockmode);
+ error = xfs_qm_dqattach(ip, 0);
+ if (error)
+ return error;
+
+ /*
* Allocate and setup the transaction
*/
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
+
+ /*
+ * For DAX, we do not allocate unwritten extents, but instead we zero
+ * the block before we commit the transaction. Ideally we'd like to do
+ * this outside the transaction context, but if we commit and then crash
+ * we may not have zeroed the blocks and this will be exposed on
+ * recovery of the allocation. Hence we must zero before commit.
+ * Further, if we are mapping unwritten extents here, we need to zero
+ * and convert them to written so that we don't need an unwritten extent
+ * callback for DAX. This also means that we need to be able to dip into
+ * the reserve block pool if there is no space left but we need to do
+ * unwritten extent conversion.
+ */
+ if (IS_DAX(VFS_I(ip))) {
+ bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
+ tp->t_flags |= XFS_TRANS_RESERVE;
+ }
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
resblks, resrtextents);
/*
@@ -187,7 +223,8 @@ xfs_iomap_write_direct(
return error;
}
- xfs_ilock(ip, XFS_ILOCK_EXCL);
+ lockmode = XFS_ILOCK_EXCL;
+ xfs_ilock(ip, lockmode);
error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
if (error)
@@ -202,8 +239,8 @@ xfs_iomap_write_direct(
xfs_bmap_init(&free_list, &firstfsb);
nimaps = 1;
error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
- XFS_BMAPI_PREALLOC, &firstfsb, 0,
- imap, &nimaps, &free_list);
+ bmapi_flags, &firstfsb, resblks, imap,
+ &nimaps, &free_list);
if (error)
goto out_bmap_cancel;
@@ -213,6 +250,7 @@ xfs_iomap_write_direct(
error = xfs_bmap_finish(&tp, &free_list, &committed);
if (error)
goto out_bmap_cancel;
+
error = xfs_trans_commit(tp);
if (error)
goto out_unlock;
@@ -229,7 +267,7 @@ xfs_iomap_write_direct(
error = xfs_alert_fsblock_zero(ip, imap);
out_unlock:
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_iunlock(ip, lockmode);
return error;
out_bmap_cancel:
@@ -670,7 +708,7 @@ xfs_iomap_write_allocate(
count_fsb = imap->br_blockcount;
map_start_fsb = imap->br_startoff;
- XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb));
+ XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb));
while (count_fsb != 0) {
/*
@@ -750,9 +788,9 @@ xfs_iomap_write_allocate(
* pointer that the caller gave to us.
*/
error = xfs_bmapi_write(tp, ip, map_start_fsb,
- count_fsb, 0,
- &first_block, 1,
- imap, &nimaps, &free_list);
+ count_fsb, 0, &first_block,
+ nres, imap, &nimaps,
+ &free_list);
if (error)
goto trans_cancel;
@@ -777,7 +815,7 @@ xfs_iomap_write_allocate(
if ((offset_fsb >= imap->br_startoff) &&
(offset_fsb < (imap->br_startoff +
imap->br_blockcount))) {
- XFS_STATS_INC(xs_xstrat_quick);
+ XFS_STATS_INC(mp, xs_xstrat_quick);
return 0;
}
@@ -866,8 +904,8 @@ xfs_iomap_write_unwritten(
xfs_bmap_init(&free_list, &firstfsb);
nimaps = 1;
error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
- XFS_BMAPI_CONVERT, &firstfsb,
- 1, &imap, &nimaps, &free_list);
+ XFS_BMAPI_CONVERT, &firstfsb, resblks,
+ &imap, &nimaps, &free_list);
if (error)
goto error_on_bmapi_transaction;
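
[Annotation] Sketch of the lock handoff xfs_iomap_write_direct() now implements: it enters owning the caller's shared ilock, drops it before the blocking dquot attach and transaction reservation, then retakes the lock exclusive, tracking the mode in one variable so the single out_unlock label is correct on every path. Condensed and illustrative, not the full function:

	int	lockmode = XFS_ILOCK_SHARED;	/* taken by the caller */

	ASSERT(xfs_isilocked(ip, lockmode));
	/* ... read-only checks under the shared lock ... */
	xfs_iunlock(ip, lockmode);

	/* dquot attach and transaction reservation may sleep here */

	lockmode = XFS_ILOCK_EXCL;
	xfs_ilock(ip, lockmode);
	/* ... allocate and commit ... */
out_unlock:
	xfs_iunlock(ip, lockmode);	/* drops whichever mode we hold */
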
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 8294132e6a3c..245268a0cdf0 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -695,7 +695,7 @@ xfs_setattr_nonsize(
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- XFS_STATS_INC(xs_ig_attrchg);
+ XFS_STATS_INC(mp, xs_ig_attrchg);
if (mp->m_flags & XFS_MOUNT_WSYNC)
xfs_trans_set_sync(tp);
@@ -922,7 +922,7 @@ xfs_setattr_size(
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- XFS_STATS_INC(xs_ig_attrchg);
+ XFS_STATS_INC(mp, xs_ig_attrchg);
if (mp->m_flags & XFS_MOUNT_WSYNC)
xfs_trans_set_sync(tp);
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 85f883dd6207..ec0e239a0fa9 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -171,6 +171,13 @@ struct xfs_kobj {
struct completion complete;
};
+struct xstats {
+ struct xfsstats __percpu *xs_stats;
+ struct xfs_kobj xs_kobj;
+};
+
+extern struct xstats xfsstats;
+
/* Kernel uid/gid conversion. These are used to convert to/from the on disk
* uid_t/gid_t types to the kuid_t/kgid_t types that the kernel uses internally.
* The conversion here is type only, the value will remain the same since we
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index aaadee0969c9..f52c72a1a06f 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -268,7 +268,7 @@ xlog_grant_head_wait(
__set_current_state(TASK_UNINTERRUPTIBLE);
spin_unlock(&head->lock);
- XFS_STATS_INC(xs_sleep_logspace);
+ XFS_STATS_INC(log->l_mp, xs_sleep_logspace);
trace_xfs_log_grant_sleep(log, tic);
schedule();
@@ -379,7 +379,7 @@ xfs_log_regrant(
if (XLOG_FORCED_SHUTDOWN(log))
return -EIO;
- XFS_STATS_INC(xs_try_logspace);
+ XFS_STATS_INC(mp, xs_try_logspace);
/*
* This is a new transaction on the ticket, so we need to change the
@@ -448,7 +448,7 @@ xfs_log_reserve(
if (XLOG_FORCED_SHUTDOWN(log))
return -EIO;
- XFS_STATS_INC(xs_try_logspace);
+ XFS_STATS_INC(mp, xs_try_logspace);
ASSERT(*ticp == NULL);
tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent,
@@ -1768,7 +1768,7 @@ xlog_sync(
int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);
int size;
- XFS_STATS_INC(xs_log_writes);
+ XFS_STATS_INC(log->l_mp, xs_log_writes);
ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
/* Add for LR header */
@@ -1805,7 +1805,7 @@ xlog_sync(
bp = iclog->ic_bp;
XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)));
- XFS_STATS_ADD(xs_log_blocks, BTOBB(count));
+ XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count));
/* Do we need to split this write into 2 parts? */
if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) {
@@ -2422,11 +2422,20 @@ xlog_write(
&partial_copy_len);
xlog_verify_dest_ptr(log, ptr);
- /* copy region */
+ /*
+ * Copy region.
+ *
+ * Unmount records just log an opheader, so they can
+ * have empty payloads with no data region to copy.
+ * Hence we only copy the payload if the vector says it
+ * has data to copy.
+ */
ASSERT(copy_len >= 0);
- memcpy(ptr, reg->i_addr + copy_off, copy_len);
- xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len);
-
+ if (copy_len > 0) {
+ memcpy(ptr, reg->i_addr + copy_off, copy_len);
+ xlog_write_adv_cnt(&ptr, &len, &log_offset,
+ copy_len);
+ }
copy_len += start_rec_copy + sizeof(xlog_op_header_t);
record_cnt++;
data_cnt += contwr ? copy_len : 0;
@@ -2913,7 +2922,7 @@ restart:
iclog = log->l_iclog;
if (iclog->ic_state != XLOG_STATE_ACTIVE) {
- XFS_STATS_INC(xs_log_noiclogs);
+ XFS_STATS_INC(log->l_mp, xs_log_noiclogs);
/* Wait for log writes to have flushed */
xlog_wait(&log->l_flush_wait, &log->l_icloglock);
@@ -3165,11 +3174,19 @@ xlog_state_switch_iclogs(
}
if (log->l_curr_block >= log->l_logBBsize) {
+ /*
+ * Rewind the current block before the cycle is bumped to make
+ * sure that the combined LSN never transiently moves forward
+ * when the log wraps to the next cycle. This is to support the
+ * unlocked sample of these fields from xlog_valid_lsn(). Most
+ * other cases should acquire l_icloglock.
+ */
+ log->l_curr_block -= log->l_logBBsize;
+ ASSERT(log->l_curr_block >= 0);
+ smp_wmb();
log->l_curr_cycle++;
if (log->l_curr_cycle == XLOG_HEADER_MAGIC_NUM)
log->l_curr_cycle++;
- log->l_curr_block -= log->l_logBBsize;
- ASSERT(log->l_curr_block >= 0);
}
ASSERT(iclog == log->l_iclog);
log->l_iclog = iclog->ic_next;
@@ -3212,7 +3229,7 @@ _xfs_log_force(
struct xlog_in_core *iclog;
xfs_lsn_t lsn;
- XFS_STATS_INC(xs_log_force);
+ XFS_STATS_INC(mp, xs_log_force);
xlog_cil_force(log);
@@ -3297,7 +3314,7 @@ maybe_sleep:
spin_unlock(&log->l_icloglock);
return -EIO;
}
- XFS_STATS_INC(xs_log_force_sleep);
+ XFS_STATS_INC(mp, xs_log_force_sleep);
xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
/*
* No need to grab the log lock here since we're
@@ -3362,7 +3379,7 @@ _xfs_log_force_lsn(
ASSERT(lsn != 0);
- XFS_STATS_INC(xs_log_force);
+ XFS_STATS_INC(mp, xs_log_force);
lsn = xlog_cil_force_lsn(log, lsn);
if (lsn == NULLCOMMITLSN)
@@ -3411,7 +3428,7 @@ try_again:
(XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
- XFS_STATS_INC(xs_log_force_sleep);
+ XFS_STATS_INC(mp, xs_log_force_sleep);
xlog_wait(&iclog->ic_prev->ic_write_wait,
&log->l_icloglock);
@@ -3441,7 +3458,7 @@ try_again:
spin_unlock(&log->l_icloglock);
return -EIO;
}
- XFS_STATS_INC(xs_log_force_sleep);
+ XFS_STATS_INC(mp, xs_log_force_sleep);
xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
/*
* No need to grab the log lock here since we're
@@ -4023,3 +4040,45 @@ xlog_iclogs_empty(
return 1;
}
+/*
+ * Verify that an LSN stamped into a piece of metadata is valid. This is
+ * intended for use in read verifiers on v5 superblocks.
+ */
+bool
+xfs_log_check_lsn(
+ struct xfs_mount *mp,
+ xfs_lsn_t lsn)
+{
+ struct xlog *log = mp->m_log;
+ bool valid;
+
+ /*
+ * norecovery mode skips mount-time log processing and unconditionally
+ * resets the in-core LSN. We can't validate in this mode, but
+ * modifications are not allowed anyway, so just return true.
+ */
+ if (mp->m_flags & XFS_MOUNT_NORECOVERY)
+ return true;
+
+ /*
+ * Some metadata LSNs are initialized to NULL (e.g., the agfl). This is
+ * handled by recovery and thus safe to ignore here.
+ */
+ if (lsn == NULLCOMMITLSN)
+ return true;
+
+ valid = xlog_valid_lsn(mp->m_log, lsn);
+
+ /* warn the user about what's gone wrong before verifier failure */
+ if (!valid) {
+ spin_lock(&log->l_icloglock);
+ xfs_warn(mp,
+"Corruption warning: Metadata has LSN (%d:%d) ahead of current LSN (%d:%d). "
+"Please unmount and run xfs_repair (>= v4.3) to resolve.",
+ CYCLE_LSN(lsn), BLOCK_LSN(lsn),
+ log->l_curr_cycle, log->l_curr_block);
+ spin_unlock(&log->l_icloglock);
+ }
+
+ return valid;
+}
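
[Annotation] As the comment notes, xfs_log_check_lsn() is aimed at read verifiers on v5 filesystems. A hedged sketch of how a verifier might consume it; the on-disk header type below is a placeholder and not part of this diff:

	static void
	example_read_verify(
		struct xfs_buf		*bp)
	{
		struct xfs_mount	*mp = bp->b_target->bt_mount;
		struct example_hdr	*hdr = bp->b_addr;	/* hypothetical */

		if (xfs_sb_version_hascrc(&mp->m_sb) &&
		    !xfs_log_check_lsn(mp, be64_to_cpu(hdr->lsn)))
			xfs_verifier_error(bp);	/* metadata ahead of the log */
	}
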
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 09d91d3166cd..aa533a7d50f2 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -181,5 +181,6 @@ bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
void xfs_log_work_queue(struct xfs_mount *mp);
void xfs_log_worker(struct work_struct *work);
void xfs_log_quiesce(struct xfs_mount *mp);
+bool xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t);
#endif /* __XFS_LOG_H__ */
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 950f3f94720c..8daba7491b13 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -560,4 +560,55 @@ static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock)
remove_wait_queue(wq, &wait);
}
+/*
+ * The LSN is valid so long as it is behind the current LSN. If it isn't, this
+ * means that the next log record that includes this metadata could have a
+ * smaller LSN. In turn, this means that the modification in the log would not
+ * replay.
+ */
+static inline bool
+xlog_valid_lsn(
+ struct xlog *log,
+ xfs_lsn_t lsn)
+{
+ int cur_cycle;
+ int cur_block;
+ bool valid = true;
+
+ /*
+ * First, sample the current lsn without locking to avoid added
+ * contention from metadata I/O. The current cycle and block are updated
+ * (in xlog_state_switch_iclogs()) and read here in a particular order
+ * to avoid false negatives (e.g., thinking the metadata LSN is valid
+ * when it is not).
+ *
+ * The current block is always rewound before the cycle is bumped in
+ * xlog_state_switch_iclogs() to ensure the current LSN is never seen in
+ * a transiently forward state. Instead, we can see the LSN in a
+ * transiently behind state if we happen to race with a cycle wrap.
+ */
+ cur_cycle = ACCESS_ONCE(log->l_curr_cycle);
+ smp_rmb();
+ cur_block = ACCESS_ONCE(log->l_curr_block);
+
+ if ((CYCLE_LSN(lsn) > cur_cycle) ||
+ (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block)) {
+ /*
+ * If the metadata LSN appears invalid, it's possible the check
+ * above raced with a wrap to the next log cycle. Grab the lock
+ * to check for sure.
+ */
+ spin_lock(&log->l_icloglock);
+ cur_cycle = log->l_curr_cycle;
+ cur_block = log->l_curr_block;
+ spin_unlock(&log->l_icloglock);
+
+ if ((CYCLE_LSN(lsn) > cur_cycle) ||
+ (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block))
+ valid = false;
+ }
+
+ return valid;
+}
+
#endif /* __XFS_LOG_PRIV_H__ */
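
[Annotation] The correctness of the lockless sample rests on the barrier pairing between the updater and xlog_valid_lsn(). Side by side, with the ordering spelled out (illustrative excerpt, not a literal copy of either function):

	/* updater, under l_icloglock (xlog_state_switch_iclogs) */
	log->l_curr_block -= log->l_logBBsize;	/* 1: rewind block */
	smp_wmb();				/* publish 1 before 2 */
	log->l_curr_cycle++;			/* 2: bump cycle */

	/* lockless sampler (xlog_valid_lsn) */
	cur_cycle = ACCESS_ONCE(log->l_curr_cycle);	/* read 2 first */
	smp_rmb();					/* pairs with smp_wmb() */
	cur_block = ACCESS_ONCE(log->l_curr_block);	/* then read 1 */

A racing sample can therefore only observe an old cycle with a rewound block, i.e. a transiently smaller LSN, which at worst triggers the locked re-check rather than a false "valid" result.
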
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 512a0945d52a..c5ecaacdd218 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3431,7 +3431,7 @@ xlog_recover_add_to_cont_trans(
* previous record. Copy the rest of the header.
*/
if (list_empty(&trans->r_itemq)) {
- ASSERT(len < sizeof(struct xfs_trans_header));
+ ASSERT(len <= sizeof(struct xfs_trans_header));
if (len > sizeof(struct xfs_trans_header)) {
xfs_warn(log->l_mp, "%s: bad header length", __func__);
return -EIO;
@@ -4609,9 +4609,19 @@ xlog_recover(
int error;
/* find the tail of the log */
- if ((error = xlog_find_tail(log, &head_blk, &tail_blk)))
+ error = xlog_find_tail(log, &head_blk, &tail_blk);
+ if (error)
return error;
+ /*
+ * The superblock was read before the log was available and thus the LSN
+ * could not be verified. Check the superblock LSN against the current
+ * LSN now that it's known.
+ */
+ if (xfs_sb_version_hascrc(&log->l_mp->m_sb) &&
+ !xfs_log_check_lsn(log->l_mp, log->l_mp->m_sb.sb_lsn))
+ return -EINVAL;
+
if (tail_blk != head_blk) {
/* There used to be a comment here:
*
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
index d8b67547ab34..11792d888e4e 100644
--- a/fs/xfs/xfs_message.c
+++ b/fs/xfs/xfs_message.c
@@ -17,6 +17,7 @@
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_error.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
@@ -43,6 +44,7 @@ void func(const struct xfs_mount *mp, const char *fmt, ...) \
{ \
struct va_format vaf; \
va_list args; \
+ int level; \
\
va_start(args, fmt); \
\
@@ -51,6 +53,11 @@ void func(const struct xfs_mount *mp, const char *fmt, ...) \
\
__xfs_printk(kern_level, mp, &vaf); \
va_end(args); \
+ \
+ if (!kstrtoint(kern_level, 0, &level) && \
+ level <= LOGLEVEL_ERR && \
+ xfs_error_level >= XFS_ERRLEVEL_HIGH) \
+ xfs_stack_trace(); \
} \
define_xfs_printk_level(xfs_emerg, KERN_EMERG);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index bf92e0c037c7..bb753b359bee 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -47,6 +47,16 @@ static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size;
static uuid_t *xfs_uuid_table;
+void
+xfs_uuid_table_free(void)
+{
+ if (xfs_uuid_table_size == 0)
+ return;
+ kmem_free(xfs_uuid_table);
+ xfs_uuid_table = NULL;
+ xfs_uuid_table_size = 0;
+}
+
/*
* See if the UUID is unique among mounted XFS filesystems.
* Mount fails if UUID is nil or a FS with the same UUID is already mounted.
@@ -693,10 +703,15 @@ xfs_mountfs(
if (error)
goto out;
- error = xfs_uuid_mount(mp);
+ error = xfs_sysfs_init(&mp->m_stats.xs_kobj, &xfs_stats_ktype,
+ &mp->m_kobj, "stats");
if (error)
goto out_remove_sysfs;
+ error = xfs_uuid_mount(mp);
+ if (error)
+ goto out_del_stats;
+
/*
* Set the minimum read and write sizes
*/
@@ -971,6 +986,8 @@ xfs_mountfs(
xfs_da_unmount(mp);
out_remove_uuid:
xfs_uuid_unmount(mp);
+ out_del_stats:
+ xfs_sysfs_del(&mp->m_stats.xs_kobj);
out_remove_sysfs:
xfs_sysfs_del(&mp->m_kobj);
out:
@@ -1047,6 +1064,7 @@ xfs_unmountfs(
xfs_warn(mp, "Unable to update superblock counters. "
"Freespace may not be correct on next mount.");
+
xfs_log_unmount(mp);
xfs_da_unmount(mp);
xfs_uuid_unmount(mp);
@@ -1056,6 +1074,7 @@ xfs_unmountfs(
#endif
xfs_free_perag(mp);
+ xfs_sysfs_del(&mp->m_stats.xs_kobj);
xfs_sysfs_del(&mp->m_kobj);
}
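
[Annotation] The mount-path changes keep setup and teardown symmetric: the per-mount stats kobject is registered as a child of m_kobj, and both the error ladder and xfs_unmountfs() remove the child before the parent. Reduced to its essentials (labels as in the diff):

	error = xfs_sysfs_init(&mp->m_stats.xs_kobj, &xfs_stats_ktype,
			       &mp->m_kobj, "stats");	/* parent exists first */
	if (error)
		goto out_remove_sysfs;
	...
 out_del_stats:
	xfs_sysfs_del(&mp->m_stats.xs_kobj);	/* child before parent */
 out_remove_sysfs:
	xfs_sysfs_del(&mp->m_kobj);
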
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 7999e91cd49a..b57098481c10 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -127,6 +127,7 @@ typedef struct xfs_mount {
int64_t m_low_space[XFS_LOWSP_MAX];
/* low free space thresholds */
struct xfs_kobj m_kobj;
+ struct xstats m_stats; /* per-fs stats */
struct workqueue_struct *m_buf_workqueue;
struct workqueue_struct *m_data_workqueue;
@@ -312,6 +313,7 @@ typedef struct xfs_perag {
int pagb_count; /* pagb slots in use */
} xfs_perag_t;
+extern void xfs_uuid_table_free(void);
extern int xfs_log_sbcount(xfs_mount_t *);
extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
extern int xfs_mountfs(xfs_mount_t *mp);
@@ -336,4 +338,7 @@ extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
extern void xfs_set_low_space_thresholds(struct xfs_mount *);
+int xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb,
+ xfs_off_t count_fsb);
+
#endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index ab4a6066f7ca..dc6221942b85 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -181,6 +181,11 @@ xfs_fs_map_blocks(
ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) {
+ /*
+ * xfs_iomap_write_direct() expects to take ownership of
+ * the shared ilock.
+ */
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
error = xfs_iomap_write_direct(ip, offset, length,
&imap, nimaps);
if (error)
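
[Annotation] This hunk reflects the new contract established by the xfs_iomap.c change above: callers now enter xfs_iomap_write_direct() holding the shared ilock, and the callee releases it on all paths. The calling convention, condensed:

	/* xfs_iomap_write_direct() consumes the shared ilock */
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	error = xfs_iomap_write_direct(ip, offset, length, &imap, nimaps);
	/* no xfs_iunlock() here: the callee has already dropped it */
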
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 587174fd4f2c..532ab79d38fe 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -184,7 +184,7 @@ xfs_qm_dqpurge(
*/
ASSERT(!list_empty(&dqp->q_lru));
list_lru_del(&qi->qi_lru, &dqp->q_lru);
- XFS_STATS_DEC(xs_qm_dquot_unused);
+ XFS_STATS_DEC(mp, xs_qm_dquot_unused);
xfs_qm_dqdestroy(dqp);
return 0;
@@ -448,11 +448,11 @@ xfs_qm_dquot_isolate(
*/
if (dqp->q_nrefs) {
xfs_dqunlock(dqp);
- XFS_STATS_INC(xs_qm_dqwants);
+ XFS_STATS_INC(dqp->q_mount, xs_qm_dqwants);
trace_xfs_dqreclaim_want(dqp);
list_lru_isolate(lru, &dqp->q_lru);
- XFS_STATS_DEC(xs_qm_dquot_unused);
+ XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused);
return LRU_REMOVED;
}
@@ -496,19 +496,19 @@ xfs_qm_dquot_isolate(
ASSERT(dqp->q_nrefs == 0);
list_lru_isolate_move(lru, &dqp->q_lru, &isol->dispose);
- XFS_STATS_DEC(xs_qm_dquot_unused);
+ XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused);
trace_xfs_dqreclaim_done(dqp);
- XFS_STATS_INC(xs_qm_dqreclaims);
+ XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaims);
return LRU_REMOVED;
out_miss_busy:
trace_xfs_dqreclaim_busy(dqp);
- XFS_STATS_INC(xs_qm_dqreclaim_misses);
+ XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
return LRU_SKIP;
out_unlock_dirty:
trace_xfs_dqreclaim_busy(dqp);
- XFS_STATS_INC(xs_qm_dqreclaim_misses);
+ XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
xfs_dqunlock(dqp);
spin_lock(lru_lock);
return LRU_RETRY;
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index f2240383d4bb..8686df6c7609 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -18,20 +18,21 @@
#include "xfs.h"
#include <linux/proc_fs.h>
-DEFINE_PER_CPU(struct xfsstats, xfsstats);
+struct xstats xfsstats;
-static int counter_val(int idx)
+static int counter_val(struct xfsstats __percpu *stats, int idx)
{
int val = 0, cpu;
for_each_possible_cpu(cpu)
- val += *(((__u32 *)&per_cpu(xfsstats, cpu) + idx));
+ val += *(((__u32 *)per_cpu_ptr(stats, cpu) + idx));
return val;
}
-static int xfs_stat_proc_show(struct seq_file *m, void *v)
+int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
{
int i, j;
+ int len = 0;
__uint64_t xs_xstrat_bytes = 0;
__uint64_t xs_write_bytes = 0;
__uint64_t xs_read_bytes = 0;
@@ -65,54 +66,59 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v)
};
/* Loop over all stats groups */
+
for (i = j = 0; i < ARRAY_SIZE(xstats); i++) {
- seq_printf(m, "%s", xstats[i].desc);
+ len += snprintf(buf + len, PATH_MAX - len, "%s",
+ xstats[i].desc);
/* inner loop does each group */
for (; j < xstats[i].endpoint; j++)
- seq_printf(m, " %u", counter_val(j));
- seq_putc(m, '\n');
+ len += snprintf(buf + len, PATH_MAX - len, " %u",
+ counter_val(stats, j));
+ len += snprintf(buf + len, PATH_MAX - len, "\n");
}
/* extra precision counters */
for_each_possible_cpu(i) {
- xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes;
- xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes;
- xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes;
+ xs_xstrat_bytes += per_cpu_ptr(stats, i)->xs_xstrat_bytes;
+ xs_write_bytes += per_cpu_ptr(stats, i)->xs_write_bytes;
+ xs_read_bytes += per_cpu_ptr(stats, i)->xs_read_bytes;
}
- seq_printf(m, "xpc %Lu %Lu %Lu\n",
+ len += snprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n",
xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
- seq_printf(m, "debug %u\n",
+ len += snprintf(buf + len, PATH_MAX-len, "debug %u\n",
#if defined(DEBUG)
1);
#else
0);
#endif
- return 0;
+
+ return len;
}
-static int xfs_stat_proc_open(struct inode *inode, struct file *file)
+void xfs_stats_clearall(struct xfsstats __percpu *stats)
{
- return single_open(file, xfs_stat_proc_show, NULL);
+ int c;
+ __uint32_t vn_active;
+
+ xfs_notice(NULL, "Clearing xfsstats");
+ for_each_possible_cpu(c) {
+ preempt_disable();
+ /* save vn_active, it's a universal truth! */
+ vn_active = per_cpu_ptr(stats, c)->vn_active;
+ memset(per_cpu_ptr(stats, c), 0, sizeof(*stats));
+ per_cpu_ptr(stats, c)->vn_active = vn_active;
+ preempt_enable();
+ }
}
-static const struct file_operations xfs_stat_proc_fops = {
- .owner = THIS_MODULE,
- .open = xfs_stat_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/* legacy quota interfaces */
#ifdef CONFIG_XFS_QUOTA
static int xqm_proc_show(struct seq_file *m, void *v)
{
/* maximum; incore; ratio free to inuse; freelist */
seq_printf(m, "%d\t%d\t%d\t%u\n",
- 0,
- counter_val(XFSSTAT_END_XQMSTAT),
- 0,
- counter_val(XFSSTAT_END_XQMSTAT + 1));
+ 0, counter_val(xfsstats.xs_stats, XFSSTAT_END_XQMSTAT),
+ 0, counter_val(xfsstats.xs_stats, XFSSTAT_END_XQMSTAT + 1));
return 0;
}
@@ -136,7 +142,7 @@ static int xqmstat_proc_show(struct seq_file *m, void *v)
seq_printf(m, "qm");
for (j = XFSSTAT_END_IBT_V2; j < XFSSTAT_END_XQMSTAT; j++)
- seq_printf(m, " %u", counter_val(j));
+ seq_printf(m, " %u", counter_val(xfsstats.xs_stats, j));
seq_putc(m, '\n');
return 0;
}
@@ -155,44 +161,35 @@ static const struct file_operations xqmstat_proc_fops = {
};
#endif /* CONFIG_XFS_QUOTA */
+#ifdef CONFIG_PROC_FS
int
xfs_init_procfs(void)
{
if (!proc_mkdir("fs/xfs", NULL))
+ return -ENOMEM;
+
+ if (!proc_symlink("fs/xfs/stat", NULL,
+ "/sys/fs/xfs/stats/stats"))
goto out;
- if (!proc_create("fs/xfs/stat", 0, NULL,
- &xfs_stat_proc_fops))
- goto out_remove_xfs_dir;
#ifdef CONFIG_XFS_QUOTA
if (!proc_create("fs/xfs/xqmstat", 0, NULL,
&xqmstat_proc_fops))
- goto out_remove_stat_file;
+ goto out;
if (!proc_create("fs/xfs/xqm", 0, NULL,
&xqm_proc_fops))
- goto out_remove_xqmstat_file;
+ goto out;
#endif
return 0;
-#ifdef CONFIG_XFS_QUOTA
- out_remove_xqmstat_file:
- remove_proc_entry("fs/xfs/xqmstat", NULL);
- out_remove_stat_file:
- remove_proc_entry("fs/xfs/stat", NULL);
-#endif
- out_remove_xfs_dir:
- remove_proc_entry("fs/xfs", NULL);
- out:
+out:
+ remove_proc_subtree("fs/xfs", NULL);
return -ENOMEM;
}
void
xfs_cleanup_procfs(void)
{
-#ifdef CONFIG_XFS_QUOTA
- remove_proc_entry("fs/xfs/xqm", NULL);
- remove_proc_entry("fs/xfs/xqmstat", NULL);
-#endif
- remove_proc_entry("fs/xfs/stat", NULL);
- remove_proc_entry("fs/xfs", NULL);
+ remove_proc_subtree("fs/xfs", NULL);
}
+#endif /* CONFIG_PROC_FS */
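
[Annotation] counter_val() above is the standard per-cpu aggregation idiom: updates touch only the local CPU's copy, and readers sum across all possible CPUs. A self-contained sketch using the same kernel APIs; the counter struct is illustrative:

	struct demo_stats {
		__u32	hits;
	};

	static struct demo_stats __percpu *demo;

	static int demo_init(void)
	{
		demo = alloc_percpu(struct demo_stats);
		return demo ? 0 : -ENOMEM;
	}

	static void demo_hit(void)
	{
		this_cpu_inc(demo->hits);	/* no shared cacheline bouncing */
	}

	static __u32 demo_total(void)		/* cf. counter_val() */
	{
		__u32	val = 0;
		int	cpu;

		for_each_possible_cpu(cpu)
			val += per_cpu_ptr(demo, cpu)->hits;
		return val;
	}
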
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index c8f238b8299a..483b0eff1988 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -19,8 +19,6 @@
#define __XFS_STATS_H__
-#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
-
#include <linux/percpu.h>
/*
@@ -215,15 +213,29 @@ struct xfsstats {
__uint64_t xs_read_bytes;
};
-DECLARE_PER_CPU(struct xfsstats, xfsstats);
+int xfs_stats_format(struct xfsstats __percpu *stats, char *buf);
+void xfs_stats_clearall(struct xfsstats __percpu *stats);
+extern struct xstats xfsstats;
-/*
- * We don't disable preempt, not too worried about poking the
- * wrong CPU's stat for now (also aggregated before reporting).
- */
-#define XFS_STATS_INC(v) (per_cpu(xfsstats, current_cpu()).v++)
-#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--)
-#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc))
+#define XFS_STATS_INC(mp, v) \
+do { \
+ per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v++; \
+ per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v++; \
+} while (0)
+
+#define XFS_STATS_DEC(mp, v) \
+do { \
+ per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v--; \
+ per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v--; \
+} while (0)
+
+#define XFS_STATS_ADD(mp, v, inc) \
+do { \
+ per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v += (inc); \
+ per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v += (inc); \
+} while (0)
+
+#if defined(CONFIG_PROC_FS)
extern int xfs_init_procfs(void);
extern void xfs_cleanup_procfs(void);
@@ -231,10 +243,6 @@ extern void xfs_cleanup_procfs(void);
#else /* !CONFIG_PROC_FS */
-# define XFS_STATS_INC(count)
-# define XFS_STATS_DEC(count)
-# define XFS_STATS_ADD(count, inc)
-
static inline int xfs_init_procfs(void)
{
return 0;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 904f637cfa5f..36bd8825bfb0 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -838,17 +838,18 @@ xfs_init_mount_workqueues(
goto out_destroy_unwritten;
mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
- WQ_FREEZABLE, 0, mp->m_fsname);
+ WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
if (!mp->m_reclaim_workqueue)
goto out_destroy_cil;
mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
- WQ_FREEZABLE|WQ_HIGHPRI, 0, mp->m_fsname);
+ WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0,
+ mp->m_fsname);
if (!mp->m_log_workqueue)
goto out_destroy_reclaim;
mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
- WQ_FREEZABLE, 0, mp->m_fsname);
+ WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
if (!mp->m_eofblocks_workqueue)
goto out_destroy_log;
@@ -922,7 +923,7 @@ xfs_fs_destroy_inode(
trace_xfs_destroy_inode(ip);
- XFS_STATS_INC(vn_reclaim);
+ XFS_STATS_INC(ip->i_mount, vn_reclaim);
ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
@@ -983,8 +984,8 @@ xfs_fs_evict_inode(
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
- XFS_STATS_INC(vn_rele);
- XFS_STATS_INC(vn_remove);
+ XFS_STATS_INC(ip->i_mount, vn_rele);
+ XFS_STATS_INC(ip->i_mount, vn_remove);
xfs_inactive(ip);
}
@@ -1474,9 +1475,16 @@ xfs_fs_fill_super(
if (error)
goto out_destroy_workqueues;
+ /* Allocate stats memory before we do operations that might use it */
+ mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
+ if (!mp->m_stats.xs_stats) {
+ error = -ENOMEM;
+ goto out_destroy_counters;
+ }
+
error = xfs_readsb(mp, flags);
if (error)
- goto out_destroy_counters;
+ goto out_free_stats;
error = xfs_finish_flags(mp);
if (error)
@@ -1545,9 +1553,11 @@ xfs_fs_fill_super(
xfs_filestream_unmount(mp);
out_free_sb:
xfs_freesb(mp);
+ out_free_stats:
+ free_percpu(mp->m_stats.xs_stats);
out_destroy_counters:
xfs_destroy_percpu_counters(mp);
-out_destroy_workqueues:
+ out_destroy_workqueues:
xfs_destroy_mount_workqueues(mp);
out_close_devices:
xfs_close_devices(mp);
@@ -1574,6 +1584,7 @@ xfs_fs_put_super(
xfs_unmountfs(mp);
xfs_freesb(mp);
+ free_percpu(mp->m_stats.xs_stats);
xfs_destroy_percpu_counters(mp);
xfs_destroy_mount_workqueues(mp);
xfs_close_devices(mp);
@@ -1838,19 +1849,32 @@ init_xfs_fs(void)
xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
if (!xfs_kset) {
error = -ENOMEM;
- goto out_sysctl_unregister;;
+ goto out_sysctl_unregister;
}
+ xfsstats.xs_kobj.kobject.kset = xfs_kset;
+
+ xfsstats.xs_stats = alloc_percpu(struct xfsstats);
+ if (!xfsstats.xs_stats) {
+ error = -ENOMEM;
+ goto out_kset_unregister;
+ }
+
+ error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL,
+ "stats");
+ if (error)
+ goto out_free_stats;
+
#ifdef DEBUG
xfs_dbg_kobj.kobject.kset = xfs_kset;
error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
if (error)
- goto out_kset_unregister;
+ goto out_remove_stats_kobj;
#endif
error = xfs_qm_init();
if (error)
- goto out_remove_kobj;
+ goto out_remove_dbg_kobj;
error = register_filesystem(&xfs_fs_type);
if (error)
@@ -1859,11 +1883,15 @@ init_xfs_fs(void)
out_qm_exit:
xfs_qm_exit();
- out_remove_kobj:
+ out_remove_dbg_kobj:
#ifdef DEBUG
xfs_sysfs_del(&xfs_dbg_kobj);
- out_kset_unregister:
+ out_remove_stats_kobj:
#endif
+ xfs_sysfs_del(&xfsstats.xs_kobj);
+ out_free_stats:
+ free_percpu(xfsstats.xs_stats);
+ out_kset_unregister:
kset_unregister(xfs_kset);
out_sysctl_unregister:
xfs_sysctl_unregister();
@@ -1889,6 +1917,8 @@ exit_xfs_fs(void)
#ifdef DEBUG
xfs_sysfs_del(&xfs_dbg_kobj);
#endif
+ xfs_sysfs_del(&xfsstats.xs_kobj);
+ free_percpu(xfsstats.xs_stats);
kset_unregister(xfs_kset);
xfs_sysctl_unregister();
xfs_cleanup_procfs();
@@ -1896,6 +1926,7 @@ exit_xfs_fs(void)
xfs_mru_cache_uninit();
xfs_destroy_workqueues();
xfs_destroy_zones();
+ xfs_uuid_table_free();
}
module_init(init_xfs_fs);
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c
index a0c8067cea6f..aed74d3f8da9 100644
--- a/fs/xfs/xfs_sysctl.c
+++ b/fs/xfs/xfs_sysctl.c
@@ -19,6 +19,7 @@
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include "xfs_error.h"
+#include "xfs_stats.h"
static struct ctl_table_header *xfs_table_header;
@@ -31,22 +32,12 @@ xfs_stats_clear_proc_handler(
size_t *lenp,
loff_t *ppos)
{
- int c, ret, *valp = ctl->data;
- __uint32_t vn_active;
+ int ret, *valp = ctl->data;
ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
if (!ret && write && *valp) {
- xfs_notice(NULL, "Clearing xfsstats");
- for_each_possible_cpu(c) {
- preempt_disable();
- /* save vn_active, it's a universal truth! */
- vn_active = per_cpu(xfsstats, c).vn_active;
- memset(&per_cpu(xfsstats, c), 0,
- sizeof(struct xfsstats));
- per_cpu(xfsstats, c).vn_active = vn_active;
- preempt_enable();
- }
+ xfs_stats_clearall(xfsstats.xs_stats);
xfs_stats_clear = 0;
}
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index aa03670851d8..ee70f5dec9dc 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -21,11 +21,13 @@
#include "xfs_log_format.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
+#include "xfs_stats.h"
struct xfs_sysfs_attr {
struct attribute attr;
- ssize_t (*show)(char *buf, void *data);
- ssize_t (*store)(const char *buf, size_t count, void *data);
+ ssize_t (*show)(struct kobject *kobject, char *buf);
+ ssize_t (*store)(struct kobject *kobject, const char *buf,
+ size_t count);
};
static inline struct xfs_sysfs_attr *
@@ -38,6 +40,8 @@ to_attr(struct attribute *attr)
static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RW(name)
#define XFS_SYSFS_ATTR_RO(name) \
static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RO(name)
+#define XFS_SYSFS_ATTR_WO(name) \
+ static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_WO(name)
#define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr
@@ -51,14 +55,42 @@ struct kobj_type xfs_mp_ktype = {
.release = xfs_sysfs_release,
};
+STATIC ssize_t
+xfs_sysfs_object_show(
+ struct kobject *kobject,
+ struct attribute *attr,
+ char *buf)
+{
+ struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
+
+ return xfs_attr->show ? xfs_attr->show(kobject, buf) : 0;
+}
+
+STATIC ssize_t
+xfs_sysfs_object_store(
+ struct kobject *kobject,
+ struct attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
+
+ return xfs_attr->store ? xfs_attr->store(kobject, buf, count) : 0;
+}
+
+static const struct sysfs_ops xfs_sysfs_ops = {
+ .show = xfs_sysfs_object_show,
+ .store = xfs_sysfs_object_store,
+};
+
#ifdef DEBUG
/* debug */
STATIC ssize_t
log_recovery_delay_store(
+ struct kobject *kobject,
const char *buf,
- size_t count,
- void *data)
+ size_t count)
{
int ret;
int val;
@@ -77,8 +109,8 @@ log_recovery_delay_store(
STATIC ssize_t
log_recovery_delay_show(
- char *buf,
- void *data)
+ struct kobject *kobject,
+ char *buf)
{
return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.log_recovery_delay);
}
@@ -89,52 +121,87 @@ static struct attribute *xfs_dbg_attrs[] = {
NULL,
};
+struct kobj_type xfs_dbg_ktype = {
+ .release = xfs_sysfs_release,
+ .sysfs_ops = &xfs_sysfs_ops,
+ .default_attrs = xfs_dbg_attrs,
+};
+
+#endif /* DEBUG */
+
+/* stats */
+
+static inline struct xstats *
+to_xstats(struct kobject *kobject)
+{
+ struct xfs_kobj *kobj = to_kobj(kobject);
+
+ return container_of(kobj, struct xstats, xs_kobj);
+}
+
STATIC ssize_t
-xfs_dbg_show(
- struct kobject *kobject,
- struct attribute *attr,
- char *buf)
+stats_show(
+ struct kobject *kobject,
+ char *buf)
{
- struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
+ struct xstats *stats = to_xstats(kobject);
- return xfs_attr->show ? xfs_attr->show(buf, NULL) : 0;
+ return xfs_stats_format(stats->xs_stats, buf);
}
+XFS_SYSFS_ATTR_RO(stats);
STATIC ssize_t
-xfs_dbg_store(
- struct kobject *kobject,
- struct attribute *attr,
- const char *buf,
- size_t count)
+stats_clear_store(
+ struct kobject *kobject,
+ const char *buf,
+ size_t count)
{
- struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
+ int ret;
+ int val;
+ struct xstats *stats = to_xstats(kobject);
+
+ ret = kstrtoint(buf, 0, &val);
+ if (ret)
+ return ret;
- return xfs_attr->store ? xfs_attr->store(buf, count, NULL) : 0;
+ if (val != 1)
+ return -EINVAL;
+
+ xfs_stats_clearall(stats->xs_stats);
+ return count;
}
+XFS_SYSFS_ATTR_WO(stats_clear);
-static struct sysfs_ops xfs_dbg_ops = {
- .show = xfs_dbg_show,
- .store = xfs_dbg_store,
+static struct attribute *xfs_stats_attrs[] = {
+ ATTR_LIST(stats),
+ ATTR_LIST(stats_clear),
+ NULL,
};
-struct kobj_type xfs_dbg_ktype = {
+struct kobj_type xfs_stats_ktype = {
.release = xfs_sysfs_release,
- .sysfs_ops = &xfs_dbg_ops,
- .default_attrs = xfs_dbg_attrs,
+ .sysfs_ops = &xfs_sysfs_ops,
+ .default_attrs = xfs_stats_attrs,
};
-#endif /* DEBUG */
-
/* xlog */
+static inline struct xlog *
+to_xlog(struct kobject *kobject)
+{
+ struct xfs_kobj *kobj = to_kobj(kobject);
+
+ return container_of(kobj, struct xlog, l_kobj);
+}
+
STATIC ssize_t
log_head_lsn_show(
- char *buf,
- void *data)
+ struct kobject *kobject,
+ char *buf)
{
- struct xlog *log = data;
int cycle;
int block;
+ struct xlog *log = to_xlog(kobject);
spin_lock(&log->l_icloglock);
cycle = log->l_curr_cycle;
@@ -147,12 +214,12 @@ XFS_SYSFS_ATTR_RO(log_head_lsn);
STATIC ssize_t
log_tail_lsn_show(
- char *buf,
- void *data)
+ struct kobject *kobject,
+ char *buf)
{
- struct xlog *log = data;
int cycle;
int block;
+ struct xlog *log = to_xlog(kobject);
xlog_crack_atomic_lsn(&log->l_tail_lsn, &cycle, &block);
return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block);
@@ -161,12 +228,13 @@ XFS_SYSFS_ATTR_RO(log_tail_lsn);
STATIC ssize_t
reserve_grant_head_show(
- char *buf,
- void *data)
+ struct kobject *kobject,
+ char *buf)
+
{
- struct xlog *log = data;
int cycle;
int bytes;
+ struct xlog *log = to_xlog(kobject);
xlog_crack_grant_head(&log->l_reserve_head.grant, &cycle, &bytes);
return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes);
@@ -175,12 +243,12 @@ XFS_SYSFS_ATTR_RO(reserve_grant_head);
STATIC ssize_t
write_grant_head_show(
- char *buf,
- void *data)
+ struct kobject *kobject,
+ char *buf)
{
- struct xlog *log = data;
int cycle;
int bytes;
+ struct xlog *log = to_xlog(kobject);
xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &bytes);
return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes);
@@ -195,45 +263,8 @@ static struct attribute *xfs_log_attrs[] = {
NULL,
};
-static inline struct xlog *
-to_xlog(struct kobject *kobject)
-{
- struct xfs_kobj *kobj = to_kobj(kobject);
- return container_of(kobj, struct xlog, l_kobj);
-}
-
-STATIC ssize_t
-xfs_log_show(
- struct kobject *kobject,
- struct attribute *attr,
- char *buf)
-{
- struct xlog *log = to_xlog(kobject);
- struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
-
- return xfs_attr->show ? xfs_attr->show(buf, log) : 0;
-}
-
-STATIC ssize_t
-xfs_log_store(
- struct kobject *kobject,
- struct attribute *attr,
- const char *buf,
- size_t count)
-{
- struct xlog *log = to_xlog(kobject);
- struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
-
- return xfs_attr->store ? xfs_attr->store(buf, count, log) : 0;
-}
-
-static struct sysfs_ops xfs_log_ops = {
- .show = xfs_log_show,
- .store = xfs_log_store,
-};
-
struct kobj_type xfs_log_ktype = {
.release = xfs_sysfs_release,
- .sysfs_ops = &xfs_log_ops,
+ .sysfs_ops = &xfs_sysfs_ops,
.default_attrs = xfs_log_attrs,
};
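
[Annotation] The refactor above replaces the per-type sysfs_ops with a single generic pair: attributes receive the kobject directly, and each object type recovers its container via container_of(). Adding a new sysfs-backed object then reduces to the following shape (the demo names are illustrative):

	struct demo_obj {
		int		value;
		struct xfs_kobj	kobj;
	};

	static inline struct demo_obj *
	to_demo(struct kobject *kobject)
	{
		struct xfs_kobj	*kobj = to_kobj(kobject);

		return container_of(kobj, struct demo_obj, kobj);
	}

	STATIC ssize_t
	value_show(
		struct kobject	*kobject,
		char		*buf)
	{
		struct demo_obj	*obj = to_demo(kobject);

		return snprintf(buf, PAGE_SIZE, "%d\n", obj->value);
	}
	XFS_SYSFS_ATTR_RO(value);
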
diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h
index 240eee35f342..be692e59938d 100644
--- a/fs/xfs/xfs_sysfs.h
+++ b/fs/xfs/xfs_sysfs.h
@@ -22,6 +22,7 @@
extern struct kobj_type xfs_mp_ktype; /* xfs_mount */
extern struct kobj_type xfs_dbg_ktype; /* debug */
extern struct kobj_type xfs_log_ktype; /* xlog */
+extern struct kobj_type xfs_stats_ktype; /* stats */
static inline struct xfs_kobj *
to_kobj(struct kobject *kobject)
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 5ed36b1e04c1..877079eb0f8f 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -689,6 +689,7 @@ DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid);
DEFINE_INODE_EVENT(xfs_filemap_fault);
DEFINE_INODE_EVENT(xfs_filemap_pmd_fault);
DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite);
+DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite);
DECLARE_EVENT_CLASS(xfs_iref_class,
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
@@ -1312,6 +1313,7 @@ DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
+DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof);
DECLARE_EVENT_CLASS(xfs_itrunc_class,
TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index a0ab1dae9c31..748b16aff45a 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -930,9 +930,9 @@ __xfs_trans_commit(
*/
if (sync) {
error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
- XFS_STATS_INC(xs_trans_sync);
+ XFS_STATS_INC(mp, xs_trans_sync);
} else {
- XFS_STATS_INC(xs_trans_async);
+ XFS_STATS_INC(mp, xs_trans_async);
}
return error;
@@ -955,7 +955,7 @@ out_unreserve:
xfs_trans_free_items(tp, NULLCOMMITLSN, !!error);
xfs_trans_free(tp);
- XFS_STATS_INC(xs_trans_empty);
+ XFS_STATS_INC(mp, xs_trans_empty);
return error;
}
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 1098cf490189..aa67339b9537 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -349,7 +349,7 @@ xfsaild_push(
xfs_ail_min_lsn(ailp))) {
ailp->xa_log_flush = 0;
- XFS_STATS_INC(xs_push_ail_flush);
+ XFS_STATS_INC(mp, xs_push_ail_flush);
xfs_log_force(mp, XFS_LOG_SYNC);
}
@@ -371,7 +371,7 @@ xfsaild_push(
goto out_done;
}
- XFS_STATS_INC(xs_push_ail);
+ XFS_STATS_INC(mp, xs_push_ail);
lsn = lip->li_lsn;
while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) {
@@ -385,7 +385,7 @@ xfsaild_push(
lock_result = lip->li_ops->iop_push(lip, &ailp->xa_buf_list);
switch (lock_result) {
case XFS_ITEM_SUCCESS:
- XFS_STATS_INC(xs_push_ail_success);
+ XFS_STATS_INC(mp, xs_push_ail_success);
trace_xfs_ail_push(lip);
ailp->xa_last_pushed_lsn = lsn;
@@ -403,7 +403,7 @@ xfsaild_push(
* re-try the flushing relatively soon if most of the
* AIL is being flushed.
*/
- XFS_STATS_INC(xs_push_ail_flushing);
+ XFS_STATS_INC(mp, xs_push_ail_flushing);
trace_xfs_ail_flushing(lip);
flushing++;
@@ -411,14 +411,14 @@ xfsaild_push(
break;
case XFS_ITEM_PINNED:
- XFS_STATS_INC(xs_push_ail_pinned);
+ XFS_STATS_INC(mp, xs_push_ail_pinned);
trace_xfs_ail_pinned(lip);
stuck++;
ailp->xa_log_flush++;
break;
case XFS_ITEM_LOCKED:
- XFS_STATS_INC(xs_push_ail_locked);
+ XFS_STATS_INC(mp, xs_push_ail_locked);
trace_xfs_ail_locked(lip);
stuck++;
@@ -497,6 +497,7 @@ xfsaild(
long tout = 0; /* milliseconds */
current->flags |= PF_MEMALLOC;
+ set_freezable();
while (!kthread_should_stop()) {
if (tout && tout <= 20)
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 17280cd71934..b97f1df910ab 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -108,6 +108,15 @@ xfs_trans_log_inode(
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
/*
+ * Record the specific change for fdatasync optimisation. This
+ * allows fdatasync to skip log forces for inodes that are only
+ * timestamp dirty. We do this before the change count so that
+ * the core being logged in this case does not impact on fdatasync
+ * behaviour.
+ */
+ ip->i_itemp->ili_fsync_fields |= flags;
+
+ /*
* First time we log the inode in a transaction, bump the inode change
* counter if it is configured for this to occur. We don't use
* inode_inc_version() because there is no need for extra locking around
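
[Annotation] The consumer of ili_fsync_fields lives in the fsync path rather than in this hunk. A hedged sketch of the optimisation it enables, with the mask logic assumed from the comment above rather than shown in this diff:

	/*
	 * If this is a datasync and nothing but timestamps has been
	 * relogged on the inode, fdatasync has nothing to push to
	 * disk: skip the log force.
	 */
	if (datasync &&
	    !(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
		return 0;	/* illustrative early-out */
	error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
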
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index c036815183cb..8294f86441bf 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -53,11 +53,34 @@ xfs_xattr_get(struct dentry *dentry, const char *name,
return asize;
}
+void
+xfs_forget_acl(
+ struct inode *inode,
+ const char *name,
+ int xflags)
+{
+ /*
+ * Invalidate any cached ACLs if the user has bypassed the ACL
+ * interface. We don't validate the content whatsoever, so it is the
+ * caller's responsibility to provide data in a valid format and to
+ * ensure i_mode is consistent.
+ */
+ if (xflags & ATTR_ROOT) {
+#ifdef CONFIG_XFS_POSIX_ACL
+ if (!strcmp(name, SGI_ACL_FILE))
+ forget_cached_acl(inode, ACL_TYPE_ACCESS);
+ else if (!strcmp(name, SGI_ACL_DEFAULT))
+ forget_cached_acl(inode, ACL_TYPE_DEFAULT);
+#endif
+ }
+}
+
static int
xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
size_t size, int flags, int xflags)
{
- struct xfs_inode *ip = XFS_I(d_inode(dentry));
+ struct xfs_inode *ip = XFS_I(d_inode(dentry));
+ int error;
if (strcmp(name, "") == 0)
return -EINVAL;
@@ -70,8 +93,12 @@ xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
if (!value)
return xfs_attr_remove(ip, (unsigned char *)name, xflags);
- return xfs_attr_set(ip, (unsigned char *)name,
+ error = xfs_attr_set(ip, (unsigned char *)name,
(void *)value, size, xflags);
+ if (!error)
+ xfs_forget_acl(d_inode(dentry), name, xflags);
+
+ return error;
}
static const struct xattr_handler xfs_xattr_user_handler = {