aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/callback.c7
-rw-r--r--fs/afs/cmservice.c11
-rw-r--r--fs/afs/file.c20
-rw-r--r--fs/afs/fsclient.c77
-rw-r--r--fs/afs/inode.c42
-rw-r--r--fs/afs/internal.h23
-rw-r--r--fs/afs/misc.c2
-rw-r--r--fs/afs/mntpt.c53
-rw-r--r--fs/afs/rxrpc.c149
-rw-r--r--fs/afs/security.c9
-rw-r--r--fs/afs/server.c6
-rw-r--r--fs/afs/vlocation.c16
-rw-r--r--fs/afs/write.c76
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/extent_io.c49
-rw-r--r--fs/btrfs/inode.c20
-rw-r--r--fs/btrfs/qgroup.c10
-rw-r--r--fs/btrfs/send.c7
-rw-r--r--fs/crypto/crypto.c10
-rw-r--r--fs/crypto/fname.c2
-rw-r--r--fs/crypto/fscrypt_private.h4
-rw-r--r--fs/crypto/keyinfo.c52
-rw-r--r--fs/crypto/policy.c7
-rw-r--r--fs/dlm/lowcomms.c2
-rw-r--r--fs/ext4/inline.c5
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/ext4/move_extent.c2
-rw-r--r--fs/ext4/super.c10
-rw-r--r--fs/ext4/xattr.c65
-rw-r--r--fs/f2fs/debug.c1
-rw-r--r--fs/f2fs/dir.c2
-rw-r--r--fs/f2fs/f2fs.h2
-rw-r--r--fs/f2fs/node.c163
-rw-r--r--fs/f2fs/segment.c6
-rw-r--r--fs/fat/inode.c13
-rw-r--r--fs/fs-writeback.c35
-rw-r--r--fs/gfs2/incore.h2
-rw-r--r--fs/iomap.c17
-rw-r--r--fs/jbd2/journal.c22
-rw-r--r--fs/jbd2/revoke.c1
-rw-r--r--fs/kernfs/file.c3
-rw-r--r--fs/nfs/callback.c4
-rw-r--r--fs/nfs/client.c25
-rw-r--r--fs/nfs/dir.c9
-rw-r--r--fs/nfs/filelayout/filelayout.c151
-rw-r--r--fs/nfs/filelayout/filelayout.h19
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c8
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.h14
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c9
-rw-r--r--fs/nfs/internal.h2
-rw-r--r--fs/nfs/nfs4client.c4
-rw-r--r--fs/nfs/nfs4proc.c20
-rw-r--r--fs/nfs/nfs4xdr.c2
-rw-r--r--fs/nfs/pnfs.h2
-rw-r--r--fs/nfs/pnfs_nfs.c31
-rw-r--r--fs/nfs/write.c6
-rw-r--r--fs/ocfs2/cluster/tcp.c2
-rw-r--r--fs/overlayfs/util.c1
-rw-r--r--fs/timerfd.c8
-rw-r--r--fs/userfaultfd.c75
-rw-r--r--fs/xfs/kmem.c18
-rw-r--r--fs/xfs/kmem.h2
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c34
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c6
-rw-r--r--fs/xfs/libxfs/xfs_dir2_priv.h2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_sf.c87
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c26
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.h2
-rw-r--r--fs/xfs/xfs_aops.c59
-rw-r--r--fs/xfs/xfs_dir2_readdir.c11
-rw-r--r--fs/xfs/xfs_icache.c2
-rw-r--r--fs/xfs/xfs_inode.c14
-rw-r--r--fs/xfs/xfs_iomap.c25
-rw-r--r--fs/xfs/xfs_itable.c6
-rw-r--r--fs/xfs/xfs_mount.c3
-rw-r--r--fs/xfs/xfs_reflink.c23
-rw-r--r--fs/xfs/xfs_reflink.h4
-rw-r--r--fs/xfs/xfs_super.c2
79 files changed, 974 insertions, 753 deletions
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index b29447e03ede..25d404d22cae 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -362,7 +362,7 @@ static void afs_callback_updater(struct work_struct *work)
{
struct afs_server *server;
struct afs_vnode *vnode, *xvnode;
- time_t now;
+ time64_t now;
long timeout;
int ret;
@@ -370,7 +370,7 @@ static void afs_callback_updater(struct work_struct *work)
_enter("");
- now = get_seconds();
+ now = ktime_get_real_seconds();
/* find the first vnode to update */
spin_lock(&server->cb_lock);
@@ -424,7 +424,8 @@ static void afs_callback_updater(struct work_struct *work)
/* and then reschedule */
_debug("reschedule");
- vnode->update_at = get_seconds() + afs_vnode_update_timeout;
+ vnode->update_at = ktime_get_real_seconds() +
+ afs_vnode_update_timeout;
spin_lock(&server->cb_lock);
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 2edbdcbf6432..3062cceb5c2a 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -187,7 +187,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
struct afs_callback *cb;
struct afs_server *server;
__be32 *bp;
- u32 tmp;
int ret, loop;
_enter("{%u}", call->unmarshall);
@@ -249,9 +248,9 @@ static int afs_deliver_cb_callback(struct afs_call *call)
if (ret < 0)
return ret;
- tmp = ntohl(call->tmp);
- _debug("CB count: %u", tmp);
- if (tmp != call->count && tmp != 0)
+ call->count2 = ntohl(call->tmp);
+ _debug("CB count: %u", call->count2);
+ if (call->count2 != call->count && call->count2 != 0)
return -EBADMSG;
call->offset = 0;
call->unmarshall++;
@@ -259,14 +258,14 @@ static int afs_deliver_cb_callback(struct afs_call *call)
case 4:
_debug("extract CB array");
ret = afs_extract_data(call, call->buffer,
- call->count * 3 * 4, false);
+ call->count2 * 3 * 4, false);
if (ret < 0)
return ret;
_debug("unmarshall CB array");
cb = call->request;
bp = call->buffer;
- for (loop = call->count; loop > 0; loop--, cb++) {
+ for (loop = call->count2; loop > 0; loop--, cb++) {
cb->version = ntohl(*bp++);
cb->expiry = ntohl(*bp++);
cb->type = ntohl(*bp++);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index ba7b71fba34b..0d5b8508869b 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -30,6 +30,7 @@ static int afs_readpages(struct file *filp, struct address_space *mapping,
const struct file_operations afs_file_operations = {
.open = afs_open,
+ .flush = afs_flush,
.release = afs_release,
.llseek = generic_file_llseek,
.read_iter = generic_file_read_iter,
@@ -184,10 +185,13 @@ int afs_page_filler(void *data, struct page *page)
if (!req)
goto enomem;
+ /* We request a full page. If the page is a partial one at the
+ * end of the file, the server will return a short read and the
+ * unmarshalling code will clear the unfilled space.
+ */
atomic_set(&req->usage, 1);
req->pos = (loff_t)page->index << PAGE_SHIFT;
- req->len = min_t(size_t, i_size_read(inode) - req->pos,
- PAGE_SIZE);
+ req->len = PAGE_SIZE;
req->nr_pages = 1;
req->pages[0] = page;
get_page(page);
@@ -208,7 +212,13 @@ int afs_page_filler(void *data, struct page *page)
fscache_uncache_page(vnode->cache, page);
#endif
BUG_ON(PageFsCache(page));
- goto error;
+
+ if (ret == -EINTR ||
+ ret == -ENOMEM ||
+ ret == -ERESTARTSYS ||
+ ret == -EAGAIN)
+ goto error;
+ goto io_error;
}
SetPageUptodate(page);
@@ -227,10 +237,12 @@ int afs_page_filler(void *data, struct page *page)
_leave(" = 0");
return 0;
+io_error:
+ SetPageError(page);
+ goto error;
enomem:
ret = -ENOMEM;
error:
- SetPageError(page);
unlock_page(page);
_leave(" = %d", ret);
return ret;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index ac8e766978dc..19f76ae36982 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -17,6 +17,12 @@
#include "afs_fs.h"
/*
+ * We need somewhere to discard into in case the server helpfully returns more
+ * than we asked for in FS.FetchData{,64}.
+ */
+static u8 afs_discard_buffer[64];
+
+/*
* decode an AFSFid block
*/
static void xdr_decode_AFSFid(const __be32 **_bp, struct afs_fid *fid)
@@ -105,7 +111,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
vnode->vfs_inode.i_mode = mode;
}
- vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server;
+ vnode->vfs_inode.i_ctime.tv_sec = status->mtime_client;
vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime;
vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime;
vnode->vfs_inode.i_version = data_version;
@@ -139,7 +145,7 @@ static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode)
vnode->cb_version = ntohl(*bp++);
vnode->cb_expiry = ntohl(*bp++);
vnode->cb_type = ntohl(*bp++);
- vnode->cb_expires = vnode->cb_expiry + get_seconds();
+ vnode->cb_expires = vnode->cb_expiry + ktime_get_real_seconds();
*_bp = bp;
}
@@ -315,7 +321,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
void *buffer;
int ret;
- _enter("{%u,%zu/%u;%u/%llu}",
+ _enter("{%u,%zu/%u;%llu/%llu}",
call->unmarshall, call->offset, call->count,
req->remain, req->actual_len);
@@ -353,12 +359,6 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
req->actual_len |= ntohl(call->tmp);
_debug("DATA length: %llu", req->actual_len);
- /* Check that the server didn't want to send us extra. We
- * might want to just discard instead, but that requires
- * cooperation from AF_RXRPC.
- */
- if (req->actual_len > req->len)
- return -EBADMSG;
req->remain = req->actual_len;
call->offset = req->pos & (PAGE_SIZE - 1);
@@ -368,6 +368,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
call->unmarshall++;
begin_page:
+ ASSERTCMP(req->index, <, req->nr_pages);
if (req->remain > PAGE_SIZE - call->offset)
size = PAGE_SIZE - call->offset;
else
@@ -378,7 +379,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
/* extract the returned data */
case 3:
- _debug("extract data %u/%llu %zu/%u",
+ _debug("extract data %llu/%llu %zu/%u",
req->remain, req->actual_len, call->offset, call->count);
buffer = kmap(req->pages[req->index]);
@@ -389,19 +390,40 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
if (call->offset == PAGE_SIZE) {
if (req->page_done)
req->page_done(call, req);
+ req->index++;
if (req->remain > 0) {
- req->index++;
call->offset = 0;
+ if (req->index >= req->nr_pages) {
+ call->unmarshall = 4;
+ goto begin_discard;
+ }
goto begin_page;
}
}
+ goto no_more_data;
+
+ /* Discard any excess data the server gave us */
+ begin_discard:
+ case 4:
+ size = min_t(loff_t, sizeof(afs_discard_buffer), req->remain);
+ call->count = size;
+ _debug("extract discard %llu/%llu %zu/%u",
+ req->remain, req->actual_len, call->offset, call->count);
+
+ call->offset = 0;
+ ret = afs_extract_data(call, afs_discard_buffer, call->count, true);
+ req->remain -= call->offset;
+ if (ret < 0)
+ return ret;
+ if (req->remain > 0)
+ goto begin_discard;
no_more_data:
call->offset = 0;
- call->unmarshall++;
+ call->unmarshall = 5;
/* extract the metadata */
- case 4:
+ case 5:
ret = afs_extract_data(call, call->buffer,
(21 + 3 + 6) * 4, false);
if (ret < 0)
@@ -416,16 +438,17 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
call->offset = 0;
call->unmarshall++;
- case 5:
+ case 6:
break;
}
- if (call->count < PAGE_SIZE) {
- buffer = kmap(req->pages[req->index]);
- memset(buffer + call->count, 0, PAGE_SIZE - call->count);
- kunmap(req->pages[req->index]);
+ for (; req->index < req->nr_pages; req->index++) {
+ if (call->count < PAGE_SIZE)
+ zero_user_segment(req->pages[req->index],
+ call->count, PAGE_SIZE);
if (req->page_done)
req->page_done(call, req);
+ call->count = 0;
}
_leave(" = 0 [done]");
@@ -711,8 +734,8 @@ int afs_fs_create(struct afs_server *server,
memset(bp, 0, padsz);
bp = (void *) bp + padsz;
}
- *bp++ = htonl(AFS_SET_MODE);
- *bp++ = 0; /* mtime */
+ *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
+ *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
*bp++ = 0; /* owner */
*bp++ = 0; /* group */
*bp++ = htonl(mode & S_IALLUGO); /* unix mode */
@@ -980,8 +1003,8 @@ int afs_fs_symlink(struct afs_server *server,
memset(bp, 0, c_padsz);
bp = (void *) bp + c_padsz;
}
- *bp++ = htonl(AFS_SET_MODE);
- *bp++ = 0; /* mtime */
+ *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
+ *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
*bp++ = 0; /* owner */
*bp++ = 0; /* group */
*bp++ = htonl(S_IRWXUGO); /* unix mode */
@@ -1180,8 +1203,8 @@ static int afs_fs_store_data64(struct afs_server *server,
*bp++ = htonl(vnode->fid.vnode);
*bp++ = htonl(vnode->fid.unique);
- *bp++ = 0; /* mask */
- *bp++ = 0; /* mtime */
+ *bp++ = htonl(AFS_SET_MTIME); /* mask */
+ *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
*bp++ = 0; /* owner */
*bp++ = 0; /* group */
*bp++ = 0; /* unix mode */
@@ -1213,7 +1236,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
_enter(",%x,{%x:%u},,",
key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode);
- size = to - offset;
+ size = (loff_t)to - (loff_t)offset;
if (first != last)
size += (loff_t)(last - first) << PAGE_SHIFT;
pos = (loff_t)first << PAGE_SHIFT;
@@ -1257,8 +1280,8 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
*bp++ = htonl(vnode->fid.vnode);
*bp++ = htonl(vnode->fid.unique);
- *bp++ = 0; /* mask */
- *bp++ = 0; /* mtime */
+ *bp++ = htonl(AFS_SET_MTIME); /* mask */
+ *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
*bp++ = 0; /* owner */
*bp++ = 0; /* group */
*bp++ = 0; /* unix mode */
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 1e4897a048d2..aae55dd15108 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -54,8 +54,21 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
inode->i_fop = &afs_dir_file_operations;
break;
case AFS_FTYPE_SYMLINK:
- inode->i_mode = S_IFLNK | vnode->status.mode;
- inode->i_op = &page_symlink_inode_operations;
+ /* Symlinks with a mode of 0644 are actually mountpoints. */
+ if ((vnode->status.mode & 0777) == 0644) {
+ inode->i_flags |= S_AUTOMOUNT;
+
+ spin_lock(&vnode->lock);
+ set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
+ spin_unlock(&vnode->lock);
+
+ inode->i_mode = S_IFDIR | 0555;
+ inode->i_op = &afs_mntpt_inode_operations;
+ inode->i_fop = &afs_mntpt_file_operations;
+ } else {
+ inode->i_mode = S_IFLNK | vnode->status.mode;
+ inode->i_op = &page_symlink_inode_operations;
+ }
inode_nohighmem(inode);
break;
default:
@@ -70,27 +83,15 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
set_nlink(inode, vnode->status.nlink);
inode->i_uid = vnode->status.owner;
- inode->i_gid = GLOBAL_ROOT_GID;
+ inode->i_gid = vnode->status.group;
inode->i_size = vnode->status.size;
- inode->i_ctime.tv_sec = vnode->status.mtime_server;
+ inode->i_ctime.tv_sec = vnode->status.mtime_client;
inode->i_ctime.tv_nsec = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime;
inode->i_blocks = 0;
inode->i_generation = vnode->fid.unique;
inode->i_version = vnode->status.data_version;
inode->i_mapping->a_ops = &afs_fs_aops;
-
- /* check to see whether a symbolic link is really a mountpoint */
- if (vnode->status.type == AFS_FTYPE_SYMLINK) {
- afs_mntpt_check_symlink(vnode, key);
-
- if (test_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags)) {
- inode->i_mode = S_IFDIR | vnode->status.mode;
- inode->i_op = &afs_mntpt_inode_operations;
- inode->i_fop = &afs_mntpt_file_operations;
- }
- }
-
return 0;
}
@@ -245,12 +246,13 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
vnode->cb_version = 0;
vnode->cb_expiry = 0;
vnode->cb_type = 0;
- vnode->cb_expires = get_seconds();
+ vnode->cb_expires = ktime_get_real_seconds();
} else {
vnode->cb_version = cb->version;
vnode->cb_expiry = cb->expiry;
vnode->cb_type = cb->type;
- vnode->cb_expires = vnode->cb_expiry + get_seconds();
+ vnode->cb_expires = vnode->cb_expiry +
+ ktime_get_real_seconds();
}
}
@@ -323,7 +325,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
!test_bit(AFS_VNODE_MODIFIED, &vnode->flags) &&
!test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
- if (vnode->cb_expires < get_seconds() + 10) {
+ if (vnode->cb_expires < ktime_get_real_seconds() + 10) {
_debug("callback expired");
set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
} else {
@@ -444,7 +446,7 @@ void afs_evict_inode(struct inode *inode)
mutex_lock(&vnode->permits_lock);
permits = vnode->permits;
- rcu_assign_pointer(vnode->permits, NULL);
+ RCU_INIT_POINTER(vnode->permits, NULL);
mutex_unlock(&vnode->permits_lock);
if (permits)
call_rcu(&permits->rcu, afs_zap_permits);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5dfa56903a2d..a6901360fb81 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -11,6 +11,7 @@
#include <linux/compiler.h>
#include <linux/kernel.h>
+#include <linux/ktime.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/rxrpc.h>
@@ -90,7 +91,10 @@ struct afs_call {
unsigned request_size; /* size of request data */
unsigned reply_max; /* maximum size of reply */
unsigned first_offset; /* offset into mapping[first] */
- unsigned last_to; /* amount of mapping[last] */
+ union {
+ unsigned last_to; /* amount of mapping[last] */
+ unsigned count2; /* count used in unmarshalling */
+ };
unsigned char unmarshall; /* unmarshalling phase */
bool incoming; /* T if incoming call */
bool send_pages; /* T if data from mapping should be sent */
@@ -127,12 +131,11 @@ struct afs_call_type {
*/
struct afs_read {
loff_t pos; /* Where to start reading */
- loff_t len; /* How much to read */
+ loff_t len; /* How much we're asking for */
loff_t actual_len; /* How much we're actually getting */
+ loff_t remain; /* Amount remaining */
atomic_t usage;
- unsigned int remain; /* Amount remaining */
unsigned int index; /* Which page we're reading into */
- unsigned int pg_offset; /* Offset in page we're at */
unsigned int nr_pages;
void (*page_done)(struct afs_call *, struct afs_read *);
struct page *pages[];
@@ -247,7 +250,7 @@ struct afs_cache_vhash {
*/
struct afs_vlocation {
atomic_t usage;
- time_t time_of_death; /* time at which put reduced usage to 0 */
+ time64_t time_of_death; /* time at which put reduced usage to 0 */
struct list_head link; /* link in cell volume location list */
struct list_head grave; /* link in master graveyard list */
struct list_head update; /* link in master update list */
@@ -258,7 +261,7 @@ struct afs_vlocation {
struct afs_cache_vlocation vldb; /* volume information DB record */
struct afs_volume *vols[3]; /* volume access record pointer (index by type) */
wait_queue_head_t waitq; /* status change waitqueue */
- time_t update_at; /* time at which record should be updated */
+ time64_t update_at; /* time at which record should be updated */
spinlock_t lock; /* access lock */
afs_vlocation_state_t state; /* volume location state */
unsigned short upd_rej_cnt; /* ENOMEDIUM count during update */
@@ -271,7 +274,7 @@ struct afs_vlocation {
*/
struct afs_server {
atomic_t usage;
- time_t time_of_death; /* time at which put reduced usage to 0 */
+ time64_t time_of_death; /* time at which put reduced usage to 0 */
struct in_addr addr; /* server address */
struct afs_cell *cell; /* cell in which server resides */
struct list_head link; /* link in cell's server list */
@@ -374,8 +377,8 @@ struct afs_vnode {
struct rb_node server_rb; /* link in server->fs_vnodes */
struct rb_node cb_promise; /* link in server->cb_promises */
struct work_struct cb_broken_work; /* work to be done on callback break */
- time_t cb_expires; /* time at which callback expires */
- time_t cb_expires_at; /* time used to order cb_promise */
+ time64_t cb_expires; /* time at which callback expires */
+ time64_t cb_expires_at; /* time used to order cb_promise */
unsigned cb_version; /* callback version */
unsigned cb_expiry; /* callback expiry time */
afs_callback_type_t cb_type; /* type of callback */
@@ -557,7 +560,6 @@ extern const struct inode_operations afs_autocell_inode_operations;
extern const struct file_operations afs_mntpt_file_operations;
extern struct vfsmount *afs_d_automount(struct path *);
-extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *);
extern void afs_mntpt_kill_timer(void);
/*
@@ -718,6 +720,7 @@ extern int afs_writepages(struct address_space *, struct writeback_control *);
extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
extern int afs_writeback_all(struct afs_vnode *);
+extern int afs_flush(struct file *, fl_owner_t);
extern int afs_fsync(struct file *, loff_t, loff_t, int);
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 91ea1aa0d8b3..100b207efc9e 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -84,6 +84,8 @@ int afs_abort_to_error(u32 abort_code)
case RXKADDATALEN: return -EKEYREJECTED;
case RXKADILLEGALLEVEL: return -EKEYREJECTED;
+ case RXGEN_OPCODE: return -ENOTSUPP;
+
default: return -EREMOTEIO;
}
}
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index d4fb0afc0097..bd3b65cde282 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -47,59 +47,6 @@ static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out);
static unsigned long afs_mntpt_expiry_timeout = 10 * 60;
/*
- * check a symbolic link to see whether it actually encodes a mountpoint
- * - sets the AFS_VNODE_MOUNTPOINT flag on the vnode appropriately
- */
-int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key)
-{
- struct page *page;
- size_t size;
- char *buf;
- int ret;
-
- _enter("{%x:%u,%u}",
- vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
-
- /* read the contents of the symlink into the pagecache */
- page = read_cache_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0,
- afs_page_filler, key);
- if (IS_ERR(page)) {
- ret = PTR_ERR(page);
- goto out;
- }
-
- ret = -EIO;
- if (PageError(page))
- goto out_free;
-
- buf = kmap(page);
-
- /* examine the symlink's contents */
- size = vnode->status.size;
- _debug("symlink to %*.*s", (int) size, (int) size, buf);
-
- if (size > 2 &&
- (buf[0] == '%' || buf[0] == '#') &&
- buf[size - 1] == '.'
- ) {
- _debug("symlink is a mountpoint");
- spin_lock(&vnode->lock);
- set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
- vnode->vfs_inode.i_flags |= S_AUTOMOUNT;
- spin_unlock(&vnode->lock);
- }
-
- ret = 0;
-
- kunmap(page);
-out_free:
- put_page(page);
-out:
- _leave(" = %d", ret);
- return ret;
-}
-
-/*
* no valid lookup procedure on this sort of dir
*/
static struct dentry *afs_mntpt_lookup(struct inode *dir,
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 419ef05dcb5e..8f76b13d5549 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -259,67 +259,74 @@ void afs_flat_call_destructor(struct afs_call *call)
call->buffer = NULL;
}
+#define AFS_BVEC_MAX 8
+
+/*
+ * Load the given bvec with the next few pages.
+ */
+static void afs_load_bvec(struct afs_call *call, struct msghdr *msg,
+ struct bio_vec *bv, pgoff_t first, pgoff_t last,
+ unsigned offset)
+{
+ struct page *pages[AFS_BVEC_MAX];
+ unsigned int nr, n, i, to, bytes = 0;
+
+ nr = min_t(pgoff_t, last - first + 1, AFS_BVEC_MAX);
+ n = find_get_pages_contig(call->mapping, first, nr, pages);
+ ASSERTCMP(n, ==, nr);
+
+ msg->msg_flags |= MSG_MORE;
+ for (i = 0; i < nr; i++) {
+ to = PAGE_SIZE;
+ if (first + i >= last) {
+ to = call->last_to;
+ msg->msg_flags &= ~MSG_MORE;
+ }
+ bv[i].bv_page = pages[i];
+ bv[i].bv_len = to - offset;
+ bv[i].bv_offset = offset;
+ bytes += to - offset;
+ offset = 0;
+ }
+
+ iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, bv, nr, bytes);
+}
+
/*
* attach the data from a bunch of pages on an inode to a call
*/
static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
{
- struct page *pages[8];
- unsigned count, n, loop, offset, to;
+ struct bio_vec bv[AFS_BVEC_MAX];
+ unsigned int bytes, nr, loop, offset;
pgoff_t first = call->first, last = call->last;
int ret;
- _enter("");
-
offset = call->first_offset;
call->first_offset = 0;
do {
- _debug("attach %lx-%lx", first, last);
-
- count = last - first + 1;
- if (count > ARRAY_SIZE(pages))
- count = ARRAY_SIZE(pages);
- n = find_get_pages_contig(call->mapping, first, count, pages);
- ASSERTCMP(n, ==, count);
-
- loop = 0;
- do {
- struct bio_vec bvec = {.bv_page = pages[loop],
- .bv_offset = offset};
- msg->msg_flags = 0;
- to = PAGE_SIZE;
- if (first + loop >= last)
- to = call->last_to;
- else
- msg->msg_flags = MSG_MORE;
- bvec.bv_len = to - offset;
- offset = 0;
-
- _debug("- range %u-%u%s",
- offset, to, msg->msg_flags ? " [more]" : "");
- iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC,
- &bvec, 1, to - offset);
-
- /* have to change the state *before* sending the last
- * packet as RxRPC might give us the reply before it
- * returns from sending the request */
- if (first + loop >= last)
- call->state = AFS_CALL_AWAIT_REPLY;
- ret = rxrpc_kernel_send_data(afs_socket, call->rxcall,
- msg, to - offset);
- if (ret < 0)
- break;
- } while (++loop < count);
- first += count;
-
- for (loop = 0; loop < count; loop++)
- put_page(pages[loop]);
+ afs_load_bvec(call, msg, bv, first, last, offset);
+ offset = 0;
+ bytes = msg->msg_iter.count;
+ nr = msg->msg_iter.nr_segs;
+
+ /* Have to change the state *before* sending the last
+ * packet as RxRPC might give us the reply before it
+ * returns from sending the request.
+ */
+ if (first + nr - 1 >= last)
+ call->state = AFS_CALL_AWAIT_REPLY;
+ ret = rxrpc_kernel_send_data(afs_socket, call->rxcall,
+ msg, bytes);
+ for (loop = 0; loop < nr; loop++)
+ put_page(bv[loop].bv_page);
if (ret < 0)
break;
+
+ first += nr;
} while (first <= last);
- _leave(" = %d", ret);
return ret;
}
@@ -333,6 +340,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
struct rxrpc_call *rxcall;
struct msghdr msg;
struct kvec iov[1];
+ size_t offset;
+ u32 abort_code;
int ret;
_enter("%x,{%d},", addr->s_addr, ntohs(call->port));
@@ -381,9 +390,11 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
msg.msg_controllen = 0;
msg.msg_flags = (call->send_pages ? MSG_MORE : 0);
- /* have to change the state *before* sending the last packet as RxRPC
- * might give us the reply before it returns from sending the
- * request */
+ /* We have to change the state *before* sending the last packet as
+ * rxrpc might give us the reply before it returns from sending the
+ * request. Further, if the send fails, we may already have been given
+ * a notification and may have collected it.
+ */
if (!call->send_pages)
call->state = AFS_CALL_AWAIT_REPLY;
ret = rxrpc_kernel_send_data(afs_socket, rxcall,
@@ -405,7 +416,17 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
return afs_wait_for_call_to_complete(call);
error_do_abort:
- rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, -ret, "KSD");
+ call->state = AFS_CALL_COMPLETE;
+ if (ret != -ECONNABORTED) {
+ rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT,
+ -ret, "KSD");
+ } else {
+ abort_code = 0;
+ offset = 0;
+ rxrpc_kernel_recv_data(afs_socket, rxcall, NULL, 0, &offset,
+ false, &abort_code);
+ ret = call->type->abort_to_error(abort_code);
+ }
error_kill_call:
afs_put_call(call);
_leave(" = %d", ret);
@@ -452,16 +473,18 @@ static void afs_deliver_to_call(struct afs_call *call)
case -EINPROGRESS:
case -EAGAIN:
goto out;
+ case -ECONNABORTED:
+ goto call_complete;
case -ENOTCONN:
abort_code = RX_CALL_DEAD;
rxrpc_kernel_abort_call(afs_socket, call->rxcall,
abort_code, -ret, "KNC");
- goto do_abort;
+ goto save_error;
case -ENOTSUPP:
- abort_code = RX_INVALID_OPERATION;
+ abort_code = RXGEN_OPCODE;
rxrpc_kernel_abort_call(afs_socket, call->rxcall,
abort_code, -ret, "KIV");
- goto do_abort;
+ goto save_error;
case -ENODATA:
case -EBADMSG:
case -EMSGSIZE:
@@ -471,7 +494,7 @@ static void afs_deliver_to_call(struct afs_call *call)
abort_code = RXGEN_SS_UNMARSHAL;
rxrpc_kernel_abort_call(afs_socket, call->rxcall,
abort_code, EBADMSG, "KUM");
- goto do_abort;
+ goto save_error;
}
}
@@ -482,8 +505,9 @@ out:
_leave("");
return;
-do_abort:
+save_error:
call->error = ret;
+call_complete:
call->state = AFS_CALL_COMPLETE;
goto done;
}
@@ -493,7 +517,6 @@ do_abort:
*/
static int afs_wait_for_call_to_complete(struct afs_call *call)
{
- const char *abort_why;
int ret;
DECLARE_WAITQUEUE(myself, current);
@@ -512,13 +535,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
continue;
}
- abort_why = "KWC";
- ret = call->error;
- if (call->state == AFS_CALL_COMPLETE)
- break;
- abort_why = "KWI";
- ret = -EINTR;
- if (signal_pending(current))
+ if (call->state == AFS_CALL_COMPLETE ||
+ signal_pending(current))
break;
schedule();
}
@@ -526,13 +544,14 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
remove_wait_queue(&call->waitq, &myself);
__set_current_state(TASK_RUNNING);
- /* kill the call */
+ /* Kill off the call if it's still live. */
if (call->state < AFS_CALL_COMPLETE) {
- _debug("call incomplete");
+ _debug("call interrupted");
rxrpc_kernel_abort_call(afs_socket, call->rxcall,
- RX_CALL_DEAD, -ret, abort_why);
+ RX_USER_ABORT, -EINTR, "KWI");
}
+ ret = call->error;
_debug("call complete");
afs_put_call(call);
_leave(" = %d", ret);
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 8d010422dc89..ecb86a670180 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -114,7 +114,7 @@ void afs_clear_permits(struct afs_vnode *vnode)
mutex_lock(&vnode->permits_lock);
permits = vnode->permits;
- rcu_assign_pointer(vnode->permits, NULL);
+ RCU_INIT_POINTER(vnode->permits, NULL);
mutex_unlock(&vnode->permits_lock);
if (permits)
@@ -340,17 +340,22 @@ int afs_permission(struct inode *inode, int mask)
} else {
if (!(access & AFS_ACE_LOOKUP))
goto permission_denied;
+ if ((mask & MAY_EXEC) && !(inode->i_mode & S_IXUSR))
+ goto permission_denied;
if (mask & (MAY_EXEC | MAY_READ)) {
if (!(access & AFS_ACE_READ))
goto permission_denied;
+ if (!(inode->i_mode & S_IRUSR))
+ goto permission_denied;
} else if (mask & MAY_WRITE) {
if (!(access & AFS_ACE_WRITE))
goto permission_denied;
+ if (!(inode->i_mode & S_IWUSR))
+ goto permission_denied;
}
}
key_put(key);
- ret = generic_permission(inode, mask);
_leave(" = %d", ret);
return ret;
diff --git a/fs/afs/server.c b/fs/afs/server.c
index d4066ab7dd55..c001b1f2455f 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -242,7 +242,7 @@ void afs_put_server(struct afs_server *server)
spin_lock(&afs_server_graveyard_lock);
if (atomic_read(&server->usage) == 0) {
list_move_tail(&server->grave, &afs_server_graveyard);
- server->time_of_death = get_seconds();
+ server->time_of_death = ktime_get_real_seconds();
queue_delayed_work(afs_wq, &afs_server_reaper,
afs_server_timeout * HZ);
}
@@ -277,9 +277,9 @@ static void afs_reap_server(struct work_struct *work)
LIST_HEAD(corpses);
struct afs_server *server;
unsigned long delay, expiry;
- time_t now;
+ time64_t now;
- now = get_seconds();
+ now = ktime_get_real_seconds();
spin_lock(&afs_server_graveyard_lock);
while (!list_empty(&afs_server_graveyard)) {
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index d7d8dd8c0b31..37b7c3b342a6 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -340,7 +340,8 @@ static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
struct afs_vlocation *xvl;
/* wait at least 10 minutes before updating... */
- vl->update_at = get_seconds() + afs_vlocation_update_timeout;
+ vl->update_at = ktime_get_real_seconds() +
+ afs_vlocation_update_timeout;
spin_lock(&afs_vlocation_updates_lock);
@@ -506,7 +507,7 @@ void afs_put_vlocation(struct afs_vlocation *vl)
if (atomic_read(&vl->usage) == 0) {
_debug("buried");
list_move_tail(&vl->grave, &afs_vlocation_graveyard);
- vl->time_of_death = get_seconds();
+ vl->time_of_death = ktime_get_real_seconds();
queue_delayed_work(afs_wq, &afs_vlocation_reap,
afs_vlocation_timeout * HZ);
@@ -543,11 +544,11 @@ static void afs_vlocation_reaper(struct work_struct *work)
LIST_HEAD(corpses);
struct afs_vlocation *vl;
unsigned long delay, expiry;
- time_t now;
+ time64_t now;
_enter("");
- now = get_seconds();
+ now = ktime_get_real_seconds();
spin_lock(&afs_vlocation_graveyard_lock);
while (!list_empty(&afs_vlocation_graveyard)) {
@@ -622,13 +623,13 @@ static void afs_vlocation_updater(struct work_struct *work)
{
struct afs_cache_vlocation vldb;
struct afs_vlocation *vl, *xvl;
- time_t now;
+ time64_t now;
long timeout;
int ret;
_enter("");
- now = get_seconds();
+ now = ktime_get_real_seconds();
/* find a record to update */
spin_lock(&afs_vlocation_updates_lock);
@@ -684,7 +685,8 @@ static void afs_vlocation_updater(struct work_struct *work)
/* and then reschedule */
_debug("reschedule");
- vl->update_at = get_seconds() + afs_vlocation_update_timeout;
+ vl->update_at = ktime_get_real_seconds() +
+ afs_vlocation_update_timeout;
spin_lock(&afs_vlocation_updates_lock);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c83c1a0e851f..2d2fccd5044b 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -84,10 +84,9 @@ void afs_put_writeback(struct afs_writeback *wb)
* partly or wholly fill a page that's under preparation for writing
*/
static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
- loff_t pos, struct page *page)
+ loff_t pos, unsigned int len, struct page *page)
{
struct afs_read *req;
- loff_t i_size;
int ret;
_enter(",,%llu", (unsigned long long)pos);
@@ -99,14 +98,10 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
atomic_set(&req->usage, 1);
req->pos = pos;
+ req->len = len;
req->nr_pages = 1;
req->pages[0] = page;
-
- i_size = i_size_read(&vnode->vfs_inode);
- if (pos + PAGE_SIZE > i_size)
- req->len = i_size - pos;
- else
- req->len = PAGE_SIZE;
+ get_page(page);
ret = afs_vnode_fetch_data(vnode, key, req);
afs_put_read(req);
@@ -159,12 +154,12 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
kfree(candidate);
return -ENOMEM;
}
- *pagep = page;
- /* page won't leak in error case: it eventually gets cleaned off LRU */
if (!PageUptodate(page) && len != PAGE_SIZE) {
- ret = afs_fill_page(vnode, key, index << PAGE_SHIFT, page);
+ ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page);
if (ret < 0) {
+ unlock_page(page);
+ put_page(page);
kfree(candidate);
_leave(" = %d [prep]", ret);
return ret;
@@ -172,6 +167,9 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
SetPageUptodate(page);
}
+ /* page won't leak in error case: it eventually gets cleaned off LRU */
+ *pagep = page;
+
try_again:
spin_lock(&vnode->writeback_lock);
@@ -233,7 +231,7 @@ flush_conflicting_wb:
if (wb->state == AFS_WBACK_PENDING)
wb->state = AFS_WBACK_CONFLICTING;
spin_unlock(&vnode->writeback_lock);
- if (PageDirty(page)) {
+ if (clear_page_dirty_for_io(page)) {
ret = afs_write_back_from_locked_page(wb, page);
if (ret < 0) {
afs_put_writeback(candidate);
@@ -257,7 +255,9 @@ int afs_write_end(struct file *file, struct address_space *mapping,
struct page *page, void *fsdata)
{
struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+ struct key *key = file->private_data;
loff_t i_size, maybe_i_size;
+ int ret;
_enter("{%x:%u},{%lx}",
vnode->fid.vid, vnode->fid.vnode, page->index);
@@ -273,6 +273,20 @@ int afs_write_end(struct file *file, struct address_space *mapping,
spin_unlock(&vnode->writeback_lock);
}
+ if (!PageUptodate(page)) {
+ if (copied < len) {
+ /* Try and load any missing data from the server. The
+ * unmarshalling routine will take care of clearing any
+ * bits that are beyond the EOF.
+ */
+ ret = afs_fill_page(vnode, key, pos + copied,
+ len - copied, page);
+ if (ret < 0)
+ return ret;
+ }
+ SetPageUptodate(page);
+ }
+
set_page_dirty(page);
if (PageDirty(page))
_debug("dirtied");
@@ -307,10 +321,14 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error,
ASSERTCMP(pv.nr, ==, count);
for (loop = 0; loop < count; loop++) {
- ClearPageUptodate(pv.pages[loop]);
+ struct page *page = pv.pages[loop];
+ ClearPageUptodate(page);
if (error)
- SetPageError(pv.pages[loop]);
- end_page_writeback(pv.pages[loop]);
+ SetPageError(page);
+ if (PageWriteback(page))
+ end_page_writeback(page);
+ if (page->index >= first)
+ first = page->index + 1;
}
__pagevec_release(&pv);
@@ -335,8 +353,6 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
_enter(",%lx", primary_page->index);
count = 1;
- if (!clear_page_dirty_for_io(primary_page))
- BUG();
if (test_set_page_writeback(primary_page))
BUG();
@@ -502,17 +518,17 @@ static int afs_writepages_region(struct address_space *mapping,
*/
lock_page(page);
- if (page->mapping != mapping) {
+ if (page->mapping != mapping || !PageDirty(page)) {
unlock_page(page);
put_page(page);
continue;
}
- if (wbc->sync_mode != WB_SYNC_NONE)
- wait_on_page_writeback(page);
-
- if (PageWriteback(page) || !PageDirty(page)) {
+ if (PageWriteback(page)) {
unlock_page(page);
+ if (wbc->sync_mode != WB_SYNC_NONE)
+ wait_on_page_writeback(page);
+ put_page(page);
continue;
}
@@ -523,6 +539,8 @@ static int afs_writepages_region(struct address_space *mapping,
wb->state = AFS_WBACK_WRITING;
spin_unlock(&wb->vnode->writeback_lock);
+ if (!clear_page_dirty_for_io(page))
+ BUG();
ret = afs_write_back_from_locked_page(wb, page);
unlock_page(page);
put_page(page);
@@ -746,6 +764,20 @@ out:
}
/*
+ * Flush out all outstanding writes on a file opened for writing when it is
+ * closed.
+ */
+int afs_flush(struct file *file, fl_owner_t id)
+{
+ _enter("");
+
+ if ((file->f_mode & FMODE_WRITE) == 0)
+ return 0;
+
+ return vfs_fsync(file, 0);
+}
+
+/*
* notification that a previously read-only page is about to become writable
* - if it returns an error, the caller will deliver a bus error signal
*/
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 29b7fc28c607..c4115901d906 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1259,7 +1259,7 @@ struct btrfs_root {
atomic_t will_be_snapshoted;
/* For qgroup metadata space reserve */
- atomic_t qgroup_meta_rsv;
+ atomic64_t qgroup_meta_rsv;
};
static inline u32 btrfs_inode_sectorsize(const struct inode *inode)
{
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 08b74daf35d0..eb1ee7b6f532 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1342,7 +1342,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
atomic_set(&root->orphan_inodes, 0);
atomic_set(&root->refs, 1);
atomic_set(&root->will_be_snapshoted, 0);
- atomic_set(&root->qgroup_meta_rsv, 0);
+ atomic64_set(&root->qgroup_meta_rsv, 0);
root->log_transid = 0;
root->log_transid_committed = -1;
root->last_log_commit = 0;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 28e81922a21c..27fdb250b446 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1714,7 +1714,8 @@ static int __process_pages_contig(struct address_space *mapping,
* can we find nothing at @index.
*/
ASSERT(page_ops & PAGE_LOCK);
- return ret;
+ err = -EAGAIN;
+ goto out;
}
for (i = 0; i < ret; i++) {
@@ -2583,26 +2584,36 @@ static void end_bio_extent_readpage(struct bio *bio)
if (tree->ops) {
ret = tree->ops->readpage_io_failed_hook(page, mirror);
- if (!ret && !bio->bi_error)
- uptodate = 1;
- } else {
+ if (ret == -EAGAIN) {
+ /*
+ * Data inode's readpage_io_failed_hook() always
+ * returns -EAGAIN.
+ *
+ * The generic bio_readpage_error handles errors
+ * the following way: If possible, new read
+ * requests are created and submitted and will
+ * end up in end_bio_extent_readpage as well (if
+ * we're lucky, not in the !uptodate case). In
+ * that case it returns 0 and we just go on with
+ * the next page in our bio. If it can't handle
+ * the error it will return -EIO and we remain
+ * responsible for that page.
+ */
+ ret = bio_readpage_error(bio, offset, page,
+ start, end, mirror);
+ if (ret == 0) {
+ uptodate = !bio->bi_error;
+ offset += len;
+ continue;
+ }
+ }
+
/*
- * The generic bio_readpage_error handles errors the
- * following way: If possible, new read requests are
- * created and submitted and will end up in
- * end_bio_extent_readpage as well (if we're lucky, not
- * in the !uptodate case). In that case it returns 0 and
- * we just go on with the next page in our bio. If it
- * can't handle the error it will return -EIO and we
- * remain responsible for that page.
+ * metadata's readpage_io_failed_hook() always returns
+ * -EIO and fixes nothing. -EIO is also returned if
+ * data inode error could not be fixed.
*/
- ret = bio_readpage_error(bio, offset, page, start, end,
- mirror);
- if (ret == 0) {
- uptodate = !bio->bi_error;
- offset += len;
- continue;
- }
+ ASSERT(ret == -EIO);
}
readpage_ok:
if (likely(uptodate)) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c40060cc481f..a18510be76c1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6709,6 +6709,20 @@ static noinline int uncompress_inline(struct btrfs_path *path,
max_size = min_t(unsigned long, PAGE_SIZE, max_size);
ret = btrfs_decompress(compress_type, tmp, page,
extent_offset, inline_size, max_size);
+
+ /*
+ * decompression code contains a memset to fill in any space between the end
+ * of the uncompressed data and the end of max_size in case the decompressed
+ * data ends up shorter than ram_bytes. That doesn't cover the hole between
+ * the end of an inline extent and the beginning of the next block, so we
+ * cover that region here.
+ */
+
+ if (max_size + pg_offset < PAGE_SIZE) {
+ char *map = kmap(page);
+ memset(map + pg_offset + max_size, 0, PAGE_SIZE - max_size - pg_offset);
+ kunmap(page);
+ }
kfree(tmp);
return ret;
}
@@ -10509,9 +10523,9 @@ out_inode:
}
__attribute__((const))
-static int dummy_readpage_io_failed_hook(struct page *page, int failed_mirror)
+static int btrfs_readpage_io_failed_hook(struct page *page, int failed_mirror)
{
- return 0;
+ return -EAGAIN;
}
static const struct inode_operations btrfs_dir_inode_operations = {
@@ -10556,7 +10570,7 @@ static const struct extent_io_ops btrfs_extent_io_ops = {
.submit_bio_hook = btrfs_submit_bio_hook,
.readpage_end_io_hook = btrfs_readpage_end_io_hook,
.merge_bio_hook = btrfs_merge_bio_hook,
- .readpage_io_failed_hook = dummy_readpage_io_failed_hook,
+ .readpage_io_failed_hook = btrfs_readpage_io_failed_hook,
/* optional callbacks */
.fill_delalloc = run_delalloc_range,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index a5da750c1087..a59801dc2a34 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2948,20 +2948,20 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
ret = qgroup_reserve(root, num_bytes, enforce);
if (ret < 0)
return ret;
- atomic_add(num_bytes, &root->qgroup_meta_rsv);
+ atomic64_add(num_bytes, &root->qgroup_meta_rsv);
return ret;
}
void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
- int reserved;
+ u64 reserved;
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
!is_fstree(root->objectid))
return;
- reserved = atomic_xchg(&root->qgroup_meta_rsv, 0);
+ reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0);
if (reserved == 0)
return;
btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved);
@@ -2976,8 +2976,8 @@ void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
return;
BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
- WARN_ON(atomic_read(&root->qgroup_meta_rsv) < num_bytes);
- atomic_sub(num_bytes, &root->qgroup_meta_rsv);
+ WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes);
+ atomic64_sub(num_bytes, &root->qgroup_meta_rsv);
btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes);
}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 456c8901489b..a60d5bfb8a49 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -6305,8 +6305,13 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
goto out;
}
+ /*
+ * Check that we don't overflow at later allocations, we request
+ * clone_sources_count + 1 items, and compare to unsigned long inside
+ * access_ok.
+ */
if (arg->clone_sources_count >
- ULLONG_MAX / sizeof(*arg->clone_sources)) {
+ ULONG_MAX / sizeof(struct clone_root) - 1) {
ret = -EINVAL;
goto out;
}
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 02a7a9286449..6d6eca394d4d 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -327,7 +327,6 @@ EXPORT_SYMBOL(fscrypt_decrypt_page);
static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
{
struct dentry *dir;
- struct fscrypt_info *ci;
int dir_has_key, cached_with_key;
if (flags & LOOKUP_RCU)
@@ -339,18 +338,11 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
return 0;
}
- ci = d_inode(dir)->i_crypt_info;
- if (ci && ci->ci_keyring_key &&
- (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
- (1 << KEY_FLAG_REVOKED) |
- (1 << KEY_FLAG_DEAD))))
- ci = NULL;
-
/* this should eventually be an flag in d_flags */
spin_lock(&dentry->d_lock);
cached_with_key = dentry->d_flags & DCACHE_ENCRYPTED_WITH_KEY;
spin_unlock(&dentry->d_lock);
- dir_has_key = (ci != NULL);
+ dir_has_key = (d_inode(dir)->i_crypt_info != NULL);
dput(dir);
/*
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 13052b85c393..37b49894c762 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -350,7 +350,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
fname->disk_name.len = iname->len;
return 0;
}
- ret = fscrypt_get_crypt_info(dir);
+ ret = fscrypt_get_encryption_info(dir);
if (ret && ret != -EOPNOTSUPP)
return ret;
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index fdbb8af32eaf..e39696e64494 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -67,7 +67,6 @@ struct fscrypt_info {
u8 ci_filename_mode;
u8 ci_flags;
struct crypto_skcipher *ci_ctfm;
- struct key *ci_keyring_key;
u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE];
};
@@ -101,7 +100,4 @@ extern int fscrypt_do_page_crypto(const struct inode *inode,
extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
gfp_t gfp_flags);
-/* keyinfo.c */
-extern int fscrypt_get_crypt_info(struct inode *);
-
#endif /* _FSCRYPT_PRIVATE_H */
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index d5d896fa5a71..8cdfddce2b34 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -95,6 +95,7 @@ static int validate_user_key(struct fscrypt_info *crypt_info,
kfree(description);
if (IS_ERR(keyring_key))
return PTR_ERR(keyring_key);
+ down_read(&keyring_key->sem);
if (keyring_key->type != &key_type_logon) {
printk_once(KERN_WARNING
@@ -102,11 +103,9 @@ static int validate_user_key(struct fscrypt_info *crypt_info,
res = -ENOKEY;
goto out;
}
- down_read(&keyring_key->sem);
ukp = user_key_payload_locked(keyring_key);
if (ukp->datalen != sizeof(struct fscrypt_key)) {
res = -EINVAL;
- up_read(&keyring_key->sem);
goto out;
}
master_key = (struct fscrypt_key *)ukp->data;
@@ -117,17 +116,11 @@ static int validate_user_key(struct fscrypt_info *crypt_info,
"%s: key size incorrect: %d\n",
__func__, master_key->size);
res = -ENOKEY;
- up_read(&keyring_key->sem);
goto out;
}
res = derive_key_aes(ctx->nonce, master_key->raw, raw_key);
- up_read(&keyring_key->sem);
- if (res)
- goto out;
-
- crypt_info->ci_keyring_key = keyring_key;
- return 0;
out:
+ up_read(&keyring_key->sem);
key_put(keyring_key);
return res;
}
@@ -169,12 +162,11 @@ static void put_crypt_info(struct fscrypt_info *ci)
if (!ci)
return;
- key_put(ci->ci_keyring_key);
crypto_free_skcipher(ci->ci_ctfm);
kmem_cache_free(fscrypt_info_cachep, ci);
}
-int fscrypt_get_crypt_info(struct inode *inode)
+int fscrypt_get_encryption_info(struct inode *inode)
{
struct fscrypt_info *crypt_info;
struct fscrypt_context ctx;
@@ -184,21 +176,15 @@ int fscrypt_get_crypt_info(struct inode *inode)
u8 *raw_key = NULL;
int res;
+ if (inode->i_crypt_info)
+ return 0;
+
res = fscrypt_initialize(inode->i_sb->s_cop->flags);
if (res)
return res;
if (!inode->i_sb->s_cop->get_context)
return -EOPNOTSUPP;
-retry:
- crypt_info = ACCESS_ONCE(inode->i_crypt_info);
- if (crypt_info) {
- if (!crypt_info->ci_keyring_key ||
- key_validate(crypt_info->ci_keyring_key) == 0)
- return 0;
- fscrypt_put_encryption_info(inode, crypt_info);
- goto retry;
- }
res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
if (res < 0) {
@@ -229,7 +215,6 @@ retry:
crypt_info->ci_data_mode = ctx.contents_encryption_mode;
crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
crypt_info->ci_ctfm = NULL;
- crypt_info->ci_keyring_key = NULL;
memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
sizeof(crypt_info->ci_master_key));
@@ -273,14 +258,8 @@ retry:
if (res)
goto out;
- kzfree(raw_key);
- raw_key = NULL;
- if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) != NULL) {
- put_crypt_info(crypt_info);
- goto retry;
- }
- return 0;
-
+ if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) == NULL)
+ crypt_info = NULL;
out:
if (res == -ENOKEY)
res = 0;
@@ -288,6 +267,7 @@ out:
kzfree(raw_key);
return res;
}
+EXPORT_SYMBOL(fscrypt_get_encryption_info);
void fscrypt_put_encryption_info(struct inode *inode, struct fscrypt_info *ci)
{
@@ -305,17 +285,3 @@ void fscrypt_put_encryption_info(struct inode *inode, struct fscrypt_info *ci)
put_crypt_info(ci);
}
EXPORT_SYMBOL(fscrypt_put_encryption_info);
-
-int fscrypt_get_encryption_info(struct inode *inode)
-{
- struct fscrypt_info *ci = inode->i_crypt_info;
-
- if (!ci ||
- (ci->ci_keyring_key &&
- (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
- (1 << KEY_FLAG_REVOKED) |
- (1 << KEY_FLAG_DEAD)))))
- return fscrypt_get_crypt_info(inode);
- return 0;
-}
-EXPORT_SYMBOL(fscrypt_get_encryption_info);
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 14b76da71269..4908906d54d5 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -33,17 +33,10 @@ static int create_encryption_context_from_policy(struct inode *inode,
const struct fscrypt_policy *policy)
{
struct fscrypt_context ctx;
- int res;
if (!inode->i_sb->s_cop->set_context)
return -EOPNOTSUPP;
- if (inode->i_sb->s_cop->prepare_context) {
- res = inode->i_sb->s_cop->prepare_context(inode);
- if (res)
- return res;
- }
-
ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 7d398d300e97..9382db998ec9 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -743,7 +743,7 @@ static int tcp_accept_from_sock(struct connection *con)
newsock->type = con->sock->type;
newsock->ops = con->sock->ops;
- result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK);
+ result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK, true);
if (result < 0)
goto accept_err;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 30a9f210d1e3..375fb1c05d49 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1169,10 +1169,9 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
set_buffer_uptodate(dir_block);
err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
if (err)
- goto out;
+ return err;
set_buffer_verified(dir_block);
-out:
- return err;
+ return ext4_mark_inode_dirty(handle, inode);
}
static int ext4_convert_inline_data_nolock(handle_t *handle,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7385e6a6b6cb..4247d8d25687 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5400,7 +5400,7 @@ int ext4_getattr(const struct path *path, struct kstat *stat,
* If there is inline data in the inode, the inode will normally not
* have data blocks allocated (it may have an external xattr block).
* Report at least one sector for such files, so tools like tar, rsync,
- * others doen't incorrectly think the file is completely sparse.
+ * others don't incorrectly think the file is completely sparse.
*/
if (unlikely(ext4_has_inline_data(inode)))
stat->blocks += (stat->size + 511) >> 9;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 578f8c33fb44..c992ef2c2f94 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -511,7 +511,7 @@ mext_check_arguments(struct inode *orig_inode,
if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) !=
(donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) {
ext4_debug("ext4 move extent: orig and donor's start "
- "offset are not alligned [ino:orig %lu, donor %lu]\n",
+ "offsets are not aligned [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2e03a0a88d92..a9448db1cf7e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1120,17 +1120,16 @@ static int ext4_get_context(struct inode *inode, void *ctx, size_t len)
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len);
}
-static int ext4_prepare_context(struct inode *inode)
-{
- return ext4_convert_inline_data(inode);
-}
-
static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
void *fs_data)
{
handle_t *handle = fs_data;
int res, res2, retries = 0;
+ res = ext4_convert_inline_data(inode);
+ if (res)
+ return res;
+
/*
* If a journal handle was specified, then the encryption context is
* being set on a new inode via inheritance and is part of a larger
@@ -1196,7 +1195,6 @@ static unsigned ext4_max_namelen(struct inode *inode)
static const struct fscrypt_operations ext4_cryptops = {
.key_prefix = "ext4:",
.get_context = ext4_get_context,
- .prepare_context = ext4_prepare_context,
.set_context = ext4_set_context,
.dummy_context = ext4_dummy_context,
.is_encrypted = ext4_encrypted_inode,
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 67636acf7624..996e7900d4c8 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -131,31 +131,26 @@ static __le32 ext4_xattr_block_csum(struct inode *inode,
}
static int ext4_xattr_block_csum_verify(struct inode *inode,
- sector_t block_nr,
- struct ext4_xattr_header *hdr)
+ struct buffer_head *bh)
{
- if (ext4_has_metadata_csum(inode->i_sb) &&
- (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr)))
- return 0;
- return 1;
-}
-
-static void ext4_xattr_block_csum_set(struct inode *inode,
- sector_t block_nr,
- struct ext4_xattr_header *hdr)
-{
- if (!ext4_has_metadata_csum(inode->i_sb))
- return;
+ struct ext4_xattr_header *hdr = BHDR(bh);
+ int ret = 1;
- hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr);
+ if (ext4_has_metadata_csum(inode->i_sb)) {
+ lock_buffer(bh);
+ ret = (hdr->h_checksum == ext4_xattr_block_csum(inode,
+ bh->b_blocknr, hdr));
+ unlock_buffer(bh);
+ }
+ return ret;
}
-static inline int ext4_handle_dirty_xattr_block(handle_t *handle,
- struct inode *inode,
- struct buffer_head *bh)
+static void ext4_xattr_block_csum_set(struct inode *inode,
+ struct buffer_head *bh)
{
- ext4_xattr_block_csum_set(inode, bh->b_blocknr, BHDR(bh));
- return ext4_handle_dirty_metadata(handle, inode, bh);
+ if (ext4_has_metadata_csum(inode->i_sb))
+ BHDR(bh)->h_checksum = ext4_xattr_block_csum(inode,
+ bh->b_blocknr, BHDR(bh));
}
static inline const struct xattr_handler *
@@ -233,7 +228,7 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
BHDR(bh)->h_blocks != cpu_to_le32(1))
return -EFSCORRUPTED;
- if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh)))
+ if (!ext4_xattr_block_csum_verify(inode, bh))
return -EFSBADCRC;
error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size,
bh->b_data);
@@ -618,23 +613,22 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
}
}
+ ext4_xattr_block_csum_set(inode, bh);
/*
* Beware of this ugliness: Releasing of xattr block references
* from different inodes can race and so we have to protect
* from a race where someone else frees the block (and releases
* its journal_head) before we are done dirtying the buffer. In
* nojournal mode this race is harmless and we actually cannot
- * call ext4_handle_dirty_xattr_block() with locked buffer as
+ * call ext4_handle_dirty_metadata() with locked buffer as
* that function can call sync_dirty_buffer() so for that case
* we handle the dirtying after unlocking the buffer.
*/
if (ext4_handle_valid(handle))
- error = ext4_handle_dirty_xattr_block(handle, inode,
- bh);
+ error = ext4_handle_dirty_metadata(handle, inode, bh);
unlock_buffer(bh);
if (!ext4_handle_valid(handle))
- error = ext4_handle_dirty_xattr_block(handle, inode,
- bh);
+ error = ext4_handle_dirty_metadata(handle, inode, bh);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
@@ -863,13 +857,14 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
ext4_xattr_cache_insert(ext4_mb_cache,
bs->bh);
}
+ ext4_xattr_block_csum_set(inode, bs->bh);
unlock_buffer(bs->bh);
if (error == -EFSCORRUPTED)
goto bad_block;
if (!error)
- error = ext4_handle_dirty_xattr_block(handle,
- inode,
- bs->bh);
+ error = ext4_handle_dirty_metadata(handle,
+ inode,
+ bs->bh);
if (error)
goto cleanup;
goto inserted;
@@ -967,10 +962,11 @@ inserted:
ce->e_reusable = 0;
ea_bdebug(new_bh, "reusing; refcount now=%d",
ref);
+ ext4_xattr_block_csum_set(inode, new_bh);
unlock_buffer(new_bh);
- error = ext4_handle_dirty_xattr_block(handle,
- inode,
- new_bh);
+ error = ext4_handle_dirty_metadata(handle,
+ inode,
+ new_bh);
if (error)
goto cleanup_dquot;
}
@@ -1020,11 +1016,12 @@ getblk_failed:
goto getblk_failed;
}
memcpy(new_bh->b_data, s->base, new_bh->b_size);
+ ext4_xattr_block_csum_set(inode, new_bh);
set_buffer_uptodate(new_bh);
unlock_buffer(new_bh);
ext4_xattr_cache_insert(ext4_mb_cache, new_bh);
- error = ext4_handle_dirty_xattr_block(handle,
- inode, new_bh);
+ error = ext4_handle_dirty_metadata(handle, inode,
+ new_bh);
if (error)
goto cleanup;
}
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index a77df377e2e8..ee2d0a485fc3 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -196,6 +196,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
si->base_mem += NM_I(sbi)->nat_blocks / 8;
+ si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short);
get_cache:
si->cache_mem = 0;
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 4650c9b85de7..8d5c62b07b28 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -750,7 +750,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
dentry_blk = page_address(page);
bit_pos = dentry - dentry_blk->dentry;
for (i = 0; i < slots; i++)
- clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
+ __clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
/* Let's check and deallocate this dentry page */
bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e849f83d6114..0a6e115562f6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -561,6 +561,8 @@ struct f2fs_nm_info {
struct mutex build_lock; /* lock for build free nids */
unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
unsigned char *nat_block_bitmap;
+ unsigned short *free_nid_count; /* free nid count of NAT block */
+ spinlock_t free_nid_lock; /* protect updating of nid count */
/* for checkpoint */
char *nat_bitmap; /* NAT bitmap pointer */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 94967171dee8..481aa8dc79f4 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -338,9 +338,6 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
set_nat_flag(e, IS_CHECKPOINTED, false);
__set_nat_cache_dirty(nm_i, e);
- if (enabled_nat_bits(sbi, NULL) && new_blkaddr == NEW_ADDR)
- clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits);
-
/* update fsync_mark if its inode nat entry is still alive */
if (ni->nid != ni->ino)
e = __lookup_nat_cache(nm_i, ni->ino);
@@ -1823,7 +1820,8 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
kmem_cache_free(free_nid_slab, i);
}
-void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set)
+static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
+ bool set, bool build, bool locked)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
@@ -1833,9 +1831,18 @@ void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set)
return;
if (set)
- set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+ __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
else
- clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+ __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+
+ if (!locked)
+ spin_lock(&nm_i->free_nid_lock);
+ if (set)
+ nm_i->free_nid_count[nat_ofs]++;
+ else if (!build)
+ nm_i->free_nid_count[nat_ofs]--;
+ if (!locked)
+ spin_unlock(&nm_i->free_nid_lock);
}
static void scan_nat_page(struct f2fs_sb_info *sbi,
@@ -1847,7 +1854,10 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
int i;
- set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
+ if (test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
+ return;
+
+ __set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
i = start_nid % NAT_ENTRY_PER_BLOCK;
@@ -1861,7 +1871,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
if (blk_addr == NULL_ADDR)
freed = add_free_nid(sbi, start_nid, true);
- update_free_nid_bitmap(sbi, start_nid, freed);
+ update_free_nid_bitmap(sbi, start_nid, freed, true, false);
}
}
@@ -1877,6 +1887,8 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
for (i = 0; i < nm_i->nat_blocks; i++) {
if (!test_bit_le(i, nm_i->nat_block_bitmap))
continue;
+ if (!nm_i->free_nid_count[i])
+ continue;
for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
nid_t nid;
@@ -1907,58 +1919,6 @@ out:
up_read(&nm_i->nat_tree_lock);
}
-static int scan_nat_bits(struct f2fs_sb_info *sbi)
-{
- struct f2fs_nm_info *nm_i = NM_I(sbi);
- struct page *page;
- unsigned int i = 0;
- nid_t nid;
-
- if (!enabled_nat_bits(sbi, NULL))
- return -EAGAIN;
-
- down_read(&nm_i->nat_tree_lock);
-check_empty:
- i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
- if (i >= nm_i->nat_blocks) {
- i = 0;
- goto check_partial;
- }
-
- for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
- nid++) {
- if (unlikely(nid >= nm_i->max_nid))
- break;
- add_free_nid(sbi, nid, true);
- }
-
- if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS)
- goto out;
- i++;
- goto check_empty;
-
-check_partial:
- i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
- if (i >= nm_i->nat_blocks) {
- disable_nat_bits(sbi, true);
- up_read(&nm_i->nat_tree_lock);
- return -EINVAL;
- }
-
- nid = i * NAT_ENTRY_PER_BLOCK;
- page = get_current_nat_page(sbi, nid);
- scan_nat_page(sbi, page, nid);
- f2fs_put_page(page, 1);
-
- if (nm_i->nid_cnt[FREE_NID_LIST] < MAX_FREE_NIDS) {
- i++;
- goto check_partial;
- }
-out:
- up_read(&nm_i->nat_tree_lock);
- return 0;
-}
-
static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -1980,21 +1940,6 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
if (nm_i->nid_cnt[FREE_NID_LIST])
return;
-
- /* try to find free nids with nat_bits */
- if (!scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
- return;
- }
-
- /* find next valid candidate */
- if (enabled_nat_bits(sbi, NULL)) {
- int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
- nm_i->nat_blocks, 0);
-
- if (idx >= nm_i->nat_blocks)
- set_sbi_flag(sbi, SBI_NEED_FSCK);
- else
- nid = idx * NAT_ENTRY_PER_BLOCK;
}
/* readahead nat pages to be scanned */
@@ -2081,7 +2026,7 @@ retry:
__insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
nm_i->available_nids--;
- update_free_nid_bitmap(sbi, *nid, false);
+ update_free_nid_bitmap(sbi, *nid, false, false, false);
spin_unlock(&nm_i->nid_list_lock);
return true;
@@ -2137,7 +2082,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
nm_i->available_nids++;
- update_free_nid_bitmap(sbi, nid, true);
+ update_free_nid_bitmap(sbi, nid, true, false, false);
spin_unlock(&nm_i->nid_list_lock);
@@ -2383,7 +2328,7 @@ add_out:
list_add_tail(&nes->set_list, head);
}
-void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
+static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
struct page *page)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -2402,16 +2347,16 @@ void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
valid++;
}
if (valid == 0) {
- set_bit_le(nat_index, nm_i->empty_nat_bits);
- clear_bit_le(nat_index, nm_i->full_nat_bits);
+ __set_bit_le(nat_index, nm_i->empty_nat_bits);
+ __clear_bit_le(nat_index, nm_i->full_nat_bits);
return;
}
- clear_bit_le(nat_index, nm_i->empty_nat_bits);
+ __clear_bit_le(nat_index, nm_i->empty_nat_bits);
if (valid == NAT_ENTRY_PER_BLOCK)
- set_bit_le(nat_index, nm_i->full_nat_bits);
+ __set_bit_le(nat_index, nm_i->full_nat_bits);
else
- clear_bit_le(nat_index, nm_i->full_nat_bits);
+ __clear_bit_le(nat_index, nm_i->full_nat_bits);
}
static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
@@ -2467,11 +2412,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
add_free_nid(sbi, nid, false);
spin_lock(&NM_I(sbi)->nid_list_lock);
NM_I(sbi)->available_nids++;
- update_free_nid_bitmap(sbi, nid, true);
+ update_free_nid_bitmap(sbi, nid, true, false, false);
spin_unlock(&NM_I(sbi)->nid_list_lock);
} else {
spin_lock(&NM_I(sbi)->nid_list_lock);
- update_free_nid_bitmap(sbi, nid, false);
+ update_free_nid_bitmap(sbi, nid, false, false, false);
spin_unlock(&NM_I(sbi)->nid_list_lock);
}
}
@@ -2577,6 +2522,40 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
return 0;
}
+inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ unsigned int i = 0;
+ nid_t nid, last_nid;
+
+ if (!enabled_nat_bits(sbi, NULL))
+ return;
+
+ for (i = 0; i < nm_i->nat_blocks; i++) {
+ i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
+ if (i >= nm_i->nat_blocks)
+ break;
+
+ __set_bit_le(i, nm_i->nat_block_bitmap);
+
+ nid = i * NAT_ENTRY_PER_BLOCK;
+ last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK;
+
+ spin_lock(&nm_i->free_nid_lock);
+ for (; nid < last_nid; nid++)
+ update_free_nid_bitmap(sbi, nid, true, true, true);
+ spin_unlock(&nm_i->free_nid_lock);
+ }
+
+ for (i = 0; i < nm_i->nat_blocks; i++) {
+ i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
+ if (i >= nm_i->nat_blocks)
+ break;
+
+ __set_bit_le(i, nm_i->nat_block_bitmap);
+ }
+}
+
static int init_node_manager(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
@@ -2638,7 +2617,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
return 0;
}
-int init_free_nid_cache(struct f2fs_sb_info *sbi)
+static int init_free_nid_cache(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -2651,6 +2630,14 @@ int init_free_nid_cache(struct f2fs_sb_info *sbi)
GFP_KERNEL);
if (!nm_i->nat_block_bitmap)
return -ENOMEM;
+
+ nm_i->free_nid_count = f2fs_kvzalloc(nm_i->nat_blocks *
+ sizeof(unsigned short), GFP_KERNEL);
+ if (!nm_i->free_nid_count)
+ return -ENOMEM;
+
+ spin_lock_init(&nm_i->free_nid_lock);
+
return 0;
}
@@ -2670,6 +2657,9 @@ int build_node_manager(struct f2fs_sb_info *sbi)
if (err)
return err;
+ /* load free nid status from nat_bits table */
+ load_free_nid_bitmap(sbi);
+
build_free_nids(sbi, true, true);
return 0;
}
@@ -2730,6 +2720,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
kvfree(nm_i->nat_block_bitmap);
kvfree(nm_i->free_nid_bitmap);
+ kvfree(nm_i->free_nid_count);
kfree(nm_i->nat_bitmap);
kfree(nm_i->nat_bits);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 4bd7a8b19332..29ef7088c558 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1163,6 +1163,12 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
if (f2fs_discard_en(sbi) &&
!f2fs_test_and_set_bit(offset, se->discard_map))
sbi->discard_blks--;
+
+ /* don't overwrite by SSR to keep node chain */
+ if (se->type == CURSEG_WARM_NODE) {
+ if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
+ se->ckpt_valid_blocks++;
+ }
} else {
if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) {
#ifdef CONFIG_F2FS_CHECK_FS
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 338d2f73eb29..a2c05f2ada6d 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1359,6 +1359,16 @@ out:
return 0;
}
+static void fat_dummy_inode_init(struct inode *inode)
+{
+ /* Initialize this dummy inode to work as no-op. */
+ MSDOS_I(inode)->mmu_private = 0;
+ MSDOS_I(inode)->i_start = 0;
+ MSDOS_I(inode)->i_logstart = 0;
+ MSDOS_I(inode)->i_attrs = 0;
+ MSDOS_I(inode)->i_pos = 0;
+}
+
static int fat_read_root(struct inode *inode)
{
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
@@ -1803,12 +1813,13 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
fat_inode = new_inode(sb);
if (!fat_inode)
goto out_fail;
- MSDOS_I(fat_inode)->i_pos = 0;
+ fat_dummy_inode_init(fat_inode);
sbi->fat_inode = fat_inode;
fsinfo_inode = new_inode(sb);
if (!fsinfo_inode)
goto out_fail;
+ fat_dummy_inode_init(fsinfo_inode);
fsinfo_inode->i_ino = MSDOS_FSINFO_INO;
sbi->fsinfo_inode = fsinfo_inode;
insert_inode_hash(fsinfo_inode);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ef600591d96f..63ee2940775c 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -173,19 +173,33 @@ static void wb_wakeup(struct bdi_writeback *wb)
spin_unlock_bh(&wb->work_lock);
}
+static void finish_writeback_work(struct bdi_writeback *wb,
+ struct wb_writeback_work *work)
+{
+ struct wb_completion *done = work->done;
+
+ if (work->auto_free)
+ kfree(work);
+ if (done && atomic_dec_and_test(&done->cnt))
+ wake_up_all(&wb->bdi->wb_waitq);
+}
+
static void wb_queue_work(struct bdi_writeback *wb,
struct wb_writeback_work *work)
{
trace_writeback_queue(wb, work);
- spin_lock_bh(&wb->work_lock);
- if (!test_bit(WB_registered, &wb->state))
- goto out_unlock;
if (work->done)
atomic_inc(&work->done->cnt);
- list_add_tail(&work->list, &wb->work_list);
- mod_delayed_work(bdi_wq, &wb->dwork, 0);
-out_unlock:
+
+ spin_lock_bh(&wb->work_lock);
+
+ if (test_bit(WB_registered, &wb->state)) {
+ list_add_tail(&work->list, &wb->work_list);
+ mod_delayed_work(bdi_wq, &wb->dwork, 0);
+ } else
+ finish_writeback_work(wb, work);
+
spin_unlock_bh(&wb->work_lock);
}
@@ -1873,16 +1887,9 @@ static long wb_do_writeback(struct bdi_writeback *wb)
set_bit(WB_writeback_running, &wb->state);
while ((work = get_next_work_item(wb)) != NULL) {
- struct wb_completion *done = work->done;
-
trace_writeback_exec(wb, work);
-
wrote += wb_writeback(wb, work);
-
- if (work->auto_free)
- kfree(work);
- if (done && atomic_dec_and_test(&done->cnt))
- wake_up_all(&wb->bdi->wb_waitq);
+ finish_writeback_work(wb, work);
}
/*
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index c45084ac642d..511e1ed7e2de 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -207,7 +207,7 @@ struct lm_lockname {
struct gfs2_sbd *ln_sbd;
u64 ln_number;
unsigned int ln_type;
-};
+} __packed __aligned(sizeof(int));
#define lm_name_equal(name1, name2) \
(((name1)->ln_number == (name2)->ln_number) && \
diff --git a/fs/iomap.c b/fs/iomap.c
index 3ca1a8e44135..141c3cd55a8b 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -846,7 +846,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = file_inode(iocb->ki_filp);
size_t count = iov_iter_count(iter);
- loff_t pos = iocb->ki_pos, end = iocb->ki_pos + count - 1, ret = 0;
+ loff_t pos = iocb->ki_pos, start = pos;
+ loff_t end = iocb->ki_pos + count - 1, ret = 0;
unsigned int flags = IOMAP_DIRECT;
struct blk_plug plug;
struct iomap_dio *dio;
@@ -887,12 +888,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
}
if (mapping->nrpages) {
- ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
+ ret = filemap_write_and_wait_range(mapping, start, end);
if (ret)
goto out_free_dio;
ret = invalidate_inode_pages2_range(mapping,
- iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT);
WARN_ON_ONCE(ret);
ret = 0;
}
@@ -941,6 +942,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
__set_current_state(TASK_RUNNING);
}
+ ret = iomap_dio_complete(dio);
+
/*
* Try again to invalidate clean pages which might have been cached by
* non-direct readahead, or faulted in by get_user_pages() if the source
@@ -949,12 +952,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
* this invalidation fails, tough, the write still worked...
*/
if (iov_iter_rw(iter) == WRITE && mapping->nrpages) {
- ret = invalidate_inode_pages2_range(mapping,
- iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
- WARN_ON_ONCE(ret);
+ int err = invalidate_inode_pages2_range(mapping,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT);
+ WARN_ON_ONCE(err);
}
- return iomap_dio_complete(dio);
+ return ret;
out_free_dio:
kfree(dio);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index a1a359bfcc9c..5adc2fb62b0f 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1125,10 +1125,8 @@ static journal_t *journal_init_common(struct block_device *bdev,
/* Set up a default-sized revoke table for the new mount. */
err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH);
- if (err) {
- kfree(journal);
- return NULL;
- }
+ if (err)
+ goto err_cleanup;
spin_lock_init(&journal->j_history_lock);
@@ -1145,23 +1143,25 @@ static journal_t *journal_init_common(struct block_device *bdev,
journal->j_wbufsize = n;
journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *),
GFP_KERNEL);
- if (!journal->j_wbuf) {
- kfree(journal);
- return NULL;
- }
+ if (!journal->j_wbuf)
+ goto err_cleanup;
bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize);
if (!bh) {
pr_err("%s: Cannot get buffer for journal superblock\n",
__func__);
- kfree(journal->j_wbuf);
- kfree(journal);
- return NULL;
+ goto err_cleanup;
}
journal->j_sb_buffer = bh;
journal->j_superblock = (journal_superblock_t *)bh->b_data;
return journal;
+
+err_cleanup:
+ kfree(journal->j_wbuf);
+ jbd2_journal_destroy_revoke(journal);
+ kfree(journal);
+ return NULL;
}
/* jbd2_journal_init_dev and jbd2_journal_init_inode:
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index cfc38b552118..f9aefcda5854 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -280,6 +280,7 @@ int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
fail1:
jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
+ journal->j_revoke_table[0] = NULL;
fail0:
return -ENOMEM;
}
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 8e4dc7ab584c..ac2dfe0c5a9c 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -809,7 +809,8 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
if (kn->flags & KERNFS_HAS_MMAP)
unmap_mapping_range(inode->i_mapping, 0, 0, 1);
- kernfs_release_file(kn, of);
+ if (kn->flags & KERNFS_HAS_RELEASE)
+ kernfs_release_file(kn, of);
}
mutex_unlock(&kernfs_open_file_mutex);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index bb79972dc638..773774531aff 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -232,12 +232,12 @@ static struct svc_serv_ops nfs41_cb_sv_ops = {
.svo_module = THIS_MODULE,
};
-struct svc_serv_ops *nfs4_cb_sv_ops[] = {
+static struct svc_serv_ops *nfs4_cb_sv_ops[] = {
[0] = &nfs40_cb_sv_ops,
[1] = &nfs41_cb_sv_ops,
};
#else
-struct svc_serv_ops *nfs4_cb_sv_ops[] = {
+static struct svc_serv_ops *nfs4_cb_sv_ops[] = {
[0] = &nfs40_cb_sv_ops,
[1] = NULL,
};
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 91a8d610ba0f..390ada8741bc 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -325,10 +325,33 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
return NULL;
}
-static bool nfs_client_init_is_complete(const struct nfs_client *clp)
+/*
+ * Return true if @clp is done initializing, false if still working on it.
+ *
+ * Use nfs_client_init_status to check if it was successful.
+ */
+bool nfs_client_init_is_complete(const struct nfs_client *clp)
{
return clp->cl_cons_state <= NFS_CS_READY;
}
+EXPORT_SYMBOL_GPL(nfs_client_init_is_complete);
+
+/*
+ * Return 0 if @clp was successfully initialized, -errno otherwise.
+ *
+ * This must be called *after* nfs_client_init_is_complete() returns true,
+ * otherwise it will pop WARN_ON_ONCE and return -EINVAL
+ */
+int nfs_client_init_status(const struct nfs_client *clp)
+{
+ /* called without checking nfs_client_init_is_complete */
+ if (clp->cl_cons_state > NFS_CS_READY) {
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+ return clp->cl_cons_state;
+}
+EXPORT_SYMBOL_GPL(nfs_client_init_status);
int nfs_wait_client_init_complete(const struct nfs_client *clp)
{
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index fb499a3f21b5..f92ba8d6c556 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2055,7 +2055,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
{
struct inode *old_inode = d_inode(old_dentry);
struct inode *new_inode = d_inode(new_dentry);
- struct dentry *dentry = NULL, *rehash = NULL;
+ struct dentry *dentry = NULL;
struct rpc_task *task;
int error = -EBUSY;
@@ -2078,10 +2078,8 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
* To prevent any new references to the target during the
* rename, we unhash the dentry in advance.
*/
- if (!d_unhashed(new_dentry)) {
+ if (!d_unhashed(new_dentry))
d_drop(new_dentry);
- rehash = new_dentry;
- }
if (d_count(new_dentry) > 2) {
int err;
@@ -2098,7 +2096,6 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out;
new_dentry = dentry;
- rehash = NULL;
new_inode = NULL;
}
}
@@ -2119,8 +2116,6 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
error = task->tk_status;
rpc_put_task(task);
out:
- if (rehash)
- d_rehash(rehash);
trace_nfs_rename_exit(old_dir, old_dentry,
new_dir, new_dentry, error);
/* new dentry created? */
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 44347f4bdc15..acd30baca461 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -202,10 +202,10 @@ static int filelayout_async_handle_error(struct rpc_task *task,
task->tk_status);
nfs4_mark_deviceid_unavailable(devid);
pnfs_error_mark_layout_for_return(inode, lseg);
- pnfs_set_lo_fail(lseg);
rpc_wake_up(&tbl->slot_tbl_waitq);
/* fall through */
default:
+ pnfs_set_lo_fail(lseg);
reset:
dprintk("%s Retry through MDS. Error %d\n", __func__,
task->tk_status);
@@ -560,6 +560,50 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
return PNFS_ATTEMPTED;
}
+static int
+filelayout_check_deviceid(struct pnfs_layout_hdr *lo,
+ struct nfs4_filelayout_segment *fl,
+ gfp_t gfp_flags)
+{
+ struct nfs4_deviceid_node *d;
+ struct nfs4_file_layout_dsaddr *dsaddr;
+ int status = -EINVAL;
+
+ /* find and reference the deviceid */
+ d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &fl->deviceid,
+ lo->plh_lc_cred, gfp_flags);
+ if (d == NULL)
+ goto out;
+
+ dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
+ /* Found deviceid is unavailable */
+ if (filelayout_test_devid_unavailable(&dsaddr->id_node))
+ goto out_put;
+
+ fl->dsaddr = dsaddr;
+
+ if (fl->first_stripe_index >= dsaddr->stripe_count) {
+ dprintk("%s Bad first_stripe_index %u\n",
+ __func__, fl->first_stripe_index);
+ goto out_put;
+ }
+
+ if ((fl->stripe_type == STRIPE_SPARSE &&
+ fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) ||
+ (fl->stripe_type == STRIPE_DENSE &&
+ fl->num_fh != dsaddr->stripe_count)) {
+ dprintk("%s num_fh %u not valid for given packing\n",
+ __func__, fl->num_fh);
+ goto out_put;
+ }
+ status = 0;
+out:
+ return status;
+out_put:
+ nfs4_fl_put_deviceid(dsaddr);
+ goto out;
+}
+
/*
* filelayout_check_layout()
*
@@ -572,11 +616,8 @@ static int
filelayout_check_layout(struct pnfs_layout_hdr *lo,
struct nfs4_filelayout_segment *fl,
struct nfs4_layoutget_res *lgr,
- struct nfs4_deviceid *id,
gfp_t gfp_flags)
{
- struct nfs4_deviceid_node *d;
- struct nfs4_file_layout_dsaddr *dsaddr;
int status = -EINVAL;
dprintk("--> %s\n", __func__);
@@ -601,41 +642,10 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
goto out;
}
- /* find and reference the deviceid */
- d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), id,
- lo->plh_lc_cred, gfp_flags);
- if (d == NULL)
- goto out;
-
- dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
- /* Found deviceid is unavailable */
- if (filelayout_test_devid_unavailable(&dsaddr->id_node))
- goto out_put;
-
- fl->dsaddr = dsaddr;
-
- if (fl->first_stripe_index >= dsaddr->stripe_count) {
- dprintk("%s Bad first_stripe_index %u\n",
- __func__, fl->first_stripe_index);
- goto out_put;
- }
-
- if ((fl->stripe_type == STRIPE_SPARSE &&
- fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) ||
- (fl->stripe_type == STRIPE_DENSE &&
- fl->num_fh != dsaddr->stripe_count)) {
- dprintk("%s num_fh %u not valid for given packing\n",
- __func__, fl->num_fh);
- goto out_put;
- }
-
status = 0;
out:
dprintk("--> %s returns %d\n", __func__, status);
return status;
-out_put:
- nfs4_fl_put_deviceid(dsaddr);
- goto out;
}
static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
@@ -657,7 +667,6 @@ static int
filelayout_decode_layout(struct pnfs_layout_hdr *flo,
struct nfs4_filelayout_segment *fl,
struct nfs4_layoutget_res *lgr,
- struct nfs4_deviceid *id,
gfp_t gfp_flags)
{
struct xdr_stream stream;
@@ -682,9 +691,9 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
if (unlikely(!p))
goto out_err;
- memcpy(id, p, sizeof(*id));
+ memcpy(&fl->deviceid, p, sizeof(fl->deviceid));
p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
- nfs4_print_deviceid(id);
+ nfs4_print_deviceid(&fl->deviceid);
nfl_util = be32_to_cpup(p++);
if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
@@ -831,15 +840,14 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
{
struct nfs4_filelayout_segment *fl;
int rc;
- struct nfs4_deviceid id;
dprintk("--> %s\n", __func__);
fl = kzalloc(sizeof(*fl), gfp_flags);
if (!fl)
return NULL;
- rc = filelayout_decode_layout(layoutid, fl, lgr, &id, gfp_flags);
- if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id, gfp_flags)) {
+ rc = filelayout_decode_layout(layoutid, fl, lgr, gfp_flags);
+ if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, gfp_flags)) {
_filelayout_free_lseg(fl);
return NULL;
}
@@ -888,18 +896,51 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
return min(stripe_unit - (unsigned int)stripe_offset, size);
}
+static struct pnfs_layout_segment *
+fl_pnfs_update_layout(struct inode *ino,
+ struct nfs_open_context *ctx,
+ loff_t pos,
+ u64 count,
+ enum pnfs_iomode iomode,
+ bool strict_iomode,
+ gfp_t gfp_flags)
+{
+ struct pnfs_layout_segment *lseg = NULL;
+ struct pnfs_layout_hdr *lo;
+ struct nfs4_filelayout_segment *fl;
+ int status;
+
+ lseg = pnfs_update_layout(ino, ctx, pos, count, iomode, strict_iomode,
+ gfp_flags);
+ if (!lseg)
+ lseg = ERR_PTR(-ENOMEM);
+ if (IS_ERR(lseg))
+ goto out;
+
+ lo = NFS_I(ino)->layout;
+ fl = FILELAYOUT_LSEG(lseg);
+
+ status = filelayout_check_deviceid(lo, fl, gfp_flags);
+ if (status)
+ lseg = ERR_PTR(status);
+out:
+ if (IS_ERR(lseg))
+ pnfs_put_lseg(lseg);
+ return lseg;
+}
+
static void
filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
if (!pgio->pg_lseg) {
- pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
- req->wb_context,
- 0,
- NFS4_MAX_UINT64,
- IOMODE_READ,
- false,
- GFP_KERNEL);
+ pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
+ req->wb_context,
+ 0,
+ NFS4_MAX_UINT64,
+ IOMODE_READ,
+ false,
+ GFP_KERNEL);
if (IS_ERR(pgio->pg_lseg)) {
pgio->pg_error = PTR_ERR(pgio->pg_lseg);
pgio->pg_lseg = NULL;
@@ -919,13 +960,13 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
int status;
if (!pgio->pg_lseg) {
- pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
- req->wb_context,
- 0,
- NFS4_MAX_UINT64,
- IOMODE_RW,
- false,
- GFP_NOFS);
+ pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
+ req->wb_context,
+ 0,
+ NFS4_MAX_UINT64,
+ IOMODE_RW,
+ false,
+ GFP_NOFS);
if (IS_ERR(pgio->pg_lseg)) {
pgio->pg_error = PTR_ERR(pgio->pg_lseg);
pgio->pg_lseg = NULL;
diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h
index 2896cb833a11..79323b5dab0c 100644
--- a/fs/nfs/filelayout/filelayout.h
+++ b/fs/nfs/filelayout/filelayout.h
@@ -55,15 +55,16 @@ struct nfs4_file_layout_dsaddr {
};
struct nfs4_filelayout_segment {
- struct pnfs_layout_segment generic_hdr;
- u32 stripe_type;
- u32 commit_through_mds;
- u32 stripe_unit;
- u32 first_stripe_index;
- u64 pattern_offset;
- struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
- unsigned int num_fh;
- struct nfs_fh **fh_array;
+ struct pnfs_layout_segment generic_hdr;
+ u32 stripe_type;
+ u32 commit_through_mds;
+ u32 stripe_unit;
+ u32 first_stripe_index;
+ u64 pattern_offset;
+ struct nfs4_deviceid deviceid;
+ struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
+ unsigned int num_fh;
+ struct nfs_fh **fh_array;
};
struct nfs4_filelayout {
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index f956ca20a8a3..d913e818858f 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -266,6 +266,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
struct nfs4_pnfs_ds *ret = ds;
struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
+ int status;
if (ds == NULL) {
printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
@@ -277,9 +278,14 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
if (ds->ds_clp)
goto out_test_devid;
- nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
+ status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
dataserver_retrans, 4,
s->nfs_client->cl_minorversion);
+ if (status) {
+ nfs4_mark_deviceid_unavailable(devid);
+ ret = NULL;
+ goto out;
+ }
out_test_devid:
if (ret->ds_clp == NULL ||
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index f4f39b0ab09b..98b34c9b0564 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -175,7 +175,19 @@ ff_layout_no_read_on_rw(struct pnfs_layout_segment *lseg)
static inline bool
ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node)
{
- return nfs4_test_deviceid_unavailable(node);
+ /*
+ * Flexfiles should never mark a DS unavailable, but if it does
+ * print a (ratelimited) warning as this can affect performance.
+ */
+ if (nfs4_test_deviceid_unavailable(node)) {
+ u32 *p = (u32 *)node->deviceid.data;
+
+ pr_warn_ratelimited("NFS: flexfiles layout referencing an "
+ "unavailable device [%x%x%x%x]\n",
+ p[0], p[1], p[2], p[3]);
+ return true;
+ }
+ return false;
}
static inline int
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index e5a6f248697b..457cfeb1d5c1 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -208,6 +208,10 @@ static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg,
} else
goto outerr;
}
+
+ if (IS_ERR(mirror->mirror_ds))
+ goto outerr;
+
if (mirror->mirror_ds->ds == NULL) {
struct nfs4_deviceid_node *devid;
devid = &mirror->mirror_ds->id_node;
@@ -384,6 +388,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
struct inode *ino = lseg->pls_layout->plh_inode;
struct nfs_server *s = NFS_SERVER(ino);
unsigned int max_payload;
+ int status;
if (!ff_layout_mirror_valid(lseg, mirror, true)) {
pr_err_ratelimited("NFS: %s: No data server for offset index %d\n",
@@ -404,7 +409,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
/* FIXME: For now we assume the server sent only one version of NFS
* to use for the DS.
*/
- nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
+ status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
dataserver_retrans,
mirror->mirror_ds->ds_versions[0].version,
mirror->mirror_ds->ds_versions[0].minor_version);
@@ -420,11 +425,11 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
mirror->mirror_ds->ds_versions[0].wsize = max_payload;
goto out;
}
+out_fail:
ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
mirror, lseg->pls_range.offset,
lseg->pls_range.length, NFS4ERR_NXIO,
OP_ILLEGAL, GFP_NOIO);
-out_fail:
if (fail_return || !ff_layout_has_available_ds(lseg))
pnfs_error_mark_layout_for_return(ino, lseg);
ds = NULL;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 09ca5095c04e..7b38fedb7e03 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -186,6 +186,8 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *,
struct nfs_fh *,
struct nfs_fattr *,
rpc_authflavor_t);
+extern bool nfs_client_init_is_complete(const struct nfs_client *clp);
+extern int nfs_client_init_status(const struct nfs_client *clp);
extern int nfs_wait_client_init_complete(const struct nfs_client *clp);
extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 5ae9d64ea08b..8346ccbf2d52 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -1023,9 +1023,9 @@ static void nfs4_session_set_rwsize(struct nfs_server *server)
server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead;
server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead;
- if (server->rsize > server_resp_sz)
+ if (!server->rsize || server->rsize > server_resp_sz)
server->rsize = server_resp_sz;
- if (server->wsize > server_rqst_sz)
+ if (!server->wsize || server->wsize > server_rqst_sz)
server->wsize = server_rqst_sz;
#endif /* CONFIG_NFS_V4_1 */
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1b183686c6d4..201ca3f2c4ba 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2258,8 +2258,6 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0)
return 0;
- /* even though OPEN succeeded, access is denied. Close the file */
- nfs4_close_state(state, fmode);
return -EACCES;
}
@@ -2444,17 +2442,14 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state)
}
nfs4_stateid_copy(&stateid, &delegation->stateid);
- if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
+ if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) ||
+ !test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED,
+ &delegation->flags)) {
rcu_read_unlock();
nfs_finish_clear_delegation_stateid(state, &stateid);
return;
}
- if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags)) {
- rcu_read_unlock();
- return;
- }
-
cred = get_rpccred(delegation->cred);
rcu_read_unlock();
status = nfs41_test_and_free_expired_stateid(server, &stateid, cred);
@@ -7427,11 +7422,11 @@ static void nfs4_exchange_id_release(void *data)
struct nfs41_exchange_id_data *cdata =
(struct nfs41_exchange_id_data *)data;
- nfs_put_client(cdata->args.client);
if (cdata->xprt) {
xprt_put(cdata->xprt);
rpc_clnt_xprt_switch_put(cdata->args.client->cl_rpcclient);
}
+ nfs_put_client(cdata->args.client);
kfree(cdata->res.impl_id);
kfree(cdata->res.server_scope);
kfree(cdata->res.server_owner);
@@ -7538,10 +7533,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
task_setup_data.callback_data = calldata;
task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task)) {
- status = PTR_ERR(task);
- goto out_impl_id;
- }
+ if (IS_ERR(task))
+ return PTR_ERR(task);
if (!xprt) {
status = rpc_wait_for_completion_task(task);
@@ -7569,6 +7562,7 @@ out_server_owner:
kfree(calldata->res.server_owner);
out_calldata:
kfree(calldata);
+ nfs_put_client(clp);
goto out;
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index f0369e362753..80ce289eea05 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3942,7 +3942,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
if (len <= 0)
goto out;
dprintk("%s: name=%s\n", __func__, group_name->data);
- return NFS_ATTR_FATTR_OWNER_NAME;
+ return NFS_ATTR_FATTR_GROUP_NAME;
} else {
len = xdr_stream_decode_opaque_inline(xdr, (void **)&p,
XDR_MAX_NETOBJ);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 63f77b49a586..590e1e35781f 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -367,7 +367,7 @@ void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds);
struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs,
gfp_t gfp_flags);
void nfs4_pnfs_v3_ds_connect_unload(void);
-void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
+int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
struct nfs4_deviceid_node *devid, unsigned int timeo,
unsigned int retrans, u32 version, u32 minor_version);
struct nfs4_pnfs_ds_addr *nfs4_decode_mp_ds_addr(struct net *net,
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 9414b492439f..7250b95549ec 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -745,15 +745,17 @@ out:
/*
* Create an rpc connection to the nfs4_pnfs_ds data server.
* Currently only supports IPv4 and IPv6 addresses.
- * If connection fails, make devid unavailable.
+ * If connection fails, make devid unavailable and return a -errno.
*/
-void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
+int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
struct nfs4_deviceid_node *devid, unsigned int timeo,
unsigned int retrans, u32 version, u32 minor_version)
{
- if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
- int err = 0;
+ int err;
+again:
+ err = 0;
+ if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
if (version == 3) {
err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo,
retrans);
@@ -766,12 +768,29 @@ void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
err = -EPROTONOSUPPORT;
}
- if (err)
- nfs4_mark_deviceid_unavailable(devid);
nfs4_clear_ds_conn_bit(ds);
} else {
nfs4_wait_ds_connect(ds);
+
+ /* what was waited on didn't connect AND didn't mark unavail */
+ if (!ds->ds_clp && !nfs4_test_deviceid_unavailable(devid))
+ goto again;
}
+
+ /*
+ * At this point the ds->ds_clp should be ready, but it might have
+ * hit an error.
+ */
+ if (!err) {
+ if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) {
+ WARN_ON_ONCE(ds->ds_clp ||
+ !nfs4_test_deviceid_unavailable(devid));
+ return -EINVAL;
+ }
+ err = nfs_client_init_status(ds->ds_clp);
+ }
+
+ return err;
}
EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e75b056f46f4..abb2c8a3be42 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1784,7 +1784,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
(long long)req_offset(req));
if (status < 0) {
nfs_context_set_write_error(req->wb_context, status);
- nfs_inode_remove_request(req);
+ if (req->wb_page)
+ nfs_inode_remove_request(req);
dprintk_cont(", error = %d\n", status);
goto next;
}
@@ -1793,7 +1794,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
* returned by the server against all stored verfs. */
if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) {
/* We have a match */
- nfs_inode_remove_request(req);
+ if (req->wb_page)
+ nfs_inode_remove_request(req);
dprintk_cont(" OK\n");
goto next;
}
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 4348027384f5..d0ab7e56d0b4 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1863,7 +1863,7 @@ static int o2net_accept_one(struct socket *sock, int *more)
new_sock->type = sock->type;
new_sock->ops = sock->ops;
- ret = sock->ops->accept(sock, new_sock, O_NONBLOCK);
+ ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false);
if (ret < 0)
goto out;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 1953986ee6bc..6e610a205e15 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -12,7 +12,6 @@
#include <linux/slab.h>
#include <linux/cred.h>
#include <linux/xattr.h>
-#include <linux/sched/signal.h>
#include "overlayfs.h"
#include "ovl_entry.h"
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 384fa759a563..c543cdb5f8ed 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -400,9 +400,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
clockid != CLOCK_BOOTTIME_ALARM))
return -EINVAL;
- if (!capable(CAP_WAKE_ALARM) &&
- (clockid == CLOCK_REALTIME_ALARM ||
- clockid == CLOCK_BOOTTIME_ALARM))
+ if ((clockid == CLOCK_REALTIME_ALARM ||
+ clockid == CLOCK_BOOTTIME_ALARM) &&
+ !capable(CAP_WAKE_ALARM))
return -EPERM;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -449,7 +449,7 @@ static int do_timerfd_settime(int ufd, int flags,
return ret;
ctx = f.file->private_data;
- if (!capable(CAP_WAKE_ALARM) && isalarm(ctx)) {
+ if (isalarm(ctx) && !capable(CAP_WAKE_ALARM)) {
fdput(f);
return -EPERM;
}
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 973607df579d..1d227b0fcf49 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -138,8 +138,6 @@ out:
* userfaultfd_ctx_get - Acquires a reference to the internal userfaultfd
* context.
* @ctx: [in] Pointer to the userfaultfd context.
- *
- * Returns: In case of success, returns not zero.
*/
static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx)
{
@@ -267,6 +265,7 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
{
struct mm_struct *mm = ctx->mm;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd, _pmd;
pte_t *pte;
@@ -277,7 +276,10 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
pgd = pgd_offset(mm, address);
if (!pgd_present(*pgd))
goto out;
- pud = pud_offset(pgd, address);
+ p4d = p4d_offset(pgd, address);
+ if (!p4d_present(*p4d))
+ goto out;
+ pud = pud_offset(p4d, address);
if (!pud_present(*pud))
goto out;
pmd = pmd_offset(pud, address);
@@ -490,7 +492,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
* in such case.
*/
down_read(&mm->mmap_sem);
- ret = 0;
+ ret = VM_FAULT_NOPAGE;
}
}
@@ -527,10 +529,11 @@ out:
return ret;
}
-static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
- struct userfaultfd_wait_queue *ewq)
+static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
+ struct userfaultfd_wait_queue *ewq)
{
- int ret = 0;
+ if (WARN_ON_ONCE(current->flags & PF_EXITING))
+ goto out;
ewq->ctx = ctx;
init_waitqueue_entry(&ewq->wq, current);
@@ -547,8 +550,16 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
break;
if (ACCESS_ONCE(ctx->released) ||
fatal_signal_pending(current)) {
- ret = -1;
__remove_wait_queue(&ctx->event_wqh, &ewq->wq);
+ if (ewq->msg.event == UFFD_EVENT_FORK) {
+ struct userfaultfd_ctx *new;
+
+ new = (struct userfaultfd_ctx *)
+ (unsigned long)
+ ewq->msg.arg.reserved.reserved1;
+
+ userfaultfd_ctx_put(new);
+ }
break;
}
@@ -566,9 +577,8 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
* ctx may go away after this if the userfault pseudo fd is
* already released.
*/
-
+out:
userfaultfd_ctx_put(ctx);
- return ret;
}
static void userfaultfd_event_complete(struct userfaultfd_ctx *ctx,
@@ -626,7 +636,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
return 0;
}
-static int dup_fctx(struct userfaultfd_fork_ctx *fctx)
+static void dup_fctx(struct userfaultfd_fork_ctx *fctx)
{
struct userfaultfd_ctx *ctx = fctx->orig;
struct userfaultfd_wait_queue ewq;
@@ -636,17 +646,15 @@ static int dup_fctx(struct userfaultfd_fork_ctx *fctx)
ewq.msg.event = UFFD_EVENT_FORK;
ewq.msg.arg.reserved.reserved1 = (unsigned long)fctx->new;
- return userfaultfd_event_wait_completion(ctx, &ewq);
+ userfaultfd_event_wait_completion(ctx, &ewq);
}
void dup_userfaultfd_complete(struct list_head *fcs)
{
- int ret = 0;
struct userfaultfd_fork_ctx *fctx, *n;
list_for_each_entry_safe(fctx, n, fcs, list) {
- if (!ret)
- ret = dup_fctx(fctx);
+ dup_fctx(fctx);
list_del(&fctx->list);
kfree(fctx);
}
@@ -689,8 +697,7 @@ void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx,
userfaultfd_event_wait_completion(ctx, &ewq);
}
-void userfaultfd_remove(struct vm_area_struct *vma,
- struct vm_area_struct **prev,
+bool userfaultfd_remove(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
struct mm_struct *mm = vma->vm_mm;
@@ -699,13 +706,11 @@ void userfaultfd_remove(struct vm_area_struct *vma,
ctx = vma->vm_userfaultfd_ctx.ctx;
if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE))
- return;
+ return true;
userfaultfd_ctx_get(ctx);
up_read(&mm->mmap_sem);
- *prev = NULL; /* We wait for ACK w/o the mmap semaphore */
-
msg_init(&ewq.msg);
ewq.msg.event = UFFD_EVENT_REMOVE;
@@ -714,7 +719,7 @@ void userfaultfd_remove(struct vm_area_struct *vma,
userfaultfd_event_wait_completion(ctx, &ewq);
- down_read(&mm->mmap_sem);
+ return false;
}
static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps,
@@ -775,34 +780,6 @@ void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf)
}
}
-void userfaultfd_exit(struct mm_struct *mm)
-{
- struct vm_area_struct *vma = mm->mmap;
-
- /*
- * We can do the vma walk without locking because the caller
- * (exit_mm) knows it now has exclusive access
- */
- while (vma) {
- struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
-
- if (ctx && (ctx->features & UFFD_FEATURE_EVENT_EXIT)) {
- struct userfaultfd_wait_queue ewq;
-
- userfaultfd_ctx_get(ctx);
-
- msg_init(&ewq.msg);
- ewq.msg.event = UFFD_EVENT_EXIT;
-
- userfaultfd_event_wait_completion(ctx, &ewq);
-
- ctx->features &= ~UFFD_FEATURE_EVENT_EXIT;
- }
-
- vma = vma->vm_next;
- }
-}
-
static int userfaultfd_release(struct inode *inode, struct file *file)
{
struct userfaultfd_ctx *ctx = file->private_data;
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 2dfdc62f795e..70a5b55e0870 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -25,24 +25,6 @@
#include "kmem.h"
#include "xfs_message.h"
-/*
- * Greedy allocation. May fail and may return vmalloced memory.
- */
-void *
-kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
-{
- void *ptr;
- size_t kmsize = maxsize;
-
- while (!(ptr = vzalloc(kmsize))) {
- if ((kmsize >>= 1) <= minsize)
- kmsize = minsize;
- }
- if (ptr)
- *size = kmsize;
- return ptr;
-}
-
void *
kmem_alloc(size_t size, xfs_km_flags_t flags)
{
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 689f746224e7..f0fc84fcaac2 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -69,8 +69,6 @@ static inline void kmem_free(const void *ptr)
}
-extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
-
static inline void *
kmem_zalloc(size_t size, xfs_km_flags_t flags)
{
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index a9c66d47757a..9bd104f32908 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -763,8 +763,8 @@ xfs_bmap_extents_to_btree(
args.type = XFS_ALLOCTYPE_START_BNO;
args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
} else if (dfops->dop_low) {
-try_another_ag:
args.type = XFS_ALLOCTYPE_START_BNO;
+try_another_ag:
args.fsbno = *firstblock;
} else {
args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -790,13 +790,17 @@ try_another_ag:
if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
args.fsbno == NULLFSBLOCK &&
args.type == XFS_ALLOCTYPE_NEAR_BNO) {
- dfops->dop_low = true;
+ args.type = XFS_ALLOCTYPE_FIRST_AG;
goto try_another_ag;
}
+ if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
+ xfs_iroot_realloc(ip, -1, whichfork);
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+ return -ENOSPC;
+ }
/*
* Allocation can't fail, the space was reserved.
*/
- ASSERT(args.fsbno != NULLFSBLOCK);
ASSERT(*firstblock == NULLFSBLOCK ||
args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock));
*firstblock = cur->bc_private.b.firstblock = args.fsbno;
@@ -4150,6 +4154,19 @@ xfs_bmapi_read(
return 0;
}
+/*
+ * Add a delayed allocation extent to an inode. Blocks are reserved from the
+ * global pool and the extent inserted into the inode in-core extent tree.
+ *
+ * On entry, got refers to the first extent beyond the offset of the extent to
+ * allocate or eof is specified if no such extent exists. On return, got refers
+ * to the extent record that was inserted to the inode fork.
+ *
+ * Note that the allocated extent may have been merged with contiguous extents
+ * during insertion into the inode fork. Thus, got does not reflect the current
+ * state of the inode fork on return. If necessary, the caller can use lastx to
+ * look up the updated record in the inode fork.
+ */
int
xfs_bmapi_reserve_delalloc(
struct xfs_inode *ip,
@@ -4236,13 +4253,8 @@ xfs_bmapi_reserve_delalloc(
got->br_startblock = nullstartblock(indlen);
got->br_blockcount = alen;
got->br_state = XFS_EXT_NORM;
- xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
- /*
- * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
- * might have merged it into one of the neighbouring ones.
- */
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
+ xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
/*
* Tag the inode if blocks were preallocated. Note that COW fork
@@ -4254,10 +4266,6 @@ xfs_bmapi_reserve_delalloc(
if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
xfs_inode_set_cowblocks_tag(ip);
- ASSERT(got->br_startoff <= aoff);
- ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
- ASSERT(isnullstartblock(got->br_startblock));
- ASSERT(got->br_state == XFS_EXT_NORM);
return 0;
out_unreserve_blocks:
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index f93072b58a58..fd55db479385 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -447,8 +447,8 @@ xfs_bmbt_alloc_block(
if (args.fsbno == NULLFSBLOCK) {
args.fsbno = be64_to_cpu(start->l);
-try_another_ag:
args.type = XFS_ALLOCTYPE_START_BNO;
+try_another_ag:
/*
* Make sure there is sufficient room left in the AG to
* complete a full tree split for an extent insert. If
@@ -488,8 +488,8 @@ try_another_ag:
if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
args.fsbno == NULLFSBLOCK &&
args.type == XFS_ALLOCTYPE_NEAR_BNO) {
- cur->bc_private.b.dfops->dop_low = true;
args.fsbno = cur->bc_private.b.firstblock;
+ args.type = XFS_ALLOCTYPE_FIRST_AG;
goto try_another_ag;
}
@@ -506,7 +506,7 @@ try_another_ag:
goto error0;
cur->bc_private.b.dfops->dop_low = true;
}
- if (args.fsbno == NULLFSBLOCK) {
+ if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index d04547fcf274..eb00bc133bca 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -125,6 +125,8 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
+extern int xfs_dir2_sf_verify(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *sfp,
+ int size);
/* xfs_dir2_readdir.c */
extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index c6809ff41197..96b45cd6c63f 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -629,6 +629,93 @@ xfs_dir2_sf_check(
}
#endif /* DEBUG */
+/* Verify the consistency of an inline directory. */
+int
+xfs_dir2_sf_verify(
+ struct xfs_mount *mp,
+ struct xfs_dir2_sf_hdr *sfp,
+ int size)
+{
+ struct xfs_dir2_sf_entry *sfep;
+ struct xfs_dir2_sf_entry *next_sfep;
+ char *endp;
+ const struct xfs_dir_ops *dops;
+ xfs_ino_t ino;
+ int i;
+ int i8count;
+ int offset;
+ __uint8_t filetype;
+
+ dops = xfs_dir_get_ops(mp, NULL);
+
+ /*
+ * Give up if the directory is way too short.
+ */
+ XFS_WANT_CORRUPTED_RETURN(mp, size >
+ offsetof(struct xfs_dir2_sf_hdr, parent));
+ XFS_WANT_CORRUPTED_RETURN(mp, size >=
+ xfs_dir2_sf_hdr_size(sfp->i8count));
+
+ endp = (char *)sfp + size;
+
+ /* Check .. entry */
+ ino = dops->sf_get_parent_ino(sfp);
+ i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
+ XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino));
+ offset = dops->data_first_offset;
+
+ /* Check all reported entries */
+ sfep = xfs_dir2_sf_firstentry(sfp);
+ for (i = 0; i < sfp->count; i++) {
+ /*
+ * struct xfs_dir2_sf_entry has a variable length.
+ * Check the fixed-offset parts of the structure are
+ * within the data buffer.
+ */
+ XFS_WANT_CORRUPTED_RETURN(mp,
+ ((char *)sfep + sizeof(*sfep)) < endp);
+
+ /* Don't allow names with known bad length. */
+ XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen > 0);
+ XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen < MAXNAMELEN);
+
+ /*
+ * Check that the variable-length part of the structure is
+ * within the data buffer. The next entry starts after the
+ * name component, so nextentry is an acceptable test.
+ */
+ next_sfep = dops->sf_nextentry(sfp, sfep);
+ XFS_WANT_CORRUPTED_RETURN(mp, endp >= (char *)next_sfep);
+
+ /* Check that the offsets always increase. */
+ XFS_WANT_CORRUPTED_RETURN(mp,
+ xfs_dir2_sf_get_offset(sfep) >= offset);
+
+ /* Check the inode number. */
+ ino = dops->sf_get_ino(sfp, sfep);
+ i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
+ XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino));
+
+ /* Check the file type. */
+ filetype = dops->sf_get_ftype(sfep);
+ XFS_WANT_CORRUPTED_RETURN(mp, filetype < XFS_DIR3_FT_MAX);
+
+ offset = xfs_dir2_sf_get_offset(sfep) +
+ dops->data_entsize(sfep->namelen);
+
+ sfep = next_sfep;
+ }
+ XFS_WANT_CORRUPTED_RETURN(mp, i8count == sfp->i8count);
+ XFS_WANT_CORRUPTED_RETURN(mp, (void *)sfep == (void *)endp);
+
+ /* Make sure this whole thing ought to be in local format. */
+ XFS_WANT_CORRUPTED_RETURN(mp, offset +
+ (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+ (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dir_geo->blksize);
+
+ return 0;
+}
+
/*
* Create a new (shortform) directory.
*/
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 25c1e078aef6..9653e964eda4 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -33,6 +33,8 @@
#include "xfs_trace.h"
#include "xfs_attr_sf.h"
#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_priv.h"
kmem_zone_t *xfs_ifork_zone;
@@ -320,6 +322,7 @@ xfs_iformat_local(
int whichfork,
int size)
{
+ int error;
/*
* If the size is unreasonable, then something
@@ -336,6 +339,14 @@ xfs_iformat_local(
return -EFSCORRUPTED;
}
+ if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
+ error = xfs_dir2_sf_verify(ip->i_mount,
+ (struct xfs_dir2_sf_hdr *)XFS_DFORK_DPTR(dip),
+ size);
+ if (error)
+ return error;
+ }
+
xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size);
return 0;
}
@@ -856,7 +867,7 @@ xfs_iextents_copy(
* In these cases, the format always takes precedence, because the
* format indicates the current state of the fork.
*/
-void
+int
xfs_iflush_fork(
xfs_inode_t *ip,
xfs_dinode_t *dip,
@@ -866,6 +877,7 @@ xfs_iflush_fork(
char *cp;
xfs_ifork_t *ifp;
xfs_mount_t *mp;
+ int error;
static const short brootflag[2] =
{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
static const short dataflag[2] =
@@ -874,7 +886,7 @@ xfs_iflush_fork(
{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };
if (!iip)
- return;
+ return 0;
ifp = XFS_IFORK_PTR(ip, whichfork);
/*
* This can happen if we gave up in iformat in an error path,
@@ -882,12 +894,19 @@ xfs_iflush_fork(
*/
if (!ifp) {
ASSERT(whichfork == XFS_ATTR_FORK);
- return;
+ return 0;
}
cp = XFS_DFORK_PTR(dip, whichfork);
mp = ip->i_mount;
switch (XFS_IFORK_FORMAT(ip, whichfork)) {
case XFS_DINODE_FMT_LOCAL:
+ if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
+ error = xfs_dir2_sf_verify(mp,
+ (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data,
+ ifp->if_bytes);
+ if (error)
+ return error;
+ }
if ((iip->ili_fields & dataflag[whichfork]) &&
(ifp->if_bytes > 0)) {
ASSERT(ifp->if_u1.if_data != NULL);
@@ -940,6 +959,7 @@ xfs_iflush_fork(
ASSERT(0);
break;
}
+ return 0;
}
/*
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 7fb8365326d1..132dc59fdde6 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -140,7 +140,7 @@ typedef struct xfs_ifork {
struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state);
int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);
-void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
+int xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
struct xfs_inode_log_item *, int);
void xfs_idestroy_fork(struct xfs_inode *, int);
void xfs_idata_realloc(struct xfs_inode *, int, int);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index bf65a9ea8642..61494295d92f 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -274,54 +274,49 @@ xfs_end_io(
struct xfs_ioend *ioend =
container_of(work, struct xfs_ioend, io_work);
struct xfs_inode *ip = XFS_I(ioend->io_inode);
+ xfs_off_t offset = ioend->io_offset;
+ size_t size = ioend->io_size;
int error = ioend->io_bio->bi_error;
/*
- * Set an error if the mount has shut down and proceed with end I/O
- * processing so it can perform whatever cleanups are necessary.
+ * Just clean up the in-memory strutures if the fs has been shut down.
*/
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
error = -EIO;
+ goto done;
+ }
/*
- * For a CoW extent, we need to move the mapping from the CoW fork
- * to the data fork. If instead an error happened, just dump the
- * new blocks.
+ * Clean up any COW blocks on an I/O error.
*/
- if (ioend->io_type == XFS_IO_COW) {
- if (error)
- goto done;
- if (ioend->io_bio->bi_error) {
- error = xfs_reflink_cancel_cow_range(ip,
- ioend->io_offset, ioend->io_size);
- goto done;
+ if (unlikely(error)) {
+ switch (ioend->io_type) {
+ case XFS_IO_COW:
+ xfs_reflink_cancel_cow_range(ip, offset, size, true);
+ break;
}
- error = xfs_reflink_end_cow(ip, ioend->io_offset,
- ioend->io_size);
- if (error)
- goto done;
+
+ goto done;
}
/*
- * For unwritten extents we need to issue transactions to convert a
- * range to normal written extens after the data I/O has finished.
- * Detecting and handling completion IO errors is done individually
- * for each case as different cleanup operations need to be performed
- * on error.
+ * Success: commit the COW or unwritten blocks if needed.
*/
- if (ioend->io_type == XFS_IO_UNWRITTEN) {
- if (error)
- goto done;
- error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
- ioend->io_size);
- } else if (ioend->io_append_trans) {
- error = xfs_setfilesize_ioend(ioend, error);
- } else {
- ASSERT(!xfs_ioend_is_append(ioend) ||
- ioend->io_type == XFS_IO_COW);
+ switch (ioend->io_type) {
+ case XFS_IO_COW:
+ error = xfs_reflink_end_cow(ip, offset, size);
+ break;
+ case XFS_IO_UNWRITTEN:
+ error = xfs_iomap_write_unwritten(ip, offset, size);
+ break;
+ default:
+ ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
+ break;
}
done:
+ if (ioend->io_append_trans)
+ error = xfs_setfilesize_ioend(ioend, error);
xfs_destroy_ioend(ioend, error);
}
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 003a99b83bd8..ad9396e516f6 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -71,22 +71,11 @@ xfs_dir2_sf_getdents(
struct xfs_da_geometry *geo = args->geo;
ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
- /*
- * Give up if the directory is way too short.
- */
- if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
- ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
- return -EIO;
- }
-
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
- if (dp->i_d.di_size < xfs_dir2_sf_hdr_size(sfp->i8count))
- return -EFSCORRUPTED;
-
/*
* If the block number in the offset is out of range, we're done.
*/
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 7234b9748c36..3531f8f72fa5 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1608,7 +1608,7 @@ xfs_inode_free_cowblocks(
xfs_ilock(ip, XFS_IOLOCK_EXCL);
xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
- ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF);
+ ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index edfa6a55b064..c7fe2c2123ab 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1615,7 +1615,7 @@ xfs_itruncate_extents(
/* Remove all pending CoW reservations. */
error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block,
- last_block);
+ last_block, true);
if (error)
goto out;
@@ -3475,6 +3475,7 @@ xfs_iflush_int(
struct xfs_inode_log_item *iip = ip->i_itemp;
struct xfs_dinode *dip;
struct xfs_mount *mp = ip->i_mount;
+ int error;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(xfs_isiflocked(ip));
@@ -3557,9 +3558,14 @@ xfs_iflush_int(
if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
ip->i_d.di_flushiter = 0;
- xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
- if (XFS_IFORK_Q(ip))
- xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
+ error = xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
+ if (error)
+ return error;
+ if (XFS_IFORK_Q(ip)) {
+ error = xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
+ if (error)
+ return error;
+ }
xfs_inobp_check(mp, bp);
/*
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 41662fb14e87..288ee5b840d7 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -630,6 +630,11 @@ retry:
goto out_unlock;
}
+ /*
+ * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
+ * them out if the write happens to fail.
+ */
+ iomap->flags = IOMAP_F_NEW;
trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
done:
if (isnullstartblock(got.br_startblock))
@@ -1071,16 +1076,22 @@ xfs_file_iomap_end_delalloc(
struct xfs_inode *ip,
loff_t offset,
loff_t length,
- ssize_t written)
+ ssize_t written,
+ struct iomap *iomap)
{
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t start_fsb;
xfs_fileoff_t end_fsb;
int error = 0;
- /* behave as if the write failed if drop writes is enabled */
- if (xfs_mp_drop_writes(mp))
+ /*
+ * Behave as if the write failed if drop writes is enabled. Set the NEW
+ * flag to force delalloc cleanup.
+ */
+ if (xfs_mp_drop_writes(mp)) {
+ iomap->flags |= IOMAP_F_NEW;
written = 0;
+ }
/*
* start_fsb refers to the first unused block after a short write. If
@@ -1094,14 +1105,14 @@ xfs_file_iomap_end_delalloc(
end_fsb = XFS_B_TO_FSB(mp, offset + length);
/*
- * Trim back delalloc blocks if we didn't manage to write the whole
- * range reserved.
+ * Trim delalloc blocks if they were allocated by this write and we
+ * didn't manage to write the whole range.
*
* We don't need to care about racing delalloc as we hold i_mutex
* across the reserve/allocate/unreserve calls. If there are delalloc
* blocks in the range, they are ours.
*/
- if (start_fsb < end_fsb) {
+ if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) {
truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),
XFS_FSB_TO_B(mp, end_fsb) - 1);
@@ -1131,7 +1142,7 @@ xfs_file_iomap_end(
{
if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
- length, written);
+ length, written, iomap);
return 0;
}
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 66e881790c17..2a6d9b1558e0 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -361,7 +361,6 @@ xfs_bulkstat(
xfs_agino_t agino; /* inode # in allocation group */
xfs_agnumber_t agno; /* allocation group number */
xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
- size_t irbsize; /* size of irec buffer in bytes */
xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
int nirbuf; /* size of irbuf */
int ubcount; /* size of user's buffer */
@@ -388,11 +387,10 @@ xfs_bulkstat(
*ubcountp = 0;
*done = 0;
- irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
+ irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP);
if (!irbuf)
return -ENOMEM;
-
- nirbuf = irbsize / sizeof(*irbuf);
+ nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf);
/*
* Loop over the allocation groups, starting from the last
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 450bde68bb75..688ebff1f663 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -513,8 +513,7 @@ STATIC void
xfs_set_inoalignment(xfs_mount_t *mp)
{
if (xfs_sb_version_hasalign(&mp->m_sb) &&
- mp->m_sb.sb_inoalignmt >=
- XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
+ mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
else
mp->m_inoalign_mask = 0;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index da6d08fb359c..4a84c5ea266d 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -548,14 +548,18 @@ xfs_reflink_trim_irec_to_next_cow(
}
/*
- * Cancel all pending CoW reservations for some block range of an inode.
+ * Cancel CoW reservations for some block range of an inode.
+ *
+ * If cancel_real is true this function cancels all COW fork extents for the
+ * inode; if cancel_real is false, real extents are not cleared.
*/
int
xfs_reflink_cancel_cow_blocks(
struct xfs_inode *ip,
struct xfs_trans **tpp,
xfs_fileoff_t offset_fsb,
- xfs_fileoff_t end_fsb)
+ xfs_fileoff_t end_fsb,
+ bool cancel_real)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
struct xfs_bmbt_irec got, del;
@@ -579,7 +583,7 @@ xfs_reflink_cancel_cow_blocks(
&idx, &got, &del);
if (error)
break;
- } else {
+ } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
xfs_trans_ijoin(*tpp, ip, 0);
xfs_defer_init(&dfops, &firstfsb);
@@ -621,13 +625,17 @@ xfs_reflink_cancel_cow_blocks(
}
/*
- * Cancel all pending CoW reservations for some byte range of an inode.
+ * Cancel CoW reservations for some byte range of an inode.
+ *
+ * If cancel_real is true this function cancels all COW fork extents for the
+ * inode; if cancel_real is false, real extents are not cleared.
*/
int
xfs_reflink_cancel_cow_range(
struct xfs_inode *ip,
xfs_off_t offset,
- xfs_off_t count)
+ xfs_off_t count,
+ bool cancel_real)
{
struct xfs_trans *tp;
xfs_fileoff_t offset_fsb;
@@ -653,7 +661,8 @@ xfs_reflink_cancel_cow_range(
xfs_trans_ijoin(tp, ip, 0);
/* Scrape out the old CoW reservations */
- error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb);
+ error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb,
+ cancel_real);
if (error)
goto out_cancel;
@@ -1450,7 +1459,7 @@ next:
* We didn't find any shared blocks so turn off the reflink flag.
* First, get rid of any leftover CoW mappings.
*/
- error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF);
+ error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true);
if (error)
return error;
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 33ac9b8db683..d29a7967f029 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -39,9 +39,9 @@ extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip,
struct xfs_trans **tpp, xfs_fileoff_t offset_fsb,
- xfs_fileoff_t end_fsb);
+ xfs_fileoff_t end_fsb, bool cancel_real);
extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
- xfs_off_t count);
+ xfs_off_t count, bool cancel_real);
extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t count);
extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 890862f2447c..685c042a120f 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -953,7 +953,7 @@ xfs_fs_destroy_inode(
XFS_STATS_INC(ip->i_mount, vn_remove);
if (xfs_is_reflink_inode(ip)) {
- error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF);
+ error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount))
xfs_warn(ip->i_mount,
"Error %d while evicting CoW blocks for inode %llu.",