aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/file.c5
-rw-r--r--fs/afs/flock.c4
-rw-r--r--fs/afs/fs_probe.c2
-rw-r--r--fs/afs/inode.c3
-rw-r--r--fs/afs/protocol_yfs.h11
-rw-r--r--fs/afs/rxrpc.c53
-rw-r--r--fs/afs/server_list.c4
-rw-r--r--fs/afs/vl_probe.c2
-rw-r--r--fs/afs/yfsclient.c2
-rw-r--r--fs/aio.c293
-rw-r--r--fs/autofs/autofs_i.h13
-rw-r--r--fs/autofs/dev-ioctl.c27
-rw-r--r--fs/autofs/init.c2
-rw-r--r--fs/autofs/inode.c67
-rw-r--r--fs/autofs/root.c16
-rw-r--r--fs/autofs/waitq.c10
-rw-r--r--fs/bfs/bfs.h11
-rw-r--r--fs/bfs/dir.c4
-rw-r--r--fs/bfs/file.c2
-rw-r--r--fs/bfs/inode.c65
-rw-r--r--fs/binfmt_aout.c4
-rw-r--r--fs/binfmt_script.c10
-rw-r--r--fs/block_dev.c72
-rw-r--r--fs/btrfs/ctree.h4
-rw-r--r--fs/btrfs/extent_io.c3
-rw-r--r--fs/btrfs/send.c2
-rw-r--r--fs/btrfs/super.c82
-rw-r--r--fs/buffer.c12
-rw-r--r--fs/ceph/addr.c10
-rw-r--r--fs/ceph/caps.c75
-rw-r--r--fs/ceph/inode.c60
-rw-r--r--fs/ceph/mds_client.c129
-rw-r--r--fs/ceph/mds_client.h16
-rw-r--r--fs/ceph/mdsmap.c1
-rw-r--r--fs/ceph/super.c4
-rw-r--r--fs/ceph/super.h2
-rw-r--r--fs/cifs/Kconfig5
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/cifs_debug.c12
-rw-r--r--fs/cifs/cifs_dfs_ref.c138
-rw-r--r--fs/cifs/cifs_fs_sb.h9
-rw-r--r--fs/cifs/cifsencrypt.c13
-rw-r--r--fs/cifs/cifsfs.c17
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h35
-rw-r--r--fs/cifs/cifsproto.h28
-rw-r--r--fs/cifs/cifssmb.c104
-rw-r--r--fs/cifs/connect.c925
-rw-r--r--fs/cifs/dfs_cache.c1368
-rw-r--r--fs/cifs/dfs_cache.h97
-rw-r--r--fs/cifs/file.c64
-rw-r--r--fs/cifs/inode.c54
-rw-r--r--fs/cifs/misc.c68
-rw-r--r--fs/cifs/readdir.c9
-rw-r--r--fs/cifs/sess.c4
-rw-r--r--fs/cifs/smb1ops.c15
-rw-r--r--fs/cifs/smb2file.c8
-rw-r--r--fs/cifs/smb2inode.c18
-rw-r--r--fs/cifs/smb2maperror.c4
-rw-r--r--fs/cifs/smb2ops.c317
-rw-r--r--fs/cifs/smb2pdu.c165
-rw-r--r--fs/cifs/smb2pdu.h3
-rw-r--r--fs/cifs/smb2proto.h12
-rw-r--r--fs/cifs/smbdirect.c2
-rw-r--r--fs/cifs/transport.c130
-rw-r--r--fs/crypto/crypto.c28
-rw-r--r--fs/crypto/fname.c22
-rw-r--r--fs/crypto/fscrypt_private.h67
-rw-r--r--fs/crypto/keyinfo.c351
-rw-r--r--fs/crypto/policy.c5
-rw-r--r--fs/dax.c24
-rw-r--r--fs/direct-io.c4
-rw-r--r--fs/dlm/ast.c10
-rw-r--r--fs/dlm/lock.c17
-rw-r--r--fs/dlm/lockspace.c9
-rw-r--r--fs/dlm/member.c7
-rw-r--r--fs/dlm/memory.c9
-rw-r--r--fs/dlm/user.c5
-rw-r--r--fs/eventpoll.c272
-rw-r--r--fs/exec.c111
-rw-r--r--fs/exofs/super.c37
-rw-r--r--fs/ext2/super.c12
-rw-r--r--fs/ext2/xattr.c3
-rw-r--r--fs/ext4/acl.c3
-rw-r--r--fs/ext4/ext4.h17
-rw-r--r--fs/ext4/fsync.c16
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/inline.c11
-rw-r--r--fs/ext4/inode.c66
-rw-r--r--fs/ext4/ioctl.c2
-rw-r--r--fs/ext4/migrate.c48
-rw-r--r--fs/ext4/namei.c4
-rw-r--r--fs/ext4/page-io.c2
-rw-r--r--fs/ext4/readpage.c2
-rw-r--r--fs/ext4/resize.c79
-rw-r--r--fs/ext4/super.c94
-rw-r--r--fs/ext4/xattr.c83
-rw-r--r--fs/f2fs/acl.c20
-rw-r--r--fs/f2fs/checkpoint.c33
-rw-r--r--fs/f2fs/data.c162
-rw-r--r--fs/f2fs/debug.c25
-rw-r--r--fs/f2fs/dir.c20
-rw-r--r--fs/f2fs/f2fs.h98
-rw-r--r--fs/f2fs/file.c42
-rw-r--r--fs/f2fs/gc.c81
-rw-r--r--fs/f2fs/inline.c20
-rw-r--r--fs/f2fs/inode.c22
-rw-r--r--fs/f2fs/namei.c8
-rw-r--r--fs/f2fs/node.c40
-rw-r--r--fs/f2fs/node.h2
-rw-r--r--fs/f2fs/recovery.c4
-rw-r--r--fs/f2fs/segment.c101
-rw-r--r--fs/f2fs/segment.h2
-rw-r--r--fs/f2fs/shrinker.c2
-rw-r--r--fs/f2fs/super.c170
-rw-r--r--fs/f2fs/sysfs.c27
-rw-r--r--fs/f2fs/xattr.c22
-rw-r--r--fs/fat/cache.c2
-rw-r--r--fs/fat/dir.c8
-rw-r--r--fs/fat/fat.h30
-rw-r--r--fs/fat/fatent.c16
-rw-r--r--fs/fat/inode.c26
-rw-r--r--fs/fat/misc.c2
-rw-r--r--fs/file.c31
-rw-r--r--fs/file_table.c7
-rw-r--r--fs/fuse/inode.c2
-rw-r--r--fs/gfs2/aops.c16
-rw-r--r--fs/gfs2/bmap.c10
-rw-r--r--fs/gfs2/file.c10
-rw-r--r--fs/gfs2/glock.c2
-rw-r--r--fs/gfs2/glock.h2
-rw-r--r--fs/gfs2/glops.c17
-rw-r--r--fs/gfs2/incore.h3
-rw-r--r--fs/gfs2/inode.c18
-rw-r--r--fs/gfs2/inode.h10
-rw-r--r--fs/gfs2/log.c5
-rw-r--r--fs/gfs2/log.h5
-rw-r--r--fs/gfs2/lops.c257
-rw-r--r--fs/gfs2/lops.h4
-rw-r--r--fs/gfs2/ops_fstype.c1
-rw-r--r--fs/gfs2/recovery.c178
-rw-r--r--fs/gfs2/recovery.h5
-rw-r--r--fs/gfs2/rgrp.c4
-rw-r--r--fs/gfs2/rgrp.h2
-rw-r--r--fs/gfs2/super.c1
-rw-r--r--fs/gfs2/trans.c8
-rw-r--r--fs/hfsplus/dir.c1
-rw-r--r--fs/hfsplus/hfsplus_fs.h2
-rw-r--r--fs/hfsplus/inode.c21
-rw-r--r--fs/inode.c4
-rw-r--r--fs/ioctl.c2
-rw-r--r--fs/iomap.c39
-rw-r--r--fs/jbd2/commit.c3
-rw-r--r--fs/jbd2/transaction.c45
-rw-r--r--fs/jffs2/super.c3
-rw-r--r--fs/kernfs/file.c23
-rw-r--r--fs/lockd/clnt4xdr.c22
-rw-r--r--fs/lockd/clntproc.c8
-rw-r--r--fs/lockd/clntxdr.c22
-rw-r--r--fs/lockd/svclock.c2
-rw-r--r--fs/lockd/xdr.c4
-rw-r--r--fs/lockd/xdr4.c4
-rw-r--r--fs/locks.c344
-rw-r--r--fs/namei.c3
-rw-r--r--fs/namespace.c160
-rw-r--r--fs/nfs/blocklayout/blocklayout.c2
-rw-r--r--fs/nfs/callback.c10
-rw-r--r--fs/nfs/client.c9
-rw-r--r--fs/nfs/delegation.c28
-rw-r--r--fs/nfs/delegation.h10
-rw-r--r--fs/nfs/dir.c59
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c64
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.h8
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c16
-rw-r--r--fs/nfs/inode.c13
-rw-r--r--fs/nfs/internal.h17
-rw-r--r--fs/nfs/nfs3proc.c4
-rw-r--r--fs/nfs/nfs4_fs.h68
-rw-r--r--fs/nfs/nfs4client.c4
-rw-r--r--fs/nfs/nfs4file.c8
-rw-r--r--fs/nfs/nfs4proc.c164
-rw-r--r--fs/nfs/nfs4renewd.c9
-rw-r--r--fs/nfs/nfs4session.c5
-rw-r--r--fs/nfs/nfs4state.c131
-rw-r--r--fs/nfs/nfs4trace.h456
-rw-r--r--fs/nfs/pagelist.c4
-rw-r--r--fs/nfs/pnfs.c14
-rw-r--r--fs/nfs/pnfs.h10
-rw-r--r--fs/nfs/pnfs_dev.c4
-rw-r--r--fs/nfs/pnfs_nfs.c2
-rw-r--r--fs/nfs/proc.c2
-rw-r--r--fs/nfs/super.c47
-rw-r--r--fs/nfs/unlink.c20
-rw-r--r--fs/nfs/write.c26
-rw-r--r--fs/nfsd/nfs4callback.c31
-rw-r--r--fs/nfsd/nfs4layouts.c1
-rw-r--r--fs/nfsd/nfs4proc.c15
-rw-r--r--fs/nfsd/nfs4recover.c17
-rw-r--r--fs/nfsd/nfs4state.c16
-rw-r--r--fs/nfsd/nfscache.c2
-rw-r--r--fs/nfsd/nfsctl.c2
-rw-r--r--fs/nfsd/state.h2
-rw-r--r--fs/nfsd/vfs.c17
-rw-r--r--fs/notify/fanotify/fanotify.c32
-rw-r--r--fs/notify/fanotify/fanotify_user.c12
-rw-r--r--fs/notify/fdinfo.c1
-rw-r--r--fs/notify/fsnotify.c2
-rw-r--r--fs/ntfs/malloc.h2
-rw-r--r--fs/ocfs2/Makefile2
-rw-r--r--fs/ocfs2/aops.c3
-rw-r--r--fs/ocfs2/buffer_head_io.c2
-rw-r--r--fs/ocfs2/cluster/heartbeat.c17
-rw-r--r--fs/ocfs2/dlm/Makefile2
-rw-r--r--fs/ocfs2/dlmfs/Makefile2
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c7
-rw-r--r--fs/ocfs2/journal.c6
-rw-r--r--fs/ocfs2/localalloc.c12
-rw-r--r--fs/ocfs2/locks.c10
-rw-r--r--fs/openpromfs/inode.c11
-rw-r--r--fs/orangefs/inode.c2
-rw-r--r--fs/orangefs/orangefs-bufmap.c2
-rw-r--r--fs/pnode.c1
-rw-r--r--fs/proc/array.c10
-rw-r--r--fs/proc/base.c20
-rw-r--r--fs/proc/inode.c4
-rw-r--r--fs/proc/page.c2
-rw-r--r--fs/proc/proc_sysctl.c13
-rw-r--r--fs/proc/task_mmu.c9
-rw-r--r--fs/proc/util.c1
-rw-r--r--fs/pstore/ftrace.c2
-rw-r--r--fs/pstore/inode.c51
-rw-r--r--fs/pstore/platform.c173
-rw-r--r--fs/pstore/pmsg.c2
-rw-r--r--fs/pstore/ram.c63
-rw-r--r--fs/pstore/ram_core.c47
-rw-r--r--fs/quota/quota.c3
-rw-r--r--fs/read_write.c13
-rw-r--r--fs/readdir.c10
-rw-r--r--fs/select.c367
-rw-r--r--fs/super.c24
-rw-r--r--fs/sysfs/dir.c3
-rw-r--r--fs/sysfs/file.c10
-rw-r--r--fs/sysfs/group.c3
-rw-r--r--fs/sysfs/symlink.c3
-rw-r--r--fs/ubifs/Kconfig16
-rw-r--r--fs/ubifs/auth.c5
-rw-r--r--fs/ubifs/file.c2
-rw-r--r--fs/ubifs/lpt.c12
-rw-r--r--fs/ubifs/replay.c72
-rw-r--r--fs/ubifs/sb.c13
-rw-r--r--fs/udf/inode.c6
-rw-r--r--fs/userfaultfd.c23
-rw-r--r--fs/xfs/libxfs/xfs_ag.c9
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c79
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h4
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c6
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h4
-rw-r--r--fs/xfs/libxfs/xfs_defer.c67
-rw-r--r--fs/xfs/libxfs/xfs_defer.h37
-rw-r--r--fs/xfs/libxfs/xfs_format.h12
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c54
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c7
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c6
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c240
-rw-r--r--fs/xfs/libxfs/xfs_rmap.h54
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c6
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c14
-rw-r--r--fs/xfs/libxfs/xfs_types.c9
-rw-r--r--fs/xfs/libxfs/xfs_types.h22
-rw-r--r--fs/xfs/scrub/agheader.c25
-rw-r--r--fs/xfs/scrub/agheader_repair.c5
-rw-r--r--fs/xfs/scrub/alloc.c4
-rw-r--r--fs/xfs/scrub/btree.c45
-rw-r--r--fs/xfs/scrub/btree.h22
-rw-r--r--fs/xfs/scrub/common.c14
-rw-r--r--fs/xfs/scrub/common.h2
-rw-r--r--fs/xfs/scrub/ialloc.c64
-rw-r--r--fs/xfs/scrub/inode.c4
-rw-r--r--fs/xfs/scrub/refcount.c16
-rw-r--r--fs/xfs/scrub/repair.c54
-rw-r--r--fs/xfs/scrub/repair.h7
-rw-r--r--fs/xfs/scrub/rmap.c35
-rw-r--r--fs/xfs/scrub/scrub.h4
-rw-r--r--fs/xfs/scrub/trace.h131
-rw-r--r--fs/xfs/xfs_aops.h3
-rw-r--r--fs/xfs/xfs_buf.c1
-rw-r--r--fs/xfs/xfs_extfree_item.c5
-rw-r--r--fs/xfs/xfs_fsops.c3
-rw-r--r--fs/xfs/xfs_inode.c16
-rw-r--r--fs/xfs/xfs_ioctl32.c58
-rw-r--r--fs/xfs/xfs_itable.c14
-rw-r--r--fs/xfs/xfs_log_recover.c8
-rw-r--r--fs/xfs/xfs_mount.c4
-rw-r--r--fs/xfs/xfs_mount.h11
-rw-r--r--fs/xfs/xfs_reflink.c232
-rw-r--r--fs/xfs/xfs_rtalloc.c57
-rw-r--r--fs/xfs/xfs_super.c10
-rw-r--r--fs/xfs/xfs_symlink.c33
-rw-r--r--fs/xfs/xfs_trace.h51
-rw-r--r--fs/xfs/xfs_trans.h7
-rw-r--r--fs/xfs/xfs_trans_bmap.c11
-rw-r--r--fs/xfs/xfs_trans_extfree.c40
-rw-r--r--fs/xfs/xfs_trans_refcount.c11
-rw-r--r--fs/xfs/xfs_trans_rmap.c11
304 files changed, 8236 insertions, 4266 deletions
diff --git a/fs/afs/file.c b/fs/afs/file.c
index d6bc3f5d784b..323ae9912203 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -17,6 +17,7 @@
#include <linux/writeback.h>
#include <linux/gfp.h>
#include <linux/task_io_accounting_ops.h>
+#include <linux/mm.h>
#include "internal.h"
static int afs_file_mmap(struct file *file, struct vm_area_struct *vma);
@@ -441,7 +442,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
/* Count the number of contiguous pages at the front of the list. Note
* that the list goes prev-wards rather than next-wards.
*/
- first = list_entry(pages->prev, struct page, lru);
+ first = lru_to_page(pages);
index = first->index + 1;
n = 1;
for (p = first->lru.prev; p != pages; p = p->prev) {
@@ -473,7 +474,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
* page at the end of the file.
*/
do {
- page = list_entry(pages->prev, struct page, lru);
+ page = lru_to_page(pages);
list_del(&page->lru);
index = page->index;
if (add_to_page_cache_lru(page, mapping, index,
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 0568fd986821..e432bd27a2e7 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -208,7 +208,7 @@ again:
/* The new front of the queue now owns the state variables. */
next = list_entry(vnode->pending_locks.next,
struct file_lock, fl_u.afs.link);
- vnode->lock_key = afs_file_key(next->fl_file);
+ vnode->lock_key = key_get(afs_file_key(next->fl_file));
vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
goto again;
@@ -413,7 +413,7 @@ static void afs_dequeue_lock(struct afs_vnode *vnode, struct file_lock *fl)
/* The new front of the queue now owns the state variables. */
next = list_entry(vnode->pending_locks.next,
struct file_lock, fl_u.afs.link);
- vnode->lock_key = afs_file_key(next->fl_file);
+ vnode->lock_key = key_get(afs_file_key(next->fl_file));
vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
afs_lock_may_be_available(vnode);
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index fde6b4d4121e..3a9eaec06756 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -247,7 +247,7 @@ int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
}
}
- if (!still_probing || unlikely(signal_pending(current)))
+ if (!still_probing || signal_pending(current))
goto stop;
schedule();
}
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 6b17d3620414..1a4ce07fb406 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -414,7 +414,6 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
} else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
valid = true;
} else {
- vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
vnode->cb_v_break = vnode->volume->cb_v_break;
valid = false;
}
@@ -546,6 +545,8 @@ void afs_evict_inode(struct inode *inode)
#endif
afs_put_permits(rcu_access_pointer(vnode->permit_cache));
+ key_put(vnode->lock_key);
+ vnode->lock_key = NULL;
_leave("");
}
diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h
index 07bc10f076aa..d443e2bfa094 100644
--- a/fs/afs/protocol_yfs.h
+++ b/fs/afs/protocol_yfs.h
@@ -161,3 +161,14 @@ struct yfs_xdr_YFSStoreVolumeStatus {
struct yfs_xdr_u64 max_quota;
struct yfs_xdr_u64 file_quota;
} __packed;
+
+enum yfs_lock_type {
+ yfs_LockNone = -1,
+ yfs_LockRead = 0,
+ yfs_LockWrite = 1,
+ yfs_LockExtend = 2,
+ yfs_LockRelease = 3,
+ yfs_LockMandatoryRead = 0x100,
+ yfs_LockMandatoryWrite = 0x101,
+ yfs_LockMandatoryExtend = 0x102,
+};
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index a7b44863d502..2c588f9bbbda 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -23,6 +23,7 @@ struct workqueue_struct *afs_async_calls;
static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
static long afs_wait_for_call_to_complete(struct afs_call *, struct afs_addr_cursor *);
static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
+static void afs_delete_async_call(struct work_struct *);
static void afs_process_async_call(struct work_struct *);
static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long);
@@ -203,20 +204,26 @@ void afs_put_call(struct afs_call *call)
}
}
+static struct afs_call *afs_get_call(struct afs_call *call,
+ enum afs_call_trace why)
+{
+ int u = atomic_inc_return(&call->usage);
+
+ trace_afs_call(call, why, u,
+ atomic_read(&call->net->nr_outstanding_calls),
+ __builtin_return_address(0));
+ return call;
+}
+
/*
* Queue the call for actual work.
*/
static void afs_queue_call_work(struct afs_call *call)
{
if (call->type->work) {
- int u = atomic_inc_return(&call->usage);
-
- trace_afs_call(call, afs_call_trace_work, u,
- atomic_read(&call->net->nr_outstanding_calls),
- __builtin_return_address(0));
-
INIT_WORK(&call->work, call->type->work);
+ afs_get_call(call, afs_call_trace_work);
if (!queue_work(afs_wq, &call->work))
afs_put_call(call);
}
@@ -398,6 +405,12 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
}
}
+ /* If the call is going to be asynchronous, we need an extra ref for
+ * the call to hold itself so the caller need not hang on to its ref.
+ */
+ if (call->async)
+ afs_get_call(call, afs_call_trace_get);
+
/* create a call */
rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key,
(unsigned long)call,
@@ -438,15 +451,17 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
goto error_do_abort;
}
- /* at this point, an async call may no longer exist as it may have
- * already completed */
- if (call->async)
+ /* Note that at this point, we may have received the reply or an abort
+ * - and an asynchronous call may already have completed.
+ */
+ if (call->async) {
+ afs_put_call(call);
return -EINPROGRESS;
+ }
return afs_wait_for_call_to_complete(call, ac);
error_do_abort:
- call->state = AFS_CALL_COMPLETE;
if (ret != -ECONNABORTED) {
rxrpc_kernel_abort_call(call->net->socket, rxcall,
RX_USER_ABORT, ret, "KSD");
@@ -463,8 +478,24 @@ error_do_abort:
error_kill_call:
if (call->type->done)
call->type->done(call);
- afs_put_call(call);
+
+ /* We need to dispose of the extra ref we grabbed for an async call.
+ * The call, however, might be queued on afs_async_calls and we need to
+ * make sure we don't get any more notifications that might requeue it.
+ */
+ if (call->rxcall) {
+ rxrpc_kernel_end_call(call->net->socket, call->rxcall);
+ call->rxcall = NULL;
+ }
+ if (call->async) {
+ if (cancel_work_sync(&call->async_work))
+ afs_put_call(call);
+ afs_put_call(call);
+ }
+
ac->error = ret;
+ call->state = AFS_CALL_COMPLETE;
+ afs_put_call(call);
_leave(" = %d", ret);
return ret;
}
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index 95d0761cdb34..155dc14caef9 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -42,9 +42,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
if (vldb->fs_mask[i] & type_mask)
nr_servers++;
- slist = kzalloc(sizeof(struct afs_server_list) +
- sizeof(struct afs_server_entry) * nr_servers,
- GFP_KERNEL);
+ slist = kzalloc(struct_size(slist, servers, nr_servers), GFP_KERNEL);
if (!slist)
goto error;
diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c
index f0b032976487..f402ee8171a1 100644
--- a/fs/afs/vl_probe.c
+++ b/fs/afs/vl_probe.c
@@ -248,7 +248,7 @@ int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist,
}
}
- if (!still_probing || unlikely(signal_pending(current)))
+ if (!still_probing || signal_pending(current))
goto stop;
schedule();
}
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 12658c1363ae..5aa57929e8c2 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -803,7 +803,7 @@ int yfs_fs_create_file(struct afs_fs_cursor *fc,
bp = xdr_encode_YFSFid(bp, &vnode->fid);
bp = xdr_encode_string(bp, name, namesz);
bp = xdr_encode_YFSStoreStatus_mode(bp, mode);
- bp = xdr_encode_u32(bp, 0); /* ViceLockType */
+ bp = xdr_encode_u32(bp, yfs_LockNone); /* ViceLockType */
yfs_check_req(call, bp);
afs_use_fs_server(call, fc->cbi);
diff --git a/fs/aio.c b/fs/aio.c
index aac9659381d2..b906ff70c90f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -70,6 +70,12 @@ struct aio_ring {
struct io_event io_events[0];
}; /* 128 bytes + ring size */
+/*
+ * Plugging is meant to work with larger batches of IOs. If we don't
+ * have more than the below, then don't bother setting up a plug.
+ */
+#define AIO_PLUG_THRESHOLD 2
+
#define AIO_RING_PAGES 8
struct kioctx_table {
@@ -409,7 +415,7 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
BUG_ON(PageWriteback(old));
get_page(new);
- rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1);
+ rc = migrate_page_move_mapping(mapping, new, old, mode, 1);
if (rc != MIGRATEPAGE_SUCCESS) {
put_page(new);
goto out_unlock;
@@ -902,7 +908,7 @@ static void put_reqs_available(struct kioctx *ctx, unsigned nr)
local_irq_restore(flags);
}
-static bool get_reqs_available(struct kioctx *ctx)
+static bool __get_reqs_available(struct kioctx *ctx)
{
struct kioctx_cpu *kcpu;
bool ret = false;
@@ -994,6 +1000,14 @@ static void user_refill_reqs_available(struct kioctx *ctx)
spin_unlock_irq(&ctx->completion_lock);
}
+static bool get_reqs_available(struct kioctx *ctx)
+{
+ if (__get_reqs_available(ctx))
+ return true;
+ user_refill_reqs_available(ctx);
+ return __get_reqs_available(ctx);
+}
+
/* aio_get_req
* Allocate a slot for an aio request.
* Returns NULL if no requests are free.
@@ -1002,24 +1016,16 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
{
struct aio_kiocb *req;
- if (!get_reqs_available(ctx)) {
- user_refill_reqs_available(ctx);
- if (!get_reqs_available(ctx))
- return NULL;
- }
-
- req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
+ req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
if (unlikely(!req))
- goto out_put;
+ return NULL;
percpu_ref_get(&ctx->reqs);
+ req->ki_ctx = ctx;
INIT_LIST_HEAD(&req->ki_list);
refcount_set(&req->ki_refcnt, 0);
- req->ki_ctx = ctx;
+ req->ki_eventfd = NULL;
return req;
-out_put:
- put_reqs_available(ctx, 1);
- return NULL;
}
static struct kioctx *lookup_ioctx(unsigned long ctx_id)
@@ -1059,6 +1065,15 @@ static inline void iocb_put(struct aio_kiocb *iocb)
}
}
+static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb,
+ long res, long res2)
+{
+ ev->obj = (u64)(unsigned long)iocb->ki_user_iocb;
+ ev->data = iocb->ki_user_data;
+ ev->res = res;
+ ev->res2 = res2;
+}
+
/* aio_complete
* Called when the io request on the given iocb is complete.
*/
@@ -1086,10 +1101,7 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE;
- event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
- event->data = iocb->ki_user_data;
- event->res = res;
- event->res2 = res2;
+ aio_fill_event(event, iocb, res, res2);
kunmap_atomic(ev_page);
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
@@ -1416,7 +1428,7 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
aio_complete(iocb, res, res2);
}
-static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
+static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
{
int ret;
@@ -1438,21 +1450,26 @@ static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
ret = ioprio_check_cap(iocb->aio_reqprio);
if (ret) {
pr_debug("aio ioprio check cap error: %d\n", ret);
- fput(req->ki_filp);
- return ret;
+ goto out_fput;
}
req->ki_ioprio = iocb->aio_reqprio;
} else
- req->ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
+ req->ki_ioprio = get_current_ioprio();
ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
if (unlikely(ret))
- fput(req->ki_filp);
+ goto out_fput;
+
+ req->ki_flags &= ~IOCB_HIPRI; /* no one is going to poll for this I/O */
+ return 0;
+
+out_fput:
+ fput(req->ki_filp);
return ret;
}
-static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec,
+static int aio_setup_rw(int rw, const struct iocb *iocb, struct iovec **iovec,
bool vectored, bool compat, struct iov_iter *iter)
{
void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf;
@@ -1487,12 +1504,12 @@ static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
ret = -EINTR;
/*FALLTHRU*/
default:
- aio_complete_rw(req, ret, 0);
+ req->ki_complete(req, ret, 0);
}
}
-static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored,
- bool compat)
+static ssize_t aio_read(struct kiocb *req, const struct iocb *iocb,
+ bool vectored, bool compat)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
@@ -1524,8 +1541,8 @@ out_fput:
return ret;
}
-static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored,
- bool compat)
+static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb,
+ bool vectored, bool compat)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
@@ -1580,7 +1597,8 @@ static void aio_fsync_work(struct work_struct *work)
aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0);
}
-static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
+static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
+ bool datasync)
{
if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes ||
iocb->aio_rw_flags))
@@ -1708,7 +1726,7 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
add_wait_queue(head, &pt->iocb->poll.wait);
}
-static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
+static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
{
struct kioctx *ctx = aiocb->ki_ctx;
struct poll_iocb *req = &aiocb->poll;
@@ -1728,6 +1746,10 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
if (unlikely(!req->file))
return -EBADF;
+ req->head = NULL;
+ req->woken = false;
+ req->cancelled = false;
+
apt.pt._qproc = aio_poll_queue_proc;
apt.pt._key = req->events;
apt.iocb = aiocb;
@@ -1776,44 +1798,44 @@ out:
return 0;
}
-static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
- bool compat)
+static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
+ struct iocb __user *user_iocb, bool compat)
{
struct aio_kiocb *req;
- struct iocb iocb;
ssize_t ret;
- if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
- return -EFAULT;
-
/* enforce forwards compatibility on users */
- if (unlikely(iocb.aio_reserved2)) {
+ if (unlikely(iocb->aio_reserved2)) {
pr_debug("EINVAL: reserve field set\n");
return -EINVAL;
}
/* prevent overflows */
if (unlikely(
- (iocb.aio_buf != (unsigned long)iocb.aio_buf) ||
- (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) ||
- ((ssize_t)iocb.aio_nbytes < 0)
+ (iocb->aio_buf != (unsigned long)iocb->aio_buf) ||
+ (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) ||
+ ((ssize_t)iocb->aio_nbytes < 0)
)) {
pr_debug("EINVAL: overflow check\n");
return -EINVAL;
}
+ if (!get_reqs_available(ctx))
+ return -EAGAIN;
+
+ ret = -EAGAIN;
req = aio_get_req(ctx);
if (unlikely(!req))
- return -EAGAIN;
+ goto out_put_reqs_available;
- if (iocb.aio_flags & IOCB_FLAG_RESFD) {
+ if (iocb->aio_flags & IOCB_FLAG_RESFD) {
/*
* If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
* instance of the file* now. The file descriptor must be
* an eventfd() fd, and will be signaled for each completed
* event using the eventfd_signal() function.
*/
- req->ki_eventfd = eventfd_ctx_fdget((int) iocb.aio_resfd);
+ req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd);
if (IS_ERR(req->ki_eventfd)) {
ret = PTR_ERR(req->ki_eventfd);
req->ki_eventfd = NULL;
@@ -1828,32 +1850,32 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
}
req->ki_user_iocb = user_iocb;
- req->ki_user_data = iocb.aio_data;
+ req->ki_user_data = iocb->aio_data;
- switch (iocb.aio_lio_opcode) {
+ switch (iocb->aio_lio_opcode) {
case IOCB_CMD_PREAD:
- ret = aio_read(&req->rw, &iocb, false, compat);
+ ret = aio_read(&req->rw, iocb, false, compat);
break;
case IOCB_CMD_PWRITE:
- ret = aio_write(&req->rw, &iocb, false, compat);
+ ret = aio_write(&req->rw, iocb, false, compat);
break;
case IOCB_CMD_PREADV:
- ret = aio_read(&req->rw, &iocb, true, compat);
+ ret = aio_read(&req->rw, iocb, true, compat);
break;
case IOCB_CMD_PWRITEV:
- ret = aio_write(&req->rw, &iocb, true, compat);
+ ret = aio_write(&req->rw, iocb, true, compat);
break;
case IOCB_CMD_FSYNC:
- ret = aio_fsync(&req->fsync, &iocb, false);
+ ret = aio_fsync(&req->fsync, iocb, false);
break;
case IOCB_CMD_FDSYNC:
- ret = aio_fsync(&req->fsync, &iocb, true);
+ ret = aio_fsync(&req->fsync, iocb, true);
break;
case IOCB_CMD_POLL:
- ret = aio_poll(req, &iocb);
+ ret = aio_poll(req, iocb);
break;
default:
- pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode);
+ pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode);
ret = -EINVAL;
break;
}
@@ -1867,14 +1889,25 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
goto out_put_req;
return 0;
out_put_req:
- put_reqs_available(ctx, 1);
- percpu_ref_put(&ctx->reqs);
if (req->ki_eventfd)
eventfd_ctx_put(req->ki_eventfd);
- kmem_cache_free(kiocb_cachep, req);
+ iocb_put(req);
+out_put_reqs_available:
+ put_reqs_available(ctx, 1);
return ret;
}
+static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
+ bool compat)
+{
+ struct iocb iocb;
+
+ if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
+ return -EFAULT;
+
+ return __io_submit_one(ctx, &iocb, user_iocb, compat);
+}
+
/* sys_io_submit:
* Queue the nr iocbs pointed to by iocbpp for processing. Returns
* the number of iocbs queued. May return -EINVAL if the aio_context
@@ -1907,7 +1940,8 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
if (nr > ctx->nr_events)
nr = ctx->nr_events;
- blk_start_plug(&plug);
+ if (nr > AIO_PLUG_THRESHOLD)
+ blk_start_plug(&plug);
for (i = 0; i < nr; i++) {
struct iocb __user *user_iocb;
@@ -1920,7 +1954,8 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
if (ret)
break;
}
- blk_finish_plug(&plug);
+ if (nr > AIO_PLUG_THRESHOLD)
+ blk_finish_plug(&plug);
percpu_ref_put(&ctx->users);
return i ? i : ret;
@@ -1947,7 +1982,8 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
if (nr > ctx->nr_events)
nr = ctx->nr_events;
- blk_start_plug(&plug);
+ if (nr > AIO_PLUG_THRESHOLD)
+ blk_start_plug(&plug);
for (i = 0; i < nr; i++) {
compat_uptr_t user_iocb;
@@ -1960,7 +1996,8 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
if (ret)
break;
}
- blk_finish_plug(&plug);
+ if (nr > AIO_PLUG_THRESHOLD)
+ blk_finish_plug(&plug);
percpu_ref_put(&ctx->users);
return i ? i : ret;
@@ -2065,11 +2102,13 @@ static long do_io_getevents(aio_context_t ctx_id,
* specifies an infinite timeout. Note that the timeout pointed to by
* timeout is relative. Will fail with -ENOSYS if not implemented.
*/
+#if !defined(CONFIG_64BIT_TIME) || defined(CONFIG_64BIT)
+
SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
long, min_nr,
long, nr,
struct io_event __user *, events,
- struct timespec __user *, timeout)
+ struct __kernel_timespec __user *, timeout)
{
struct timespec64 ts;
int ret;
@@ -2083,6 +2122,8 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
return ret;
}
+#endif
+
struct __aio_sigset {
const sigset_t __user *sigmask;
size_t sigsetsize;
@@ -2093,7 +2134,7 @@ SYSCALL_DEFINE6(io_pgetevents,
long, min_nr,
long, nr,
struct io_event __user *, events,
- struct timespec __user *, timeout,
+ struct __kernel_timespec __user *, timeout,
const struct __aio_sigset __user *, usig)
{
struct __aio_sigset ksig = { NULL, };
@@ -2107,33 +2148,56 @@ SYSCALL_DEFINE6(io_pgetevents,
if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
return -EFAULT;
- if (ksig.sigmask) {
- if (ksig.sigsetsize != sizeof(sigset_t))
- return -EINVAL;
- if (copy_from_user(&ksigmask, ksig.sigmask, sizeof(ksigmask)))
- return -EFAULT;
- sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
- sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
- }
+ ret = set_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize);
+ if (ret)
+ return ret;
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
- if (signal_pending(current)) {
- if (ksig.sigmask) {
- current->saved_sigmask = sigsaved;
- set_restore_sigmask();
- }
+ restore_user_sigmask(ksig.sigmask, &sigsaved);
+ if (signal_pending(current) && !ret)
+ ret = -ERESTARTNOHAND;
- if (!ret)
- ret = -ERESTARTNOHAND;
- } else {
- if (ksig.sigmask)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
- }
+ return ret;
+}
+
+#if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT)
+
+SYSCALL_DEFINE6(io_pgetevents_time32,
+ aio_context_t, ctx_id,
+ long, min_nr,
+ long, nr,
+ struct io_event __user *, events,
+ struct old_timespec32 __user *, timeout,
+ const struct __aio_sigset __user *, usig)
+{
+ struct __aio_sigset ksig = { NULL, };
+ sigset_t ksigmask, sigsaved;
+ struct timespec64 ts;
+ int ret;
+
+ if (timeout && unlikely(get_old_timespec32(&ts, timeout)))
+ return -EFAULT;
+
+ if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
+ return -EFAULT;
+
+
+ ret = set_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize);
+ if (ret)
+ return ret;
+
+ ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
+ restore_user_sigmask(ksig.sigmask, &sigsaved);
+ if (signal_pending(current) && !ret)
+ ret = -ERESTARTNOHAND;
return ret;
}
-#ifdef CONFIG_COMPAT
+#endif
+
+#if defined(CONFIG_COMPAT_32BIT_TIME)
+
COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
compat_long_t, min_nr,
compat_long_t, nr,
@@ -2152,12 +2216,17 @@ COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
return ret;
}
+#endif
+
+#ifdef CONFIG_COMPAT
struct __compat_aio_sigset {
compat_sigset_t __user *sigmask;
compat_size_t sigsetsize;
};
+#if defined(CONFIG_COMPAT_32BIT_TIME)
+
COMPAT_SYSCALL_DEFINE6(io_pgetevents,
compat_aio_context_t, ctx_id,
compat_long_t, min_nr,
@@ -2177,27 +2246,47 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents,
if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
return -EFAULT;
- if (ksig.sigmask) {
- if (ksig.sigsetsize != sizeof(compat_sigset_t))
- return -EINVAL;
- if (get_compat_sigset(&ksigmask, ksig.sigmask))
- return -EFAULT;
- sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
- sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
- }
+ ret = set_compat_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize);
+ if (ret)
+ return ret;
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
- if (signal_pending(current)) {
- if (ksig.sigmask) {
- current->saved_sigmask = sigsaved;
- set_restore_sigmask();
- }
- if (!ret)
- ret = -ERESTARTNOHAND;
- } else {
- if (ksig.sigmask)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
- }
+ restore_user_sigmask(ksig.sigmask, &sigsaved);
+ if (signal_pending(current) && !ret)
+ ret = -ERESTARTNOHAND;
+
+ return ret;
+}
+
+#endif
+
+COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64,
+ compat_aio_context_t, ctx_id,
+ compat_long_t, min_nr,
+ compat_long_t, nr,
+ struct io_event __user *, events,
+ struct __kernel_timespec __user *, timeout,
+ const struct __compat_aio_sigset __user *, usig)
+{
+ struct __compat_aio_sigset ksig = { NULL, };
+ sigset_t ksigmask, sigsaved;
+ struct timespec64 t;
+ int ret;
+
+ if (timeout && get_timespec64(&t, timeout))
+ return -EFAULT;
+
+ if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
+ return -EFAULT;
+
+ ret = set_compat_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize);
+ if (ret)
+ return ret;
+
+ ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
+ restore_user_sigmask(ksig.sigmask, &sigsaved);
+ if (signal_pending(current) && !ret)
+ ret = -ERESTARTNOHAND;
return ret;
}
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 9f9cadbfbd7a..3e59f0ed777b 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -42,6 +42,8 @@
#endif
#define pr_fmt(fmt) KBUILD_MODNAME ":pid:%d:%s: " fmt, current->pid, __func__
+extern struct file_system_type autofs_fs_type;
+
/*
* Unified info structure. This is pointed to by both the dentry and
* inode structures. Each file in the filesystem has an instance of this
@@ -101,16 +103,19 @@ struct autofs_wait_queue {
#define AUTOFS_SBI_MAGIC 0x6d4a556d
+#define AUTOFS_SBI_CATATONIC 0x0001
+#define AUTOFS_SBI_STRICTEXPIRE 0x0002
+
struct autofs_sb_info {
u32 magic;
int pipefd;
struct file *pipe;
struct pid *oz_pgrp;
- int catatonic;
int version;
int sub_version;
int min_proto;
int max_proto;
+ unsigned int flags;
unsigned long exp_timeout;
unsigned int type;
struct super_block *sb;
@@ -126,8 +131,7 @@ struct autofs_sb_info {
static inline struct autofs_sb_info *autofs_sbi(struct super_block *sb)
{
- return sb->s_magic != AUTOFS_SUPER_MAGIC ?
- NULL : (struct autofs_sb_info *)(sb->s_fs_info);
+ return (struct autofs_sb_info *)(sb->s_fs_info);
}
static inline struct autofs_info *autofs_dentry_ino(struct dentry *dentry)
@@ -141,7 +145,8 @@ static inline struct autofs_info *autofs_dentry_ino(struct dentry *dentry)
*/
static inline int autofs_oz_mode(struct autofs_sb_info *sbi)
{
- return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp;
+ return ((sbi->flags & AUTOFS_SBI_CATATONIC) ||
+ task_pgrp(current) == sbi->oz_pgrp);
}
struct inode *autofs_get_inode(struct super_block *, umode_t);
diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c
index 86eafda4a652..e9fe74d1541b 100644
--- a/fs/autofs/dev-ioctl.c
+++ b/fs/autofs/dev-ioctl.c
@@ -151,22 +151,6 @@ out:
return err;
}
-/*
- * Get the autofs super block info struct from the file opened on
- * the autofs mount point.
- */
-static struct autofs_sb_info *autofs_dev_ioctl_sbi(struct file *f)
-{
- struct autofs_sb_info *sbi = NULL;
- struct inode *inode;
-
- if (f) {
- inode = file_inode(f);
- sbi = autofs_sbi(inode->i_sb);
- }
- return sbi;
-}
-
/* Return autofs dev ioctl version */
static int autofs_dev_ioctl_version(struct file *fp,
struct autofs_sb_info *sbi,
@@ -366,7 +350,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
pipefd = param->setpipefd.pipefd;
mutex_lock(&sbi->wq_mutex);
- if (!sbi->catatonic) {
+ if (!(sbi->flags & AUTOFS_SBI_CATATONIC)) {
mutex_unlock(&sbi->wq_mutex);
return -EBUSY;
} else {
@@ -393,7 +377,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
swap(sbi->oz_pgrp, new_pid);
sbi->pipefd = pipefd;
sbi->pipe = pipe;
- sbi->catatonic = 0;
+ sbi->flags &= ~AUTOFS_SBI_CATATONIC;
}
out:
put_pid(new_pid);
@@ -658,6 +642,8 @@ static int _autofs_dev_ioctl(unsigned int command,
if (cmd != AUTOFS_DEV_IOCTL_VERSION_CMD &&
cmd != AUTOFS_DEV_IOCTL_OPENMOUNT_CMD &&
cmd != AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD) {
+ struct super_block *sb;
+
fp = fget(param->ioctlfd);
if (!fp) {
if (cmd == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD)
@@ -666,12 +652,13 @@ static int _autofs_dev_ioctl(unsigned int command,
goto out;
}
- sbi = autofs_dev_ioctl_sbi(fp);
- if (!sbi || sbi->magic != AUTOFS_SBI_MAGIC) {
+ sb = file_inode(fp)->i_sb;
+ if (sb->s_type != &autofs_fs_type) {
err = -EINVAL;
fput(fp);
goto out;
}
+ sbi = autofs_sbi(sb);
/*
* Admin needs to be able to set the mount catatonic in
diff --git a/fs/autofs/init.c b/fs/autofs/init.c
index 79ae07d9592f..c0c1db2cc6ea 100644
--- a/fs/autofs/init.c
+++ b/fs/autofs/init.c
@@ -16,7 +16,7 @@ static struct dentry *autofs_mount(struct file_system_type *fs_type,
return mount_nodev(fs_type, flags, data, autofs_fill_super);
}
-static struct file_system_type autofs_fs_type = {
+struct file_system_type autofs_fs_type = {
.owner = THIS_MODULE,
.name = "autofs",
.mount = autofs_mount,
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 846c052569dd..0e8ea2d9a2bb 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -87,6 +87,8 @@ static int autofs_show_options(struct seq_file *m, struct dentry *root)
seq_printf(m, ",direct");
else
seq_printf(m, ",indirect");
+ if (sbi->flags & AUTOFS_SBI_STRICTEXPIRE)
+ seq_printf(m, ",strictexpire");
#ifdef CONFIG_CHECKPOINT_RESTORE
if (sbi->pipe)
seq_printf(m, ",pipe_ino=%ld", file_inode(sbi->pipe)->i_ino);
@@ -109,7 +111,7 @@ static const struct super_operations autofs_sops = {
};
enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto,
- Opt_indirect, Opt_direct, Opt_offset};
+ Opt_indirect, Opt_direct, Opt_offset, Opt_strictexpire};
static const match_table_t tokens = {
{Opt_fd, "fd=%u"},
@@ -121,24 +123,28 @@ static const match_table_t tokens = {
{Opt_indirect, "indirect"},
{Opt_direct, "direct"},
{Opt_offset, "offset"},
+ {Opt_strictexpire, "strictexpire"},
{Opt_err, NULL}
};
-static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
- int *pgrp, bool *pgrp_set, unsigned int *type,
- int *minproto, int *maxproto)
+static int parse_options(char *options,
+ struct inode *root, int *pgrp, bool *pgrp_set,
+ struct autofs_sb_info *sbi)
{
char *p;
substring_t args[MAX_OPT_ARGS];
int option;
+ int pipefd = -1;
+ kuid_t uid;
+ kgid_t gid;
- *uid = current_uid();
- *gid = current_gid();
+ root->i_uid = current_uid();
+ root->i_gid = current_gid();
- *minproto = AUTOFS_MIN_PROTO_VERSION;
- *maxproto = AUTOFS_MAX_PROTO_VERSION;
+ sbi->min_proto = AUTOFS_MIN_PROTO_VERSION;
+ sbi->max_proto = AUTOFS_MAX_PROTO_VERSION;
- *pipefd = -1;
+ sbi->pipefd = -1;
if (!options)
return 1;
@@ -152,22 +158,25 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
token = match_token(p, tokens, args);
switch (token) {
case Opt_fd:
- if (match_int(args, pipefd))
+ if (match_int(args, &pipefd))
return 1;
+ sbi->pipefd = pipefd;
break;
case Opt_uid:
if (match_int(args, &option))
return 1;
- *uid = make_kuid(current_user_ns(), option);
- if (!uid_valid(*uid))
+ uid = make_kuid(current_user_ns(), option);
+ if (!uid_valid(uid))
return 1;
+ root->i_uid = uid;
break;
case Opt_gid:
if (match_int(args, &option))
return 1;
- *gid = make_kgid(current_user_ns(), option);
- if (!gid_valid(*gid))
+ gid = make_kgid(current_user_ns(), option);
+ if (!gid_valid(gid))
return 1;
+ root->i_gid = gid;
break;
case Opt_pgrp:
if (match_int(args, &option))
@@ -178,27 +187,30 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
case Opt_minproto:
if (match_int(args, &option))
return 1;
- *minproto = option;
+ sbi->min_proto = option;
break;
case Opt_maxproto:
if (match_int(args, &option))
return 1;
- *maxproto = option;
+ sbi->max_proto = option;
break;
case Opt_indirect:
- set_autofs_type_indirect(type);
+ set_autofs_type_indirect(&sbi->type);
break;
case Opt_direct:
- set_autofs_type_direct(type);
+ set_autofs_type_direct(&sbi->type);
break;
case Opt_offset:
- set_autofs_type_offset(type);
+ set_autofs_type_offset(&sbi->type);
+ break;
+ case Opt_strictexpire:
+ sbi->flags |= AUTOFS_SBI_STRICTEXPIRE;
break;
default:
return 1;
}
}
- return (*pipefd < 0);
+ return (sbi->pipefd < 0);
}
int autofs_fill_super(struct super_block *s, void *data, int silent)
@@ -206,7 +218,6 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
struct inode *root_inode;
struct dentry *root;
struct file *pipe;
- int pipefd;
struct autofs_sb_info *sbi;
struct autofs_info *ino;
int pgrp = 0;
@@ -222,12 +233,12 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
sbi->magic = AUTOFS_SBI_MAGIC;
sbi->pipefd = -1;
sbi->pipe = NULL;
- sbi->catatonic = 1;
sbi->exp_timeout = 0;
sbi->oz_pgrp = NULL;
sbi->sb = s;
sbi->version = 0;
sbi->sub_version = 0;
+ sbi->flags = AUTOFS_SBI_CATATONIC;
set_autofs_type_indirect(&sbi->type);
sbi->min_proto = 0;
sbi->max_proto = 0;
@@ -262,9 +273,7 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
root->d_fsdata = ino;
/* Can this call block? */
- if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid,
- &pgrp, &pgrp_set, &sbi->type, &sbi->min_proto,
- &sbi->max_proto)) {
+ if (parse_options(data, root_inode, &pgrp, &pgrp_set, sbi)) {
pr_err("called with bogus options\n");
goto fail_dput;
}
@@ -303,8 +312,9 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
root_inode->i_fop = &autofs_root_operations;
root_inode->i_op = &autofs_dir_inode_operations;
- pr_debug("pipe fd = %d, pgrp = %u\n", pipefd, pid_nr(sbi->oz_pgrp));
- pipe = fget(pipefd);
+ pr_debug("pipe fd = %d, pgrp = %u\n",
+ sbi->pipefd, pid_nr(sbi->oz_pgrp));
+ pipe = fget(sbi->pipefd);
if (!pipe) {
pr_err("could not open pipe file descriptor\n");
@@ -314,8 +324,7 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
if (ret < 0)
goto fail_fput;
sbi->pipe = pipe;
- sbi->pipefd = pipefd;
- sbi->catatonic = 0;
+ sbi->flags &= ~AUTOFS_SBI_CATATONIC;
/*
* Success! Install the root dentry now to indicate completion.
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index 782e57b911ab..1246f396bf0e 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -275,8 +275,11 @@ static int autofs_mount_wait(const struct path *path, bool rcu_walk)
pr_debug("waiting for mount name=%pd\n", path->dentry);
status = autofs_wait(sbi, path, NFY_MOUNT);
pr_debug("mount wait done status=%d\n", status);
+ ino->last_used = jiffies;
+ return status;
}
- ino->last_used = jiffies;
+ if (!(sbi->flags & AUTOFS_SBI_STRICTEXPIRE))
+ ino->last_used = jiffies;
return status;
}
@@ -510,7 +513,8 @@ static struct dentry *autofs_lookup(struct inode *dir,
sbi = autofs_sbi(dir->i_sb);
pr_debug("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d\n",
- current->pid, task_pgrp_nr(current), sbi->catatonic,
+ current->pid, task_pgrp_nr(current),
+ sbi->flags & AUTOFS_SBI_CATATONIC,
autofs_oz_mode(sbi));
active = autofs_lookup_active(dentry);
@@ -563,7 +567,7 @@ static int autofs_dir_symlink(struct inode *dir,
* autofs mount is catatonic but the state of an autofs
* file system needs to be preserved over restarts.
*/
- if (sbi->catatonic)
+ if (sbi->flags & AUTOFS_SBI_CATATONIC)
return -EACCES;
BUG_ON(!ino);
@@ -626,7 +630,7 @@ static int autofs_dir_unlink(struct inode *dir, struct dentry *dentry)
* autofs mount is catatonic but the state of an autofs
* file system needs to be preserved over restarts.
*/
- if (sbi->catatonic)
+ if (sbi->flags & AUTOFS_SBI_CATATONIC)
return -EACCES;
if (atomic_dec_and_test(&ino->count)) {
@@ -714,7 +718,7 @@ static int autofs_dir_rmdir(struct inode *dir, struct dentry *dentry)
* autofs mount is catatonic but the state of an autofs
* file system needs to be preserved over restarts.
*/
- if (sbi->catatonic)
+ if (sbi->flags & AUTOFS_SBI_CATATONIC)
return -EACCES;
spin_lock(&sbi->lookup_lock);
@@ -759,7 +763,7 @@ static int autofs_dir_mkdir(struct inode *dir,
* autofs mount is catatonic but the state of an autofs
* file system needs to be preserved over restarts.
*/
- if (sbi->catatonic)
+ if (sbi->flags & AUTOFS_SBI_CATATONIC)
return -EACCES;
pr_debug("dentry %p, creating %pd\n", dentry, dentry);
diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
index f6385c6ef0a5..15a3e31d0904 100644
--- a/fs/autofs/waitq.c
+++ b/fs/autofs/waitq.c
@@ -20,14 +20,14 @@ void autofs_catatonic_mode(struct autofs_sb_info *sbi)
struct autofs_wait_queue *wq, *nwq;
mutex_lock(&sbi->wq_mutex);
- if (sbi->catatonic) {
+ if (sbi->flags & AUTOFS_SBI_CATATONIC) {
mutex_unlock(&sbi->wq_mutex);
return;
}
pr_debug("entering catatonic mode\n");
- sbi->catatonic = 1;
+ sbi->flags |= AUTOFS_SBI_CATATONIC;
wq = sbi->queues;
sbi->queues = NULL; /* Erase all wait queues */
while (wq) {
@@ -255,7 +255,7 @@ static int validate_request(struct autofs_wait_queue **wait,
struct autofs_wait_queue *wq;
struct autofs_info *ino;
- if (sbi->catatonic)
+ if (sbi->flags & AUTOFS_SBI_CATATONIC)
return -ENOENT;
/* Wait in progress, continue; */
@@ -290,7 +290,7 @@ static int validate_request(struct autofs_wait_queue **wait,
if (mutex_lock_interruptible(&sbi->wq_mutex))
return -EINTR;
- if (sbi->catatonic)
+ if (sbi->flags & AUTOFS_SBI_CATATONIC)
return -ENOENT;
wq = autofs_find_wait(sbi, qstr);
@@ -359,7 +359,7 @@ int autofs_wait(struct autofs_sb_info *sbi,
pid_t tgid;
/* In catatonic mode, we don't wait for nobody */
- if (sbi->catatonic)
+ if (sbi->flags & AUTOFS_SBI_CATATONIC)
return -ENOENT;
/*
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 67aef3bb89e4..606f9378b2f0 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -1,13 +1,20 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/bfs/bfs.h
- * Copyright (C) 1999 Tigran Aivazian <tigran@veritas.com>
+ * Copyright (C) 1999-2018 Tigran Aivazian <aivazian.tigran@gmail.com>
*/
#ifndef _FS_BFS_BFS_H
#define _FS_BFS_BFS_H
#include <linux/bfs_fs.h>
+/* In theory BFS supports up to 512 inodes, numbered from 2 (for /) up to 513 inclusive.
+ In actual fact, attempting to create the 512th inode (i.e. inode No. 513 or file No. 511)
+ will fail with ENOSPC in bfs_add_entry(): the root directory cannot contain so many entries, counting '..'.
+ So, mkfs.bfs(8) should really limit its -N option to 511 and not 512. For now, we just print a warning
+ if a filesystem is mounted with such "impossible to fill up" number of inodes */
+#define BFS_MAX_LASTI 513
+
/*
* BFS file system in-core superblock info
*/
@@ -17,7 +24,7 @@ struct bfs_sb_info {
unsigned long si_freei;
unsigned long si_lf_eblk;
unsigned long si_lasti;
- unsigned long *si_imap;
+ DECLARE_BITMAP(si_imap, BFS_MAX_LASTI+1);
struct mutex bfs_lock;
};
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index f32f21c3bbc7..d8dfe3a0cb39 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -2,8 +2,8 @@
/*
* fs/bfs/dir.c
* BFS directory operations.
- * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com>
- * Made endianness-clean by Andrew Stribblehill <ads@wompom.org> 2005
+ * Copyright (C) 1999-2018 Tigran Aivazian <aivazian.tigran@gmail.com>
+ * Made endianness-clean by Andrew Stribblehill <ads@wompom.org> 2005
*/
#include <linux/time.h>
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 1476cdd90cfb..0dceefc54b48 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -2,7 +2,7 @@
/*
* fs/bfs/file.c
* BFS file operations.
- * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com>
+ * Copyright (C) 1999-2018 Tigran Aivazian <aivazian.tigran@gmail.com>
*
* Make the file block allocation algorithm understand the size
* of the underlying block device.
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index d81c148682e7..d136b2aaafb3 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -1,10 +1,9 @@
/*
* fs/bfs/inode.c
* BFS superblock and inode operations.
- * Copyright (C) 1999-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
+ * Copyright (C) 1999-2018 Tigran Aivazian <aivazian.tigran@gmail.com>
* From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds.
- *
- * Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005.
+ * Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005.
*/
#include <linux/module.h>
@@ -118,12 +117,12 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
struct bfs_sb_info *info = BFS_SB(inode->i_sb);
unsigned int ino = (u16)inode->i_ino;
- unsigned long i_sblock;
+ unsigned long i_sblock;
struct bfs_inode *di;
struct buffer_head *bh;
int err = 0;
- dprintf("ino=%08x\n", ino);
+ dprintf("ino=%08x\n", ino);
di = find_inode(inode->i_sb, ino, &bh);
if (IS_ERR(di))
@@ -144,7 +143,7 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
di->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
di->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
- i_sblock = BFS_I(inode)->i_sblock;
+ i_sblock = BFS_I(inode)->i_sblock;
di->i_sblock = cpu_to_le32(i_sblock);
di->i_eblock = cpu_to_le32(BFS_I(inode)->i_eblock);
di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1);
@@ -188,13 +187,13 @@ static void bfs_evict_inode(struct inode *inode)
mark_buffer_dirty(bh);
brelse(bh);
- if (bi->i_dsk_ino) {
+ if (bi->i_dsk_ino) {
if (bi->i_sblock)
info->si_freeb += bi->i_eblock + 1 - bi->i_sblock;
info->si_freei++;
clear_bit(ino, info->si_imap);
- bfs_dump_imap("delete_inode", s);
- }
+ bfs_dump_imap("evict_inode", s);
+ }
/*
* If this was the last file, make the previous block
@@ -214,7 +213,6 @@ static void bfs_put_super(struct super_block *s)
return;
mutex_destroy(&info->bfs_lock);
- kfree(info->si_imap);
kfree(info);
s->s_fs_info = NULL;
}
@@ -311,8 +309,7 @@ void bfs_dump_imap(const char *prefix, struct super_block *s)
else
strcat(tmpbuf, "0");
}
- printf("BFS-fs: %s: lasti=%08lx <%s>\n",
- prefix, BFS_SB(s)->si_lasti, tmpbuf);
+ printf("%s: lasti=%08lx <%s>\n", prefix, BFS_SB(s)->si_lasti, tmpbuf);
free_page((unsigned long)tmpbuf);
#endif
}
@@ -322,7 +319,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
struct buffer_head *bh, *sbh;
struct bfs_super_block *bfs_sb;
struct inode *inode;
- unsigned i, imap_len;
+ unsigned i;
struct bfs_sb_info *info;
int ret = -EINVAL;
unsigned long i_sblock, i_eblock, i_eoff, s_size;
@@ -341,8 +338,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
bfs_sb = (struct bfs_super_block *)sbh->b_data;
if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) {
if (!silent)
- printf("No BFS filesystem on %s (magic=%08x)\n",
- s->s_id, le32_to_cpu(bfs_sb->s_magic));
+ printf("No BFS filesystem on %s (magic=%08x)\n", s->s_id, le32_to_cpu(bfs_sb->s_magic));
goto out1;
}
if (BFS_UNCLEAN(bfs_sb, s) && !silent)
@@ -351,18 +347,16 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
s->s_magic = BFS_MAGIC;
if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end) ||
- le32_to_cpu(bfs_sb->s_start) < BFS_BSIZE) {
- printf("Superblock is corrupted\n");
+ le32_to_cpu(bfs_sb->s_start) < sizeof(struct bfs_super_block) + sizeof(struct bfs_dirent)) {
+ printf("Superblock is corrupted on %s\n", s->s_id);
goto out1;
}
- info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) /
- sizeof(struct bfs_inode)
- + BFS_ROOT_INO - 1;
- imap_len = (info->si_lasti / 8) + 1;
- info->si_imap = kzalloc(imap_len, GFP_KERNEL | __GFP_NOWARN);
- if (!info->si_imap) {
- printf("Cannot allocate %u bytes\n", imap_len);
+ info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) / sizeof(struct bfs_inode) + BFS_ROOT_INO - 1;
+ if (info->si_lasti == BFS_MAX_LASTI)
+ printf("WARNING: filesystem %s was created with 512 inodes, the real maximum is 511, mounting anyway\n", s->s_id);
+ else if (info->si_lasti > BFS_MAX_LASTI) {
+ printf("Impossible last inode number %lu > %d on %s\n", info->si_lasti, BFS_MAX_LASTI, s->s_id);
goto out1;
}
for (i = 0; i < BFS_ROOT_INO; i++)
@@ -372,26 +366,25 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
inode = bfs_iget(s, BFS_ROOT_INO);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
- goto out2;
+ goto out1;
}
s->s_root = d_make_root(inode);
if (!s->s_root) {
ret = -ENOMEM;
- goto out2;
+ goto out1;
}
info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1) >> BFS_BSIZE_BITS;
- info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1
- - le32_to_cpu(bfs_sb->s_start)) >> BFS_BSIZE_BITS;
+ info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - le32_to_cpu(bfs_sb->s_start)) >> BFS_BSIZE_BITS;
info->si_freei = 0;
info->si_lf_eblk = 0;
/* can we read the last block? */
bh = sb_bread(s, info->si_blocks - 1);
if (!bh) {
- printf("Last block not available: %lu\n", info->si_blocks - 1);
+ printf("Last block not available on %s: %lu\n", s->s_id, info->si_blocks - 1);
ret = -EIO;
- goto out3;
+ goto out2;
}
brelse(bh);
@@ -425,11 +418,11 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
(i_eoff != le32_to_cpu(-1) && i_eoff > s_size) ||
i_sblock * BFS_BSIZE > i_eoff) {
- printf("Inode 0x%08x corrupted\n", i);
+ printf("Inode 0x%08x corrupted on %s\n", i, s->s_id);
brelse(bh);
ret = -EIO;
- goto out3;
+ goto out2;
}
if (!di->i_ino) {
@@ -445,14 +438,12 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
}
brelse(bh);
brelse(sbh);
- bfs_dump_imap("read_super", s);
+ bfs_dump_imap("fill_super", s);
return 0;
-out3:
+out2:
dput(s->s_root);
s->s_root = NULL;
-out2:
- kfree(info->si_imap);
out1:
brelse(sbh);
out:
@@ -482,7 +473,7 @@ static int __init init_bfs_fs(void)
int err = init_inodecache();
if (err)
goto out1;
- err = register_filesystem(&bfs_fs_type);
+ err = register_filesystem(&bfs_fs_type);
if (err)
goto out;
return 0;
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index c3deb2e35f20..ca9725f18e00 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -78,9 +78,9 @@ static int aout_core_dump(struct coredump_params *cprm)
/* make sure we actually have a data and stack area to dump */
set_fs(USER_DS);
- if (!access_ok(VERIFY_READ, START_DATA(dump), dump.u_dsize << PAGE_SHIFT))
+ if (!access_ok(START_DATA(dump), dump.u_dsize << PAGE_SHIFT))
dump.u_dsize = 0;
- if (!access_ok(VERIFY_READ, START_STACK(dump), dump.u_ssize << PAGE_SHIFT))
+ if (!access_ok(START_STACK(dump), dump.u_ssize << PAGE_SHIFT))
dump.u_ssize = 0;
set_fs(KERNEL_DS);
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 7cde3f46ad26..d0078cbb718b 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -42,10 +42,14 @@ static int load_script(struct linux_binprm *bprm)
fput(bprm->file);
bprm->file = NULL;
- bprm->buf[BINPRM_BUF_SIZE - 1] = '\0';
- if ((cp = strchr(bprm->buf, '\n')) == NULL)
- cp = bprm->buf+BINPRM_BUF_SIZE-1;
+ for (cp = bprm->buf+2;; cp++) {
+ if (cp >= bprm->buf + BINPRM_BUF_SIZE)
+ return -ENOEXEC;
+ if (!*cp || (*cp == '\n'))
+ break;
+ }
*cp = '\0';
+
while (cp > bprm->buf) {
cp--;
if ((*cp == ' ') || (*cp == '\t'))
diff --git a/fs/block_dev.c b/fs/block_dev.c
index a80b4f0ee7c4..58a4c1217fa8 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -104,6 +104,20 @@ void invalidate_bdev(struct block_device *bdev)
}
EXPORT_SYMBOL(invalidate_bdev);
+static void set_init_blocksize(struct block_device *bdev)
+{
+ unsigned bsize = bdev_logical_block_size(bdev);
+ loff_t size = i_size_read(bdev->bd_inode);
+
+ while (bsize < PAGE_SIZE) {
+ if (size & bsize)
+ break;
+ bsize <<= 1;
+ }
+ bdev->bd_block_size = bsize;
+ bdev->bd_inode->i_blkbits = blksize_bits(bsize);
+}
+
int set_blocksize(struct block_device *bdev, int size)
{
/* Size must be a power of two, and between 512 and PAGE_SIZE */
@@ -181,7 +195,7 @@ static void blkdev_bio_end_io_simple(struct bio *bio)
struct task_struct *waiter = bio->bi_private;
WRITE_ONCE(bio->bi_private, NULL);
- wake_up_process(waiter);
+ blk_wake_io_task(waiter);
}
static ssize_t
@@ -232,6 +246,8 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
bio.bi_opf = dio_bio_write_op(iocb);
task_io_account_write(ret);
}
+ if (iocb->ki_flags & IOCB_HIPRI)
+ bio.bi_opf |= REQ_HIPRI;
qc = submit_bio(&bio);
for (;;) {
@@ -239,7 +255,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
if (!READ_ONCE(bio.bi_private))
break;
if (!(iocb->ki_flags & IOCB_HIPRI) ||
- !blk_poll(bdev_get_queue(bdev), qc))
+ !blk_poll(bdev_get_queue(bdev), qc, true))
io_schedule();
}
__set_current_state(TASK_RUNNING);
@@ -298,12 +314,13 @@ static void blkdev_bio_end_io(struct bio *bio)
}
dio->iocb->ki_complete(iocb, ret, 0);
- bio_put(&dio->bio);
+ if (dio->multi_bio)
+ bio_put(&dio->bio);
} else {
struct task_struct *waiter = dio->waiter;
WRITE_ONCE(dio->waiter, NULL);
- wake_up_process(waiter);
+ blk_wake_io_task(waiter);
}
}
@@ -328,6 +345,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
struct blk_plug plug;
struct blkdev_dio *dio;
struct bio *bio;
+ bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
bool is_read = (iov_iter_rw(iter) == READ), is_sync;
loff_t pos = iocb->ki_pos;
blk_qc_t qc = BLK_QC_T_NONE;
@@ -338,20 +356,27 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
return -EINVAL;
bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
- bio_get(bio); /* extra ref for the completion handler */
dio = container_of(bio, struct blkdev_dio, bio);
dio->is_sync = is_sync = is_sync_kiocb(iocb);
- if (dio->is_sync)
+ if (dio->is_sync) {
dio->waiter = current;
- else
+ bio_get(bio);
+ } else {
dio->iocb = iocb;
+ }
dio->size = 0;
dio->multi_bio = false;
dio->should_dirty = is_read && iter_is_iovec(iter);
- blk_start_plug(&plug);
+ /*
+ * Don't plug for HIPRI/polled IO, as those should go straight
+ * to issue
+ */
+ if (!is_poll)
+ blk_start_plug(&plug);
+
for (;;) {
bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = pos >> 9;
@@ -381,11 +406,21 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES);
if (!nr_pages) {
+ if (iocb->ki_flags & IOCB_HIPRI)
+ bio->bi_opf |= REQ_HIPRI;
+
qc = submit_bio(bio);
break;
}
if (!dio->multi_bio) {
+ /*
+ * AIO needs an extra reference to ensure the dio
+ * structure which is embedded into the first bio
+ * stays around.
+ */
+ if (!is_sync)
+ bio_get(bio);
dio->multi_bio = true;
atomic_set(&dio->ref, 2);
} else {
@@ -395,7 +430,9 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
submit_bio(bio);
bio = bio_alloc(GFP_KERNEL, nr_pages);
}
- blk_finish_plug(&plug);
+
+ if (!is_poll)
+ blk_finish_plug(&plug);
if (!is_sync)
return -EIOCBQUEUED;
@@ -406,7 +443,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
break;
if (!(iocb->ki_flags & IOCB_HIPRI) ||
- !blk_poll(bdev_get_queue(bdev), qc))
+ !blk_poll(bdev_get_queue(bdev), qc, true))
io_schedule();
}
__set_current_state(TASK_RUNNING);
@@ -1408,18 +1445,9 @@ EXPORT_SYMBOL(check_disk_change);
void bd_set_size(struct block_device *bdev, loff_t size)
{
- unsigned bsize = bdev_logical_block_size(bdev);
-
inode_lock(bdev->bd_inode);
i_size_write(bdev->bd_inode, size);
inode_unlock(bdev->bd_inode);
- while (bsize < PAGE_SIZE) {
- if (size & bsize)
- break;
- bsize <<= 1;
- }
- bdev->bd_block_size = bsize;
- bdev->bd_inode->i_blkbits = blksize_bits(bsize);
}
EXPORT_SYMBOL(bd_set_size);
@@ -1496,8 +1524,10 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
}
}
- if (!ret)
+ if (!ret) {
bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
+ set_init_blocksize(bdev);
+ }
/*
* If the device is invalidated, rescan partition
@@ -1532,6 +1562,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
goto out_clear;
}
bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
+ set_init_blocksize(bdev);
}
if (bdev->bd_bdi == &noop_backing_dev_info)
@@ -1966,6 +1997,7 @@ static const struct address_space_operations def_blk_aops = {
.writepages = blkdev_writepages,
.releasepage = blkdev_releasepage,
.direct_IO = blkdev_direct_IO,
+ .migratepage = buffer_migrate_page_norefs,
.is_dirty_writeback = buffer_check_dirty_writeback,
};
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 8b1d06fa222d..7a2a2621f0d9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1148,9 +1148,6 @@ struct btrfs_fs_info {
struct mutex unused_bg_unpin_mutex;
struct mutex delete_unused_bgs_mutex;
- /* For btrfs to record security options */
- struct security_mnt_opts security_opts;
-
/*
* Chunks that can't be freed yet (under a trim/discard operation)
* and will be latter freed. Protected by fs_info->chunk_mutex.
@@ -3028,7 +3025,6 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
kfree(fs_info->free_space_root);
kfree(fs_info->super_copy);
kfree(fs_info->super_for_commit);
- security_free_mnt_opts(&fs_info->security_opts);
kvfree(fs_info);
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fc126b92ea59..52abe4082680 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4103,8 +4103,7 @@ int extent_readpages(struct address_space *mapping, struct list_head *pages,
while (!list_empty(pages)) {
for (nr = 0; nr < ARRAY_SIZE(pagepool) && !list_empty(pages);) {
- struct page *page = list_entry(pages->prev,
- struct page, lru);
+ struct page *page = lru_to_page(pages);
prefetchw(&page->flags);
list_del(&page->lru);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 1b15b43905f8..7ea2d6b1f170 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -6646,7 +6646,7 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
goto out;
}
- if (!access_ok(VERIFY_READ, arg->clone_sources,
+ if (!access_ok(arg->clone_sources,
sizeof(*arg->clone_sources) *
arg->clone_sources_count)) {
ret = -EFAULT;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 368a5b9e6c13..c5586ffd1426 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1458,56 +1458,6 @@ out:
return root;
}
-static int parse_security_options(char *orig_opts,
- struct security_mnt_opts *sec_opts)
-{
- char *secdata = NULL;
- int ret = 0;
-
- secdata = alloc_secdata();
- if (!secdata)
- return -ENOMEM;
- ret = security_sb_copy_data(orig_opts, secdata);
- if (ret) {
- free_secdata(secdata);
- return ret;
- }
- ret = security_sb_parse_opts_str(secdata, sec_opts);
- free_secdata(secdata);
- return ret;
-}
-
-static int setup_security_options(struct btrfs_fs_info *fs_info,
- struct super_block *sb,
- struct security_mnt_opts *sec_opts)
-{
- int ret = 0;
-
- /*
- * Call security_sb_set_mnt_opts() to check whether new sec_opts
- * is valid.
- */
- ret = security_sb_set_mnt_opts(sb, sec_opts, 0, NULL);
- if (ret)
- return ret;
-
-#ifdef CONFIG_SECURITY
- if (!fs_info->security_opts.num_mnt_opts) {
- /* first time security setup, copy sec_opts to fs_info */
- memcpy(&fs_info->security_opts, sec_opts, sizeof(*sec_opts));
- } else {
- /*
- * Since SELinux (the only one supporting security_mnt_opts)
- * does NOT support changing context during remount/mount of
- * the same sb, this must be the same or part of the same
- * security options, just free it.
- */
- security_free_mnt_opts(sec_opts);
- }
-#endif
- return ret;
-}
-
/*
* Find a superblock for the given device / mount point.
*
@@ -1522,16 +1472,15 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
struct btrfs_device *device = NULL;
struct btrfs_fs_devices *fs_devices = NULL;
struct btrfs_fs_info *fs_info = NULL;
- struct security_mnt_opts new_sec_opts;
+ void *new_sec_opts = NULL;
fmode_t mode = FMODE_READ;
int error = 0;
if (!(flags & SB_RDONLY))
mode |= FMODE_WRITE;
- security_init_mnt_opts(&new_sec_opts);
if (data) {
- error = parse_security_options(data, &new_sec_opts);
+ error = security_sb_eat_lsm_opts(data, &new_sec_opts);
if (error)
return ERR_PTR(error);
}
@@ -1550,7 +1499,6 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
- security_init_mnt_opts(&fs_info->security_opts);
if (!fs_info->super_copy || !fs_info->super_for_commit) {
error = -ENOMEM;
goto error_fs_info;
@@ -1601,16 +1549,12 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
btrfs_sb(s)->bdev_holder = fs_type;
error = btrfs_fill_super(s, fs_devices, data);
}
+ if (!error)
+ error = security_sb_set_mnt_opts(s, new_sec_opts, 0, NULL);
+ security_free_mnt_opts(&new_sec_opts);
if (error) {
deactivate_locked_super(s);
- goto error_sec_opts;
- }
-
- fs_info = btrfs_sb(s);
- error = setup_security_options(fs_info, s, &new_sec_opts);
- if (error) {
- deactivate_locked_super(s);
- goto error_sec_opts;
+ return ERR_PTR(error);
}
return dget(s->s_root);
@@ -1779,18 +1723,14 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
btrfs_remount_prepare(fs_info);
if (data) {
- struct security_mnt_opts new_sec_opts;
+ void *new_sec_opts = NULL;
- security_init_mnt_opts(&new_sec_opts);
- ret = parse_security_options(data, &new_sec_opts);
+ ret = security_sb_eat_lsm_opts(data, &new_sec_opts);
+ if (!ret)
+ ret = security_sb_remount(sb, new_sec_opts);
+ security_free_mnt_opts(&new_sec_opts);
if (ret)
goto restore;
- ret = setup_security_options(fs_info, sb,
- &new_sec_opts);
- if (ret) {
- security_free_mnt_opts(&new_sec_opts);
- goto restore;
- }
}
ret = btrfs_parse_options(fs_info, data, *flags);
diff --git a/fs/buffer.c b/fs/buffer.c
index 1286c2b95498..52d024bfdbc1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2366,7 +2366,7 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
balance_dirty_pages_ratelimited(mapping);
- if (unlikely(fatal_signal_pending(current))) {
+ if (fatal_signal_pending(current)) {
err = -EINTR;
goto out;
}
@@ -3060,11 +3060,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
*/
bio = bio_alloc(GFP_NOIO, 1);
- if (wbc) {
- wbc_init_bio(wbc, bio);
- wbc_account_io(wbc, bh->b_page, bh->b_size);
- }
-
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio_set_dev(bio, bh->b_bdev);
bio->bi_write_hint = write_hint;
@@ -3084,6 +3079,11 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
op_flags |= REQ_PRIO;
bio_set_op_attrs(bio, op, op_flags);
+ if (wbc) {
+ wbc_init_bio(wbc, bio);
+ wbc_account_io(wbc, bh->b_page, bh->b_size);
+ }
+
submit_bio(bio);
return 0;
}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 8eade7a993c1..a47c541f8006 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -306,7 +306,7 @@ static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
struct ceph_osd_client *osdc =
&ceph_inode_to_client(inode)->client->osdc;
struct ceph_inode_info *ci = ceph_inode(inode);
- struct page *page = list_entry(page_list->prev, struct page, lru);
+ struct page *page = lru_to_page(page_list);
struct ceph_vino vino;
struct ceph_osd_request *req;
u64 off;
@@ -333,8 +333,7 @@ static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
if (got)
ceph_put_cap_refs(ci, got);
while (!list_empty(page_list)) {
- page = list_entry(page_list->prev,
- struct page, lru);
+ page = lru_to_page(page_list);
list_del(&page->lru);
put_page(page);
}
@@ -1495,10 +1494,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
if (err < 0 || off >= i_size_read(inode)) {
unlock_page(page);
put_page(page);
- if (err == -ENOMEM)
- ret = VM_FAULT_OOM;
- else
- ret = VM_FAULT_SIGBUS;
+ ret = vmf_error(err);
goto out_inline;
}
if (err < PAGE_SIZE)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index f3496db4bb3e..94c026bba2c2 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -657,6 +657,9 @@ void ceph_add_cap(struct inode *inode,
session->s_nr_caps++;
spin_unlock(&session->s_cap_lock);
} else {
+ if (cap->cap_gen < session->s_cap_gen)
+ cap->issued = cap->implemented = CEPH_CAP_PIN;
+
/*
* auth mds of the inode changed. we received the cap export
* message, but still haven't received the cap import message.
@@ -1855,14 +1858,17 @@ retry_locked:
retain |= CEPH_CAP_ANY; /* be greedy */
} else if (S_ISDIR(inode->i_mode) &&
(issued & CEPH_CAP_FILE_SHARED) &&
- __ceph_dir_is_complete(ci)) {
+ __ceph_dir_is_complete(ci)) {
/*
* If a directory is complete, we want to keep
* the exclusive cap. So that MDS does not end up
* revoking the shared cap on every create/unlink
* operation.
*/
- want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
+ if (IS_RDONLY(inode))
+ want = CEPH_CAP_ANY_SHARED;
+ else
+ want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
retain |= want;
} else {
@@ -1970,8 +1976,7 @@ retry_locked:
goto ack;
/* things we might delay */
- if ((cap->issued & ~retain) == 0 &&
- cap->mds_wanted == want)
+ if ((cap->issued & ~retain) == 0)
continue; /* nope, all good */
if (no_delay)
@@ -3048,7 +3053,8 @@ static void handle_cap_grant(struct inode *inode,
int used, wanted, dirty;
u64 size = le64_to_cpu(grant->size);
u64 max_size = le64_to_cpu(grant->max_size);
- int check_caps = 0;
+ unsigned char check_caps = 0;
+ bool was_stale = cap->cap_gen < session->s_cap_gen;
bool wake = false;
bool writeback = false;
bool queue_trunc = false;
@@ -3063,21 +3069,6 @@ static void handle_cap_grant(struct inode *inode,
/*
- * auth mds of the inode changed. we received the cap export message,
- * but still haven't received the cap import message. handle_cap_export
- * updated the new auth MDS' cap.
- *
- * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing a message
- * that was sent before the cap import message. So don't remove caps.
- */
- if (ceph_seq_cmp(seq, cap->seq) <= 0) {
- WARN_ON(cap != ci->i_auth_cap);
- WARN_ON(cap->cap_id != le64_to_cpu(grant->cap_id));
- seq = cap->seq;
- newcaps |= cap->issued;
- }
-
- /*
* If CACHE is being revoked, and we have no dirty buffers,
* try to invalidate (once). (If there are dirty buffers, we
* will invalidate _after_ writeback.)
@@ -3096,6 +3087,24 @@ static void handle_cap_grant(struct inode *inode,
}
}
+ if (was_stale)
+ cap->issued = cap->implemented = CEPH_CAP_PIN;
+
+ /*
+ * auth mds of the inode changed. we received the cap export message,
+ * but still haven't received the cap import message. handle_cap_export
+ * updated the new auth MDS' cap.
+ *
+ * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing a message
+ * that was sent before the cap import message. So don't remove caps.
+ */
+ if (ceph_seq_cmp(seq, cap->seq) <= 0) {
+ WARN_ON(cap != ci->i_auth_cap);
+ WARN_ON(cap->cap_id != le64_to_cpu(grant->cap_id));
+ seq = cap->seq;
+ newcaps |= cap->issued;
+ }
+
/* side effects now are allowed */
cap->cap_gen = session->s_cap_gen;
cap->seq = seq;
@@ -3200,13 +3209,20 @@ static void handle_cap_grant(struct inode *inode,
ceph_cap_string(wanted),
ceph_cap_string(used),
ceph_cap_string(dirty));
- if (wanted != le32_to_cpu(grant->wanted)) {
- dout("mds wanted %s -> %s\n",
- ceph_cap_string(le32_to_cpu(grant->wanted)),
- ceph_cap_string(wanted));
- /* imported cap may not have correct mds_wanted */
- if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT)
- check_caps = 1;
+
+ if ((was_stale || le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) &&
+ (wanted & ~(cap->mds_wanted | newcaps))) {
+ /*
+ * If mds is importing cap, prior cap messages that update
+ * 'wanted' may get dropped by mds (migrate seq mismatch).
+ *
+ * We don't send cap message to update 'wanted' if what we
+ * want are already issued. If mds revokes caps, cap message
+ * that releases caps also tells mds what we want. But if
+ * caps got revoked by mds forcedly (session stale). We may
+ * haven't told mds what we want.
+ */
+ check_caps = 1;
}
/* revocation, grant, or no-op? */
@@ -3539,9 +3555,9 @@ retry:
goto out_unlock;
if (target < 0) {
- __ceph_remove_cap(cap, false);
- if (!ci->i_auth_cap)
+ if (cap->mds_wanted | cap->issued)
ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
+ __ceph_remove_cap(cap, false);
goto out_unlock;
}
@@ -3569,7 +3585,6 @@ retry:
tcap->cap_id = t_cap_id;
tcap->seq = t_seq - 1;
tcap->issue_seq = t_seq - 1;
- tcap->mseq = t_mseq;
tcap->issued |= issued;
tcap->implemented |= issued;
if (cap == ci->i_auth_cap)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 79dd5e6ed755..9d1f34d46627 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1098,8 +1098,9 @@ out_unlock:
* splice a dentry to an inode.
* caller must hold directory i_mutex for this to be safe.
*/
-static struct dentry *splice_dentry(struct dentry *dn, struct inode *in)
+static int splice_dentry(struct dentry **pdn, struct inode *in)
{
+ struct dentry *dn = *pdn;
struct dentry *realdn;
BUG_ON(d_inode(dn));
@@ -1132,28 +1133,23 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in)
if (IS_ERR(realdn)) {
pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
PTR_ERR(realdn), dn, in, ceph_vinop(in));
- dn = realdn;
- /*
- * Caller should release 'dn' in the case of error.
- * If 'req->r_dentry' is passed to this function,
- * caller should leave 'req->r_dentry' untouched.
- */
- goto out;
- } else if (realdn) {
+ return PTR_ERR(realdn);
+ }
+
+ if (realdn) {
dout("dn %p (%d) spliced with %p (%d) "
"inode %p ino %llx.%llx\n",
dn, d_count(dn),
realdn, d_count(realdn),
d_inode(realdn), ceph_vinop(d_inode(realdn)));
dput(dn);
- dn = realdn;
+ *pdn = realdn;
} else {
BUG_ON(!ceph_dentry(dn));
dout("dn %p attached to %p ino %llx.%llx\n",
dn, d_inode(dn), ceph_vinop(d_inode(dn)));
}
-out:
- return dn;
+ return 0;
}
/*
@@ -1340,7 +1336,12 @@ retry_lookup:
dout("dn %p gets new offset %lld\n", req->r_old_dentry,
ceph_dentry(req->r_old_dentry)->offset);
- dn = req->r_old_dentry; /* use old_dentry */
+ /* swap r_dentry and r_old_dentry in case that
+ * splice_dentry() gets called later. This is safe
+ * because no other place will use them */
+ req->r_dentry = req->r_old_dentry;
+ req->r_old_dentry = dn;
+ dn = req->r_dentry;
}
/* null dentry? */
@@ -1365,12 +1366,10 @@ retry_lookup:
if (d_really_is_negative(dn)) {
ceph_dir_clear_ordered(dir);
ihold(in);
- dn = splice_dentry(dn, in);
- if (IS_ERR(dn)) {
- err = PTR_ERR(dn);
+ err = splice_dentry(&req->r_dentry, in);
+ if (err < 0)
goto done;
- }
- req->r_dentry = dn; /* may have spliced */
+ dn = req->r_dentry; /* may have spliced */
} else if (d_really_is_positive(dn) && d_inode(dn) != in) {
dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
dn, d_inode(dn), ceph_vinop(d_inode(dn)),
@@ -1390,22 +1389,18 @@ retry_lookup:
} else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
req->r_op == CEPH_MDS_OP_MKSNAP) &&
!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
- struct dentry *dn = req->r_dentry;
struct inode *dir = req->r_parent;
/* fill out a snapdir LOOKUPSNAP dentry */
- BUG_ON(!dn);
BUG_ON(!dir);
BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR);
- dout(" linking snapped dir %p to dn %p\n", in, dn);
+ BUG_ON(!req->r_dentry);
+ dout(" linking snapped dir %p to dn %p\n", in, req->r_dentry);
ceph_dir_clear_ordered(dir);
ihold(in);
- dn = splice_dentry(dn, in);
- if (IS_ERR(dn)) {
- err = PTR_ERR(dn);
+ err = splice_dentry(&req->r_dentry, in);
+ if (err < 0)
goto done;
- }
- req->r_dentry = dn; /* may have spliced */
} else if (rinfo->head->is_dentry) {
struct ceph_vino *ptvino = NULL;
@@ -1669,8 +1664,6 @@ retry_lookup:
}
if (d_really_is_negative(dn)) {
- struct dentry *realdn;
-
if (ceph_security_xattr_deadlock(in)) {
dout(" skip splicing dn %p to inode %p"
" (security xattr deadlock)\n", dn, in);
@@ -1679,13 +1672,9 @@ retry_lookup:
goto next_item;
}
- realdn = splice_dentry(dn, in);
- if (IS_ERR(realdn)) {
- err = PTR_ERR(realdn);
- d_drop(dn);
+ err = splice_dentry(&dn, in);
+ if (err < 0)
goto next_item;
- }
- dn = realdn;
}
ceph_dentry(dn)->offset = rde->offset;
@@ -1701,8 +1690,7 @@ retry_lookup:
err = ret;
}
next_item:
- if (dn)
- dput(dn);
+ dput(dn);
}
out:
if (err == 0 && skipped == 0) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index bd13a3267ae0..163fc74bf221 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1232,13 +1232,13 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
dout("removing cap %p, ci is %p, inode is %p\n",
cap, ci, &ci->vfs_inode);
spin_lock(&ci->i_ceph_lock);
+ if (cap->mds_wanted | cap->issued)
+ ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
__ceph_remove_cap(cap, false);
if (!ci->i_auth_cap) {
struct ceph_cap_flush *cf;
struct ceph_mds_client *mdsc = fsc->mdsc;
- ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
-
if (ci->i_wrbuffer_ref > 0 &&
READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
invalidate = true;
@@ -1355,6 +1355,12 @@ static void remove_session_caps(struct ceph_mds_session *session)
dispose_cap_releases(session->s_mdsc, &dispose);
}
+enum {
+ RECONNECT,
+ RENEWCAPS,
+ FORCE_RO,
+};
+
/*
* wake up any threads waiting on this session's caps. if the cap is
* old (didn't get renewed on the client reconnect), remove it now.
@@ -1365,23 +1371,34 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
void *arg)
{
struct ceph_inode_info *ci = ceph_inode(inode);
+ unsigned long ev = (unsigned long)arg;
- if (arg) {
+ if (ev == RECONNECT) {
spin_lock(&ci->i_ceph_lock);
ci->i_wanted_max_size = 0;
ci->i_requested_max_size = 0;
spin_unlock(&ci->i_ceph_lock);
+ } else if (ev == RENEWCAPS) {
+ if (cap->cap_gen < cap->session->s_cap_gen) {
+ /* mds did not re-issue stale cap */
+ spin_lock(&ci->i_ceph_lock);
+ cap->issued = cap->implemented = CEPH_CAP_PIN;
+ /* make sure mds knows what we want */
+ if (__ceph_caps_file_wanted(ci) & ~cap->mds_wanted)
+ ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
+ spin_unlock(&ci->i_ceph_lock);
+ }
+ } else if (ev == FORCE_RO) {
}
wake_up_all(&ci->i_cap_wq);
return 0;
}
-static void wake_up_session_caps(struct ceph_mds_session *session,
- int reconnect)
+static void wake_up_session_caps(struct ceph_mds_session *session, int ev)
{
dout("wake_up_session_caps %p mds%d\n", session, session->s_mds);
iterate_session_caps(session, wake_up_session_cb,
- (void *)(unsigned long)reconnect);
+ (void *)(unsigned long)ev);
}
/*
@@ -1466,7 +1483,7 @@ static void renewed_caps(struct ceph_mds_client *mdsc,
spin_unlock(&session->s_cap_lock);
if (wake)
- wake_up_session_caps(session, 0);
+ wake_up_session_caps(session, RENEWCAPS);
}
/*
@@ -2847,7 +2864,7 @@ static void handle_session(struct ceph_mds_session *session,
spin_lock(&session->s_cap_lock);
session->s_readonly = true;
spin_unlock(&session->s_cap_lock);
- wake_up_session_caps(session, 0);
+ wake_up_session_caps(session, FORCE_RO);
break;
case CEPH_SESSION_REJECT:
@@ -2943,11 +2960,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
struct ceph_inode_info *ci = cap->ci;
struct ceph_reconnect_state *recon_state = arg;
struct ceph_pagelist *pagelist = recon_state->pagelist;
- char *path;
- int pathlen, err;
- u64 pathbase;
+ int err;
u64 snap_follows;
- struct dentry *dentry;
dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
inode, ceph_vinop(inode), cap, cap->cap_id,
@@ -2956,19 +2970,6 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
if (err)
return err;
- dentry = d_find_alias(inode);
- if (dentry) {
- path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 0);
- if (IS_ERR(path)) {
- err = PTR_ERR(path);
- goto out_dput;
- }
- } else {
- path = NULL;
- pathlen = 0;
- pathbase = 0;
- }
-
spin_lock(&ci->i_ceph_lock);
cap->seq = 0; /* reset cap seq */
cap->issue_seq = 0; /* and issue_seq */
@@ -2980,7 +2981,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
rec.v2.issued = cpu_to_le32(cap->issued);
rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
- rec.v2.pathbase = cpu_to_le64(pathbase);
+ rec.v2.pathbase = 0;
rec.v2.flock_len = (__force __le32)
((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1);
} else {
@@ -2991,7 +2992,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime);
ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime);
rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
- rec.v1.pathbase = cpu_to_le64(pathbase);
+ rec.v1.pathbase = 0;
}
if (list_empty(&ci->i_cap_snaps)) {
@@ -3023,7 +3024,7 @@ encode_again:
GFP_NOFS);
if (!flocks) {
err = -ENOMEM;
- goto out_free;
+ goto out_err;
}
err = ceph_encode_locks_to_buffer(inode, flocks,
num_fcntl_locks,
@@ -3033,7 +3034,7 @@ encode_again:
flocks = NULL;
if (err == -ENOSPC)
goto encode_again;
- goto out_free;
+ goto out_err;
}
} else {
kfree(flocks);
@@ -3053,44 +3054,64 @@ encode_again:
sizeof(struct ceph_filelock);
rec.v2.flock_len = cpu_to_le32(struct_len);
- struct_len += sizeof(rec.v2);
- struct_len += sizeof(u32) + pathlen;
+ struct_len += sizeof(u32) + sizeof(rec.v2);
if (struct_v >= 2)
struct_len += sizeof(u64); /* snap_follows */
total_len += struct_len;
err = ceph_pagelist_reserve(pagelist, total_len);
+ if (err) {
+ kfree(flocks);
+ goto out_err;
+ }
- if (!err) {
- if (recon_state->msg_version >= 3) {
- ceph_pagelist_encode_8(pagelist, struct_v);
- ceph_pagelist_encode_8(pagelist, 1);
- ceph_pagelist_encode_32(pagelist, struct_len);
- }
- ceph_pagelist_encode_string(pagelist, path, pathlen);
- ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2));
- ceph_locks_to_pagelist(flocks, pagelist,
- num_fcntl_locks,
- num_flock_locks);
- if (struct_v >= 2)
- ceph_pagelist_encode_64(pagelist, snap_follows);
+ if (recon_state->msg_version >= 3) {
+ ceph_pagelist_encode_8(pagelist, struct_v);
+ ceph_pagelist_encode_8(pagelist, 1);
+ ceph_pagelist_encode_32(pagelist, struct_len);
}
+ ceph_pagelist_encode_string(pagelist, NULL, 0);
+ ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2));
+ ceph_locks_to_pagelist(flocks, pagelist,
+ num_fcntl_locks, num_flock_locks);
+ if (struct_v >= 2)
+ ceph_pagelist_encode_64(pagelist, snap_follows);
+
kfree(flocks);
} else {
- size_t size = sizeof(u32) + pathlen + sizeof(rec.v1);
- err = ceph_pagelist_reserve(pagelist, size);
- if (!err) {
- ceph_pagelist_encode_string(pagelist, path, pathlen);
- ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1));
+ u64 pathbase = 0;
+ int pathlen = 0;
+ char *path = NULL;
+ struct dentry *dentry;
+
+ dentry = d_find_alias(inode);
+ if (dentry) {
+ path = ceph_mdsc_build_path(dentry,
+ &pathlen, &pathbase, 0);
+ dput(dentry);
+ if (IS_ERR(path)) {
+ err = PTR_ERR(path);
+ goto out_err;
+ }
+ rec.v1.pathbase = cpu_to_le64(pathbase);
}
+
+ err = ceph_pagelist_reserve(pagelist,
+ pathlen + sizeof(u32) + sizeof(rec.v1));
+ if (err) {
+ kfree(path);
+ goto out_err;
+ }
+
+ ceph_pagelist_encode_string(pagelist, path, pathlen);
+ ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1));
+
+ kfree(path);
}
recon_state->nr_caps++;
-out_free:
- kfree(path);
-out_dput:
- dput(dentry);
+out_err:
return err;
}
@@ -3339,7 +3360,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
pr_info("mds%d recovery completed\n", s->s_mds);
kick_requests(mdsc, i);
ceph_kick_flushing_caps(mdsc, s);
- wake_up_session_caps(s, 1);
+ wake_up_session_caps(s, RECONNECT);
}
}
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 32fcce0d4d3c..729da155ebf0 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -17,14 +17,16 @@
#include <linux/ceph/auth.h>
/* The first 8 bits are reserved for old ceph releases */
-#define CEPHFS_FEATURE_MIMIC 8
-
-#define CEPHFS_FEATURES_ALL { \
- 0, 1, 2, 3, 4, 5, 6, 7, \
- CEPHFS_FEATURE_MIMIC, \
+#define CEPHFS_FEATURE_MIMIC 8
+#define CEPHFS_FEATURE_REPLY_ENCODING 9
+#define CEPHFS_FEATURE_RECLAIM_CLIENT 10
+#define CEPHFS_FEATURE_LAZY_CAP_WANTED 11
+
+#define CEPHFS_FEATURES_CLIENT_SUPPORTED { \
+ 0, 1, 2, 3, 4, 5, 6, 7, \
+ CEPHFS_FEATURE_MIMIC, \
+ CEPHFS_FEATURE_LAZY_CAP_WANTED, \
}
-
-#define CEPHFS_FEATURES_CLIENT_SUPPORTED CEPHFS_FEATURES_ALL
#define CEPHFS_FEATURES_CLIENT_REQUIRED {}
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 44e53abeb32a..1a2c5d390f7f 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -35,7 +35,6 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
/* pick */
n = prandom_u32() % n;
- i = 0;
for (i = 0; n > 0; i++, n--)
while (m->m_info[i].state <= 0)
i++;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 4e9a7cc488da..da2cd8e89062 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -530,7 +530,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
seq_putc(m, ',');
pos = m->count;
- ret = ceph_print_client_options(m, fsc->client);
+ ret = ceph_print_client_options(m, fsc->client, false);
if (ret)
return ret;
@@ -640,7 +640,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
opt = NULL; /* fsc->client now owns this */
fsc->client->extra_mon_dispatch = extra_mon_dispatch;
- fsc->client->osdc.abort_on_full = true;
+ ceph_set_opt(fsc->client, ABORT_ON_FULL);
if (!fsopt->mds_namespace) {
ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP,
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 79a265ba9200..dfb64a5211b6 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -810,7 +810,7 @@ static inline int default_congestion_kb(void)
* This allows larger machines to have larger/more transfers.
* Limit the default to 256M
*/
- congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
+ congestion_kb = (16*int_sqrt(totalram_pages())) << (PAGE_SHIFT-10);
if (congestion_kb > 256*1024)
congestion_kb = 256*1024;
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 85dadb93c992..f1ddc9d03c10 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -190,8 +190,9 @@ config CIFS_DFS_UPCALL
moves to a different server. This feature also enables
an upcall mechanism for CIFS which contacts userspace helper
utilities to provide server name resolution (host names to
- IP addresses) which is needed for implicit mounts of DFS junction
- points. If unsure, say Y.
+ IP addresses) which is needed in order to reconnect to
+ servers if their addresses change or for implicit mounts of
+ DFS junction points. If unsure, say Y.
config CIFS_NFSD_EXPORT
bool "Allow nfsd to export CIFS file system"
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 85817991ee68..51af69a1a328 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -17,7 +17,7 @@ cifs-$(CONFIG_CIFS_ACL) += cifsacl.o
cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
-cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o
+cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o dfs_cache.o
cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index ba178b09de0b..593fb422d0f3 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -30,6 +30,9 @@
#include "cifsproto.h"
#include "cifs_debug.h"
#include "cifsfs.h"
+#ifdef CONFIG_CIFS_DFS_UPCALL
+#include "dfs_cache.h"
+#endif
#ifdef CONFIG_CIFS_SMB_DIRECT
#include "smbdirect.h"
#endif
@@ -629,6 +632,11 @@ cifs_proc_init(void)
&cifs_security_flags_proc_fops);
proc_create("LookupCacheEnabled", 0644, proc_fs_cifs,
&cifs_lookup_cache_proc_fops);
+
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ proc_create("dfscache", 0644, proc_fs_cifs, &dfscache_proc_fops);
+#endif
+
#ifdef CONFIG_CIFS_SMB_DIRECT
proc_create("rdma_readwrite_threshold", 0644, proc_fs_cifs,
&cifs_rdma_readwrite_threshold_proc_fops);
@@ -663,6 +671,10 @@ cifs_proc_clean(void)
remove_proc_entry("SecurityFlags", proc_fs_cifs);
remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
+
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ remove_proc_entry("dfscache", proc_fs_cifs);
+#endif
#ifdef CONFIG_CIFS_SMB_DIRECT
remove_proc_entry("rdma_readwrite_threshold", proc_fs_cifs);
remove_proc_entry("smbd_max_frmr_depth", proc_fs_cifs);
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index b97c74efd04a..d9b99abe1243 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -25,6 +25,7 @@
#include "dns_resolve.h"
#include "cifs_debug.h"
#include "cifs_unicode.h"
+#include "dfs_cache.h"
static LIST_HEAD(cifs_dfs_automount_list);
@@ -126,7 +127,7 @@ cifs_build_devname(char *nodename, const char *prepath)
* @sb_mountdata: parent/root DFS mount options (template)
* @fullpath: full path in UNC format
* @ref: server's referral
- * @devname: pointer for saving device name
+ * @devname: optional pointer for saving device name
*
* creates mount options for submount based on template options sb_mountdata
* and replacing unc,ip,prefixpath options with ones we've got form ref_unc.
@@ -140,6 +141,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
char **devname)
{
int rc;
+ char *name;
char *mountdata = NULL;
const char *prepath = NULL;
int md_len;
@@ -158,17 +160,17 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
prepath++;
}
- *devname = cifs_build_devname(ref->node_name, prepath);
- if (IS_ERR(*devname)) {
- rc = PTR_ERR(*devname);
- *devname = NULL;
+ name = cifs_build_devname(ref->node_name, prepath);
+ if (IS_ERR(name)) {
+ rc = PTR_ERR(name);
+ name = NULL;
goto compose_mount_options_err;
}
- rc = dns_resolve_server_name_to_ip(*devname, &srvIP);
+ rc = dns_resolve_server_name_to_ip(name, &srvIP);
if (rc < 0) {
cifs_dbg(FYI, "%s: Failed to resolve server part of %s to IP: %d\n",
- __func__, *devname, rc);
+ __func__, name, rc);
goto compose_mount_options_err;
}
@@ -224,6 +226,9 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
strcat(mountdata, "ip=");
strcat(mountdata, srvIP);
+ if (devname)
+ *devname = name;
+
/*cifs_dbg(FYI, "%s: parent mountdata: %s\n", __func__, sb_mountdata);*/
/*cifs_dbg(FYI, "%s: submount mountdata: %s\n", __func__, mountdata );*/
@@ -234,8 +239,7 @@ compose_mount_options_out:
compose_mount_options_err:
kfree(mountdata);
mountdata = ERR_PTR(rc);
- kfree(*devname);
- *devname = NULL;
+ kfree(name);
goto compose_mount_options_out;
}
@@ -251,20 +255,30 @@ static struct vfsmount *cifs_dfs_do_refmount(struct dentry *mntpt,
{
struct vfsmount *mnt;
char *mountdata;
- char *devname = NULL;
+ char *devname;
+
+ /*
+ * Always pass down the DFS full path to smb3_do_mount() so we
+ * can use it later for failover.
+ */
+ devname = kstrndup(fullpath, strlen(fullpath), GFP_KERNEL);
+ if (!devname)
+ return ERR_PTR(-ENOMEM);
+
+ convert_delimiter(devname, '/');
/* strip first '\' from fullpath */
mountdata = cifs_compose_mount_options(cifs_sb->mountdata,
- fullpath + 1, ref, &devname);
-
- if (IS_ERR(mountdata))
+ fullpath + 1, ref, NULL);
+ if (IS_ERR(mountdata)) {
+ kfree(devname);
return (struct vfsmount *)mountdata;
+ }
mnt = vfs_submount(mntpt, &cifs_fs_type, devname, mountdata);
kfree(mountdata);
kfree(devname);
return mnt;
-
}
static void dump_referral(const struct dfs_info3_param *ref)
@@ -282,16 +296,15 @@ static void dump_referral(const struct dfs_info3_param *ref)
*/
static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
{
- struct dfs_info3_param *referrals = NULL;
- unsigned int num_referrals = 0;
+ struct dfs_info3_param referral = {0};
struct cifs_sb_info *cifs_sb;
struct cifs_ses *ses;
- char *full_path;
+ struct cifs_tcon *tcon;
+ char *full_path, *root_path;
unsigned int xid;
- int i;
+ int len;
int rc;
struct vfsmount *mnt;
- struct tcon_link *tlink;
cifs_dbg(FYI, "in %s\n", __func__);
BUG_ON(IS_ROOT(mntpt));
@@ -315,48 +328,69 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
if (full_path == NULL)
goto cdda_exit;
- tlink = cifs_sb_tlink(cifs_sb);
- if (IS_ERR(tlink)) {
- mnt = ERR_CAST(tlink);
+ cifs_dbg(FYI, "%s: full_path: %s\n", __func__, full_path);
+
+ if (!cifs_sb_master_tlink(cifs_sb)) {
+ cifs_dbg(FYI, "%s: master tlink is NULL\n", __func__);
goto free_full_path;
}
- ses = tlink_tcon(tlink)->ses;
+ tcon = cifs_sb_master_tcon(cifs_sb);
+ if (!tcon) {
+ cifs_dbg(FYI, "%s: master tcon is NULL\n", __func__);
+ goto free_full_path;
+ }
+
+ root_path = kstrdup(tcon->treeName, GFP_KERNEL);
+ if (!root_path) {
+ mnt = ERR_PTR(-ENOMEM);
+ goto free_full_path;
+ }
+ cifs_dbg(FYI, "%s: root path: %s\n", __func__, root_path);
+
+ ses = tcon->ses;
xid = get_xid();
- rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls,
- &num_referrals, &referrals,
- cifs_remap(cifs_sb));
- free_xid(xid);
- cifs_put_tlink(tlink);
-
- mnt = ERR_PTR(-ENOENT);
- for (i = 0; i < num_referrals; i++) {
- int len;
- dump_referral(referrals + i);
- /* connect to a node */
- len = strlen(referrals[i].node_name);
- if (len < 2) {
- cifs_dbg(VFS, "%s: Net Address path too short: %s\n",
- __func__, referrals[i].node_name);
- mnt = ERR_PTR(-EINVAL);
- break;
- }
- mnt = cifs_dfs_do_refmount(mntpt, cifs_sb,
- full_path, referrals + i);
- cifs_dbg(FYI, "%s: cifs_dfs_do_refmount:%s , mnt:%p\n",
- __func__, referrals[i].node_name, mnt);
- if (!IS_ERR(mnt))
- goto success;
+ /*
+ * If DFS root has been expired, then unconditionally fetch it again to
+ * refresh DFS referral cache.
+ */
+ rc = dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb),
+ root_path + 1, NULL, NULL);
+ if (!rc) {
+ rc = dfs_cache_find(xid, ses, cifs_sb->local_nls,
+ cifs_remap(cifs_sb), full_path + 1,
+ &referral, NULL);
}
- /* no valid submounts were found; return error from get_dfs_path() by
- * preference */
- if (rc != 0)
+ free_xid(xid);
+
+ if (rc) {
mnt = ERR_PTR(rc);
+ goto free_root_path;
+ }
+
+ dump_referral(&referral);
-success:
- free_dfs_info_array(referrals, num_referrals);
+ len = strlen(referral.node_name);
+ if (len < 2) {
+ cifs_dbg(VFS, "%s: Net Address path too short: %s\n",
+ __func__, referral.node_name);
+ mnt = ERR_PTR(-EINVAL);
+ goto free_dfs_ref;
+ }
+ /*
+ * cifs_mount() will retry every available node server in case
+ * of failures.
+ */
+ mnt = cifs_dfs_do_refmount(mntpt, cifs_sb, full_path, &referral);
+ cifs_dbg(FYI, "%s: cifs_dfs_do_refmount:%s , mnt:%p\n", __func__,
+ referral.node_name, mnt);
+
+free_dfs_ref:
+ free_dfs_info_param(&referral);
+free_root_path:
+ kfree(root_path);
free_full_path:
kfree(full_path);
cdda_exit:
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 63d7530f2e1d..42f0d67f1054 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -72,6 +72,15 @@ struct cifs_sb_info {
char *mountdata; /* options received at mount time or via DFS refs */
struct delayed_work prune_tlinks;
struct rcu_head rcu;
+
+ /* only used when CIFS_MOUNT_USE_PREFIX_PATH is set */
char *prepath;
+
+ /*
+ * Path initially provided by the mount call. We might connect
+ * to something different via DFS but we want to keep it to do
+ * failover properly.
+ */
+ char *origin_fullpath; /* \\HOST\SHARE\[OPTIONAL PATH] */
};
#endif /* _CIFS_FS_SB_H */
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 85b31cfa2f3c..d2a05e46d6f5 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -224,7 +224,7 @@ int cifs_verify_signature(struct smb_rqst *rqst,
if (cifs_pdu->Command == SMB_COM_LOCKING_ANDX) {
struct smb_com_lock_req *pSMB =
(struct smb_com_lock_req *)cifs_pdu;
- if (pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE)
+ if (pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE)
return 0;
}
@@ -304,12 +304,17 @@ int setup_ntlm_response(struct cifs_ses *ses, const struct nls_table *nls_cp)
int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
char *lnm_session_key)
{
- int i;
+ int i, len;
int rc;
char password_with_pad[CIFS_ENCPWD_SIZE] = {0};
- if (password)
- strncpy(password_with_pad, password, CIFS_ENCPWD_SIZE);
+ if (password) {
+ for (len = 0; len < CIFS_ENCPWD_SIZE; len++)
+ if (!password[len])
+ break;
+
+ memcpy(password_with_pad, password, len);
+ }
if (!encrypt && global_secflags & CIFSSEC_MAY_PLNTXT) {
memcpy(lnm_session_key, password_with_pad,
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 865706edb307..62d48d486d8f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -52,6 +52,9 @@
#include "cifs_spnego.h"
#include "fscache.h"
#include "smb2pdu.h"
+#ifdef CONFIG_CIFS_DFS_UPCALL
+#include "dfs_cache.h"
+#endif
int cifsFYI = 0;
bool traceSMB;
@@ -1494,10 +1497,15 @@ init_cifs(void)
if (rc)
goto out_destroy_mids;
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ rc = dfs_cache_init();
+ if (rc)
+ goto out_destroy_request_bufs;
+#endif /* CONFIG_CIFS_DFS_UPCALL */
#ifdef CONFIG_CIFS_UPCALL
rc = init_cifs_spnego();
if (rc)
- goto out_destroy_request_bufs;
+ goto out_destroy_dfs_cache;
#endif /* CONFIG_CIFS_UPCALL */
#ifdef CONFIG_CIFS_ACL
@@ -1525,6 +1533,10 @@ out_register_key_type:
#endif
#ifdef CONFIG_CIFS_UPCALL
exit_cifs_spnego();
+out_destroy_dfs_cache:
+#endif
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ dfs_cache_destroy();
out_destroy_request_bufs:
#endif
cifs_destroy_request_bufs();
@@ -1556,6 +1568,9 @@ exit_cifs(void)
#ifdef CONFIG_CIFS_UPCALL
exit_cifs_spnego();
#endif
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ dfs_cache_destroy();
+#endif
cifs_destroy_request_bufs();
cifs_destroy_mids();
cifs_destroy_inodecache();
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 4c3b5cfccc49..d1f9c2f3f575 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -150,5 +150,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.14"
+#define CIFS_VERSION "2.16"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 38ab0fca49e1..94dbdbe5be34 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -701,6 +701,13 @@ struct TCP_Server_Info {
struct delayed_work reconnect; /* reconnect workqueue job */
struct mutex reconnect_mutex; /* prevent simultaneous reconnects */
unsigned long echo_interval;
+
+ /*
+ * Number of targets available for reconnect. The more targets
+ * the more tasks have to wait to let the demultiplex thread
+ * reconnect.
+ */
+ int nr_targets;
};
static inline unsigned int
@@ -1014,6 +1021,11 @@ struct cifs_tcon {
struct list_head pending_opens; /* list of incomplete opens */
struct cached_fid crfid; /* Cached root fid */
/* BB add field for back pointer to sb struct(s)? */
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ char *dfs_path;
+ int remap:2;
+ struct list_head ulist; /* cache update list */
+#endif
};
/*
@@ -1426,6 +1438,7 @@ struct mid_q_entry {
int mid_state; /* wish this were enum but can not pass to wait_event */
unsigned int mid_flags;
__le16 command; /* smb command code */
+ unsigned int optype; /* operation type */
bool large_buf:1; /* if valid response, is pointer to large buf */
bool multiRsp:1; /* multiple trans2 responses for one request */
bool multiEnd:1; /* both received */
@@ -1508,6 +1521,7 @@ struct dfs_info3_param {
int ref_flag;
char *path_name;
char *node_name;
+ int ttl;
};
/*
@@ -1545,7 +1559,6 @@ static inline void free_dfs_info_param(struct dfs_info3_param *param)
if (param) {
kfree(param->path_name);
kfree(param->node_name);
- kfree(param);
}
}
@@ -1562,6 +1575,25 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
kfree(param);
}
+static inline bool is_interrupt_error(int error)
+{
+ switch (error) {
+ case -EINTR:
+ case -ERESTARTSYS:
+ case -ERESTARTNOHAND:
+ case -ERESTARTNOINTR:
+ return true;
+ }
+ return false;
+}
+
+static inline bool is_retryable_error(int error)
+{
+ if (is_interrupt_error(error) || error == -EAGAIN)
+ return true;
+ return false;
+}
+
#define MID_FREE 0
#define MID_REQUEST_ALLOCATED 1
#define MID_REQUEST_SUBMITTED 2
@@ -1790,6 +1822,7 @@ extern struct smb_version_values smb3any_values;
extern struct smb_version_operations smb30_operations;
extern struct smb_version_values smb30_values;
#define SMB302_VERSION_STRING "3.02"
+#define ALT_SMB302_VERSION_STRING "3.0.2"
/*extern struct smb_version_operations smb302_operations;*/ /* not needed yet */
extern struct smb_version_values smb302_values;
#define SMB311_VERSION_STRING "3.1.1"
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index fa361bc00602..336c116995d7 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -22,6 +22,9 @@
#define _CIFSPROTO_H
#include <linux/nls.h>
#include "trace.h"
+#ifdef CONFIG_CIFS_DFS_UPCALL
+#include "dfs_cache.h"
+#endif
struct statfs;
struct smb_vol;
@@ -213,7 +216,7 @@ extern int cifs_match_super(struct super_block *, void *);
extern void cifs_cleanup_volume_info(struct smb_vol *pvolume_info);
extern struct smb_vol *cifs_get_volume_info(char *mount_data,
const char *devname, bool is_smb3);
-extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *);
+extern int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol);
extern void cifs_umount(struct cifs_sb_info *);
extern void cifs_mark_open_files_invalid(struct cifs_tcon *tcon);
extern void cifs_reopen_persistent_handles(struct cifs_tcon *tcon);
@@ -294,11 +297,6 @@ extern int CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses,
unsigned int *num_of_nodes,
const struct nls_table *nls_codepage, int remap);
-extern int get_dfs_path(const unsigned int xid, struct cifs_ses *ses,
- const char *old_path,
- const struct nls_table *nls_codepage,
- unsigned int *num_referrals,
- struct dfs_info3_param **referrals, int remap);
extern int parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size,
unsigned int *num_of_nodes,
struct dfs_info3_param **target_nodes,
@@ -524,6 +522,11 @@ extern int E_md4hash(const unsigned char *passwd, unsigned char *p16,
const struct nls_table *codepage);
extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8,
unsigned char *p24);
+extern void
+cifs_cleanup_volume_info_contents(struct smb_vol *volume_info);
+
+extern struct TCP_Server_Info *
+cifs_find_tcp_session(struct smb_vol *vol);
void cifs_readdata_release(struct kref *refcount);
int cifs_async_readv(struct cifs_readdata *rdata);
@@ -562,4 +565,17 @@ void cifs_free_hash(struct crypto_shash **shash, struct sdesc **sdesc);
extern void rqst_page_get_length(struct smb_rqst *rqst, unsigned int page,
unsigned int *len, unsigned int *offset);
+void extract_unc_hostname(const char *unc, const char **h, size_t *len);
+
+#ifdef CONFIG_CIFS_DFS_UPCALL
+static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses,
+ const char *old_path,
+ const struct nls_table *nls_codepage,
+ struct dfs_info3_param *referral, int remap)
+{
+ return dfs_cache_find(xid, ses, nls_codepage, remap, old_path,
+ referral, NULL);
+}
+#endif
+
#endif /* _CIFSPROTO_H */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index f82fd342bca5..e18915415e13 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -44,6 +44,9 @@
#include "cifs_debug.h"
#include "fscache.h"
#include "smbdirect.h"
+#ifdef CONFIG_CIFS_DFS_UPCALL
+#include "dfs_cache.h"
+#endif
#ifdef CONFIG_CIFS_POSIX
static struct {
@@ -118,6 +121,86 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
*/
}
+#ifdef CONFIG_CIFS_DFS_UPCALL
+static int __cifs_reconnect_tcon(const struct nls_table *nlsc,
+ struct cifs_tcon *tcon)
+{
+ int rc;
+ struct dfs_cache_tgt_list tl;
+ struct dfs_cache_tgt_iterator *it = NULL;
+ char *tree;
+ const char *tcp_host;
+ size_t tcp_host_len;
+ const char *dfs_host;
+ size_t dfs_host_len;
+
+ tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL);
+ if (!tree)
+ return -ENOMEM;
+
+ if (tcon->ipc) {
+ snprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$",
+ tcon->ses->server->hostname);
+ rc = CIFSTCon(0, tcon->ses, tree, tcon, nlsc);
+ goto out;
+ }
+
+ if (!tcon->dfs_path) {
+ rc = CIFSTCon(0, tcon->ses, tcon->treeName, tcon, nlsc);
+ goto out;
+ }
+
+ rc = dfs_cache_noreq_find(tcon->dfs_path + 1, NULL, &tl);
+ if (rc)
+ goto out;
+
+ extract_unc_hostname(tcon->ses->server->hostname, &tcp_host,
+ &tcp_host_len);
+
+ for (it = dfs_cache_get_tgt_iterator(&tl); it;
+ it = dfs_cache_get_next_tgt(&tl, it)) {
+ const char *tgt = dfs_cache_get_tgt_name(it);
+
+ extract_unc_hostname(tgt, &dfs_host, &dfs_host_len);
+
+ if (dfs_host_len != tcp_host_len
+ || strncasecmp(dfs_host, tcp_host, dfs_host_len) != 0) {
+ cifs_dbg(FYI, "%s: skipping %.*s, doesn't match %.*s",
+ __func__,
+ (int)dfs_host_len, dfs_host,
+ (int)tcp_host_len, tcp_host);
+ continue;
+ }
+
+ snprintf(tree, MAX_TREE_SIZE, "\\%s", tgt);
+
+ rc = CIFSTCon(0, tcon->ses, tree, tcon, nlsc);
+ if (!rc)
+ break;
+ if (rc == -EREMOTE)
+ break;
+ }
+
+ if (!rc) {
+ if (it)
+ rc = dfs_cache_noreq_update_tgthint(tcon->dfs_path + 1,
+ it);
+ else
+ rc = -ENOENT;
+ }
+ dfs_cache_free_tgts(&tl);
+out:
+ kfree(tree);
+ return rc;
+}
+#else
+static inline int __cifs_reconnect_tcon(const struct nls_table *nlsc,
+ struct cifs_tcon *tcon)
+{
+ return CIFSTCon(0, tcon->ses, tcon->treeName, tcon, nlsc);
+}
+#endif
+
/* reconnect the socket, tcon, and smb session if needed */
static int
cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
@@ -126,6 +209,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
struct cifs_ses *ses;
struct TCP_Server_Info *server;
struct nls_table *nls_codepage;
+ int retries;
/*
* SMBs NegProt, SessSetup, uLogoff do not have tcon yet so check for
@@ -152,9 +236,12 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
}
}
+ retries = server->nr_targets;
+
/*
- * Give demultiplex thread up to 10 seconds to reconnect, should be
- * greater than cifs socket timeout which is 7 seconds
+ * Give demultiplex thread up to 10 seconds to each target available for
+ * reconnect -- should be greater than cifs socket timeout which is 7
+ * seconds.
*/
while (server->tcpStatus == CifsNeedReconnect) {
rc = wait_event_interruptible_timeout(server->response_q,
@@ -170,6 +257,9 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
if (server->tcpStatus != CifsNeedReconnect)
break;
+ if (--retries)
+ continue;
+
/*
* on "soft" mounts we wait once. Hard mounts keep
* retrying until process is killed or server comes
@@ -179,6 +269,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
cifs_dbg(FYI, "gave up waiting on reconnect in smb_init\n");
return -EHOSTDOWN;
}
+ retries = server->nr_targets;
}
if (!ses->need_reconnect && !tcon->need_reconnect)
@@ -214,7 +305,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
}
cifs_mark_open_files_invalid(tcon);
- rc = CIFSTCon(0, ses, tcon->treeName, tcon, nls_codepage);
+ rc = __cifs_reconnect_tcon(nls_codepage, tcon);
mutex_unlock(&ses->session_mutex);
cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc);
@@ -2032,7 +2123,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
for (j = 0; j < nr_pages; j++) {
unlock_page(wdata2->pages[j]);
- if (rc != 0 && rc != -EAGAIN) {
+ if (rc != 0 && !is_retryable_error(rc)) {
SetPageError(wdata2->pages[j]);
end_page_writeback(wdata2->pages[j]);
put_page(wdata2->pages[j]);
@@ -2041,7 +2132,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
if (rc) {
kref_put(&wdata2->refcount, cifs_writedata_release);
- if (rc == -EAGAIN)
+ if (is_retryable_error(rc))
continue;
break;
}
@@ -2050,7 +2141,8 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
i += nr_pages;
} while (i < wdata->nr_pages);
- mapping_set_error(inode->i_mapping, rc);
+ if (rc != 0 && !is_retryable_error(rc))
+ mapping_set_error(inode->i_mapping, rc);
kref_put(&wdata->refcount, cifs_writedata_release);
}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 6f24f129a751..683310f26171 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -56,6 +56,11 @@
#include "fscache.h"
#include "smb2proto.h"
#include "smbdirect.h"
+#include "dns_resolve.h"
+#include "cifsfs.h"
+#ifdef CONFIG_CIFS_DFS_UPCALL
+#include "dfs_cache.h"
+#endif
extern mempool_t *cifs_req_poolp;
extern bool disable_legacy_dialects;
@@ -304,6 +309,7 @@ static const match_table_t cifs_smb_version_tokens = {
{ Smb_21, SMB21_VERSION_STRING },
{ Smb_30, SMB30_VERSION_STRING },
{ Smb_302, SMB302_VERSION_STRING },
+ { Smb_302, ALT_SMB302_VERSION_STRING },
{ Smb_311, SMB311_VERSION_STRING },
{ Smb_311, ALT_SMB311_VERSION_STRING },
{ Smb_3any, SMB3ANY_VERSION_STRING },
@@ -317,6 +323,132 @@ static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink);
static void cifs_prune_tlinks(struct work_struct *work);
static int cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data,
const char *devname, bool is_smb3);
+static char *extract_hostname(const char *unc);
+
+/*
+ * Resolve hostname and set ip addr in tcp ses. Useful for hostnames that may
+ * get their ip addresses changed at some point.
+ *
+ * This should be called with server->srv_mutex held.
+ */
+#ifdef CONFIG_CIFS_DFS_UPCALL
+static int reconn_set_ipaddr(struct TCP_Server_Info *server)
+{
+ int rc;
+ int len;
+ char *unc, *ipaddr = NULL;
+
+ if (!server->hostname)
+ return -EINVAL;
+
+ len = strlen(server->hostname) + 3;
+
+ unc = kmalloc(len, GFP_KERNEL);
+ if (!unc) {
+ cifs_dbg(FYI, "%s: failed to create UNC path\n", __func__);
+ return -ENOMEM;
+ }
+ snprintf(unc, len, "\\\\%s", server->hostname);
+
+ rc = dns_resolve_server_name_to_ip(unc, &ipaddr);
+ kfree(unc);
+
+ if (rc < 0) {
+ cifs_dbg(FYI, "%s: failed to resolve server part of %s to IP: %d\n",
+ __func__, server->hostname, rc);
+ return rc;
+ }
+
+ rc = cifs_convert_address((struct sockaddr *)&server->dstaddr, ipaddr,
+ strlen(ipaddr));
+ kfree(ipaddr);
+
+ return !rc ? -1 : 0;
+}
+#else
+static inline int reconn_set_ipaddr(struct TCP_Server_Info *server)
+{
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_CIFS_DFS_UPCALL
+struct super_cb_data {
+ struct TCP_Server_Info *server;
+ struct cifs_sb_info *cifs_sb;
+};
+
+/* These functions must be called with server->srv_mutex held */
+
+static void super_cb(struct super_block *sb, void *arg)
+{
+ struct super_cb_data *d = arg;
+ struct cifs_sb_info *cifs_sb;
+ struct cifs_tcon *tcon;
+
+ if (d->cifs_sb)
+ return;
+
+ cifs_sb = CIFS_SB(sb);
+ tcon = cifs_sb_master_tcon(cifs_sb);
+ if (tcon->ses->server == d->server)
+ d->cifs_sb = cifs_sb;
+}
+
+static inline struct cifs_sb_info *
+find_super_by_tcp(struct TCP_Server_Info *server)
+{
+ struct super_cb_data d = {
+ .server = server,
+ .cifs_sb = NULL,
+ };
+
+ iterate_supers_type(&cifs_fs_type, super_cb, &d);
+ return d.cifs_sb ? d.cifs_sb : ERR_PTR(-ENOENT);
+}
+
+static void reconn_inval_dfs_target(struct TCP_Server_Info *server,
+ struct cifs_sb_info *cifs_sb,
+ struct dfs_cache_tgt_list *tgt_list,
+ struct dfs_cache_tgt_iterator **tgt_it)
+{
+ const char *name;
+
+ if (!cifs_sb || !cifs_sb->origin_fullpath || !tgt_list ||
+ !server->nr_targets)
+ return;
+
+ if (!*tgt_it) {
+ *tgt_it = dfs_cache_get_tgt_iterator(tgt_list);
+ } else {
+ *tgt_it = dfs_cache_get_next_tgt(tgt_list, *tgt_it);
+ if (!*tgt_it)
+ *tgt_it = dfs_cache_get_tgt_iterator(tgt_list);
+ }
+
+ cifs_dbg(FYI, "%s: UNC: %s\n", __func__, cifs_sb->origin_fullpath);
+
+ name = dfs_cache_get_tgt_name(*tgt_it);
+
+ kfree(server->hostname);
+
+ server->hostname = extract_hostname(name);
+ if (IS_ERR(server->hostname)) {
+ cifs_dbg(FYI,
+ "%s: failed to extract hostname from target: %ld\n",
+ __func__, PTR_ERR(server->hostname));
+ }
+}
+
+static inline int reconn_setup_dfs_targets(struct cifs_sb_info *cifs_sb,
+ struct dfs_cache_tgt_list *tl,
+ struct dfs_cache_tgt_iterator **it)
+{
+ if (!cifs_sb->origin_fullpath)
+ return -EOPNOTSUPP;
+ return dfs_cache_noreq_find(cifs_sb->origin_fullpath + 1, NULL, tl);
+}
+#endif
/*
* cifs tcp session reconnection
@@ -335,8 +467,33 @@ cifs_reconnect(struct TCP_Server_Info *server)
struct cifs_tcon *tcon;
struct mid_q_entry *mid_entry;
struct list_head retry_list;
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ struct cifs_sb_info *cifs_sb = NULL;
+ struct dfs_cache_tgt_list tgt_list = {0};
+ struct dfs_cache_tgt_iterator *tgt_it = NULL;
+#endif
spin_lock(&GlobalMid_Lock);
+ server->nr_targets = 1;
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ cifs_sb = find_super_by_tcp(server);
+ if (IS_ERR(cifs_sb)) {
+ rc = PTR_ERR(cifs_sb);
+ cifs_dbg(FYI, "%s: will not do DFS failover: rc = %d\n",
+ __func__, rc);
+ cifs_sb = NULL;
+ } else {
+ rc = reconn_setup_dfs_targets(cifs_sb, &tgt_list, &tgt_it);
+ if (rc && (rc != -EOPNOTSUPP)) {
+ cifs_dbg(VFS, "%s: no target servers for DFS failover\n",
+ __func__);
+ } else {
+ server->nr_targets = dfs_cache_get_nr_tgts(&tgt_list);
+ }
+ }
+ cifs_dbg(FYI, "%s: will retry %d target(s)\n", __func__,
+ server->nr_targets);
+#endif
if (server->tcpStatus == CifsExiting) {
/* the demux thread will exit normally
next time through the loop */
@@ -410,14 +567,27 @@ cifs_reconnect(struct TCP_Server_Info *server)
do {
try_to_freeze();
- /* we should try only the port we connected to before */
mutex_lock(&server->srv_mutex);
+ /*
+ * Set up next DFS target server (if any) for reconnect. If DFS
+ * feature is disabled, then we will retry last server we
+ * connected to before.
+ */
if (cifs_rdma_enabled(server))
rc = smbd_reconnect(server);
else
rc = generic_ip_connect(server);
if (rc) {
cifs_dbg(FYI, "reconnect error %d\n", rc);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ reconn_inval_dfs_target(server, cifs_sb, &tgt_list,
+ &tgt_it);
+#endif
+ rc = reconn_set_ipaddr(server);
+ if (rc) {
+ cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n",
+ __func__, rc);
+ }
mutex_unlock(&server->srv_mutex);
msleep(3000);
} else {
@@ -430,6 +600,22 @@ cifs_reconnect(struct TCP_Server_Info *server)
}
} while (server->tcpStatus == CifsNeedReconnect);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ if (tgt_it) {
+ rc = dfs_cache_noreq_update_tgthint(cifs_sb->origin_fullpath + 1,
+ tgt_it);
+ if (rc) {
+ cifs_dbg(VFS, "%s: failed to update DFS target hint: rc = %d\n",
+ __func__, rc);
+ }
+ rc = dfs_cache_update_vol(cifs_sb->origin_fullpath, server);
+ if (rc) {
+ cifs_dbg(VFS, "%s: failed to update vol info in DFS cache: rc = %d\n",
+ __func__, rc);
+ }
+ dfs_cache_free_tgts(&tgt_list);
+ }
+#endif
if (server->tcpStatus == CifsNeedNegotiate)
mod_delayed_work(cifsiod_wq, &server->echo, 0);
@@ -1043,7 +1229,12 @@ extract_hostname(const char *unc)
/* skip double chars at beginning of string */
/* BB: check validity of these bytes? */
- src = unc + 2;
+ if (strlen(unc) < 3)
+ return ERR_PTR(-EINVAL);
+ for (src = unc; *src && *src == '\\'; src++)
+ ;
+ if (!*src)
+ return ERR_PTR(-EINVAL);
/* delimiter between hostname and sharename is always '\\' now */
delim = strchr(src, '\\');
@@ -1827,7 +2018,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
vol->password = NULL;
break;
}
- /* Yes it is. Drop down to Opt_pass below.*/
+ /* Fallthrough - to Opt_pass below.*/
case Opt_pass:
/* Obtain the value string */
value = strchr(data, '=');
@@ -2289,7 +2480,7 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol)
return 1;
}
-static struct TCP_Server_Info *
+struct TCP_Server_Info *
cifs_find_tcp_session(struct smb_vol *vol)
{
struct TCP_Server_Info *server;
@@ -2461,6 +2652,8 @@ smbd_connected:
}
tcp_ses->tcpStatus = CifsNeedNegotiate;
+ tcp_ses->nr_targets = 1;
+
/* thread spawned, put it on the list */
spin_lock(&cifs_tcp_ses_lock);
list_add(&tcp_ses->tcp_ses_list, &cifs_tcp_ses_list);
@@ -3256,25 +3449,6 @@ out:
return rc;
}
-int
-get_dfs_path(const unsigned int xid, struct cifs_ses *ses, const char *old_path,
- const struct nls_table *nls_codepage, unsigned int *num_referrals,
- struct dfs_info3_param **referrals, int remap)
-{
- int rc = 0;
-
- if (!ses->server->ops->get_dfs_refer)
- return -ENOSYS;
-
- *num_referrals = 0;
- *referrals = NULL;
-
- rc = ses->server->ops->get_dfs_refer(xid, ses, old_path,
- referrals, num_referrals,
- nls_codepage, remap);
- return rc;
-}
-
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key cifs_key[2];
static struct lock_class_key cifs_slock_key[2];
@@ -3746,8 +3920,8 @@ int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
return 0;
}
-static void
-cleanup_volume_info_contents(struct smb_vol *volume_info)
+void
+cifs_cleanup_volume_info_contents(struct smb_vol *volume_info)
{
kfree(volume_info->username);
kzfree(volume_info->password);
@@ -3762,10 +3936,136 @@ cifs_cleanup_volume_info(struct smb_vol *volume_info)
{
if (!volume_info)
return;
- cleanup_volume_info_contents(volume_info);
+ cifs_cleanup_volume_info_contents(volume_info);
kfree(volume_info);
}
+/* Release all succeed connections */
+static inline void mount_put_conns(struct cifs_sb_info *cifs_sb,
+ unsigned int xid,
+ struct TCP_Server_Info *server,
+ struct cifs_ses *ses, struct cifs_tcon *tcon)
+{
+ int rc = 0;
+
+ if (tcon)
+ cifs_put_tcon(tcon);
+ else if (ses)
+ cifs_put_smb_ses(ses);
+ else if (server)
+ cifs_put_tcp_session(server, 0);
+ cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS;
+ free_xid(xid);
+}
+
+/* Get connections for tcp, ses and tcon */
+static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
+ unsigned int *xid,
+ struct TCP_Server_Info **nserver,
+ struct cifs_ses **nses, struct cifs_tcon **ntcon)
+{
+ int rc = 0;
+ struct TCP_Server_Info *server;
+ struct cifs_ses *ses;
+ struct cifs_tcon *tcon;
+
+ *nserver = NULL;
+ *nses = NULL;
+ *ntcon = NULL;
+
+ *xid = get_xid();
+
+ /* get a reference to a tcp session */
+ server = cifs_get_tcp_session(vol);
+ if (IS_ERR(server)) {
+ rc = PTR_ERR(server);
+ return rc;
+ }
+
+ *nserver = server;
+
+ if ((vol->max_credits < 20) || (vol->max_credits > 60000))
+ server->max_credits = SMB2_MAX_CREDITS_AVAILABLE;
+ else
+ server->max_credits = vol->max_credits;
+
+ /* get a reference to a SMB session */
+ ses = cifs_get_smb_ses(server, vol);
+ if (IS_ERR(ses)) {
+ rc = PTR_ERR(ses);
+ return rc;
+ }
+
+ *nses = ses;
+
+ if ((vol->persistent == true) && (!(ses->server->capabilities &
+ SMB2_GLOBAL_CAP_PERSISTENT_HANDLES))) {
+ cifs_dbg(VFS, "persistent handles not supported by server\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* search for existing tcon to this server share */
+ tcon = cifs_get_tcon(ses, vol);
+ if (IS_ERR(tcon)) {
+ rc = PTR_ERR(tcon);
+ return rc;
+ }
+
+ *ntcon = tcon;
+
+ /* if new SMB3.11 POSIX extensions are supported do not remap / and \ */
+ if (tcon->posix_extensions)
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS;
+
+ /* tell server which Unix caps we support */
+ if (cap_unix(tcon->ses)) {
+ /*
+ * reset of caps checks mount to see if unix extensions disabled
+ * for just this mount.
+ */
+ reset_cifs_unix_caps(*xid, tcon, cifs_sb, vol);
+ if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) &&
+ (le64_to_cpu(tcon->fsUnixInfo.Capability) &
+ CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP))
+ return -EACCES;
+ } else
+ tcon->unix_ext = 0; /* server does not support them */
+
+ /* do not care if a following call succeed - informational */
+ if (!tcon->pipe && server->ops->qfs_tcon)
+ server->ops->qfs_tcon(*xid, tcon);
+
+ cifs_sb->wsize = server->ops->negotiate_wsize(tcon, vol);
+ cifs_sb->rsize = server->ops->negotiate_rsize(tcon, vol);
+
+ return 0;
+}
+
+static int mount_setup_tlink(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
+ struct cifs_tcon *tcon)
+{
+ struct tcon_link *tlink;
+
+ /* hang the tcon off of the superblock */
+ tlink = kzalloc(sizeof(*tlink), GFP_KERNEL);
+ if (tlink == NULL)
+ return -ENOMEM;
+
+ tlink->tl_uid = ses->linux_uid;
+ tlink->tl_tcon = tcon;
+ tlink->tl_time = jiffies;
+ set_bit(TCON_LINK_MASTER, &tlink->tl_flags);
+ set_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
+
+ cifs_sb->master_tlink = tlink;
+ spin_lock(&cifs_sb->tlink_tree_lock);
+ tlink_rb_insert(&cifs_sb->tlink_tree, tlink);
+ spin_unlock(&cifs_sb->tlink_tree_lock);
+
+ queue_delayed_work(cifsiod_wq, &cifs_sb->prune_tlinks,
+ TLINK_IDLE_EXPIRE);
+ return 0;
+}
#ifdef CONFIG_CIFS_DFS_UPCALL
/*
@@ -3774,10 +4074,11 @@ cifs_cleanup_volume_info(struct smb_vol *volume_info)
*/
static char *
build_unc_path_to_root(const struct smb_vol *vol,
- const struct cifs_sb_info *cifs_sb)
+ const struct cifs_sb_info *cifs_sb, bool useppath)
{
char *full_path, *pos;
- unsigned int pplen = vol->prepath ? strlen(vol->prepath) + 1 : 0;
+ unsigned int pplen = useppath && vol->prepath ?
+ strlen(vol->prepath) + 1 : 0;
unsigned int unc_len = strnlen(vol->UNC, MAX_TREE_SIZE + 1);
full_path = kmalloc(unc_len + pplen + 1, GFP_KERNEL);
@@ -3799,8 +4100,9 @@ build_unc_path_to_root(const struct smb_vol *vol,
return full_path;
}
-/*
- * Perform a dfs referral query for a share and (optionally) prefix
+/**
+ * expand_dfs_referral - Perform a dfs referral query and update the cifs_sb
+ *
*
* If a referral is found, cifs_sb->mountdata will be (re-)allocated
* to a string containing updated options for the submount. Otherwise it
@@ -3815,39 +4117,36 @@ expand_dfs_referral(const unsigned int xid, struct cifs_ses *ses,
int check_prefix)
{
int rc;
- unsigned int num_referrals = 0;
- struct dfs_info3_param *referrals = NULL;
+ struct dfs_info3_param referral = {0};
char *full_path = NULL, *ref_path = NULL, *mdata = NULL;
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
return -EREMOTE;
- full_path = build_unc_path_to_root(volume_info, cifs_sb);
+ full_path = build_unc_path_to_root(volume_info, cifs_sb, true);
if (IS_ERR(full_path))
return PTR_ERR(full_path);
/* For DFS paths, skip the first '\' of the UNC */
ref_path = check_prefix ? full_path + 1 : volume_info->UNC + 1;
- rc = get_dfs_path(xid, ses, ref_path, cifs_sb->local_nls,
- &num_referrals, &referrals, cifs_remap(cifs_sb));
-
- if (!rc && num_referrals > 0) {
+ rc = dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb),
+ ref_path, &referral, NULL);
+ if (!rc) {
char *fake_devname = NULL;
mdata = cifs_compose_mount_options(cifs_sb->mountdata,
- full_path + 1, referrals,
+ full_path + 1, &referral,
&fake_devname);
-
- free_dfs_info_array(referrals, num_referrals);
+ free_dfs_info_param(&referral);
if (IS_ERR(mdata)) {
rc = PTR_ERR(mdata);
mdata = NULL;
} else {
- cleanup_volume_info_contents(volume_info);
+ cifs_cleanup_volume_info_contents(volume_info);
rc = cifs_setup_volume_info(volume_info, mdata,
- fake_devname, false);
+ fake_devname, false);
}
kfree(fake_devname);
kfree(cifs_sb->mountdata);
@@ -3856,6 +4155,143 @@ expand_dfs_referral(const unsigned int xid, struct cifs_ses *ses,
kfree(full_path);
return rc;
}
+
+static inline int get_next_dfs_tgt(const char *path,
+ struct dfs_cache_tgt_list *tgt_list,
+ struct dfs_cache_tgt_iterator **tgt_it)
+{
+ if (!*tgt_it)
+ *tgt_it = dfs_cache_get_tgt_iterator(tgt_list);
+ else
+ *tgt_it = dfs_cache_get_next_tgt(tgt_list, *tgt_it);
+ return !*tgt_it ? -EHOSTDOWN : 0;
+}
+
+static int update_vol_info(const struct dfs_cache_tgt_iterator *tgt_it,
+ struct smb_vol *fake_vol, struct smb_vol *vol)
+{
+ const char *tgt = dfs_cache_get_tgt_name(tgt_it);
+ int len = strlen(tgt) + 2;
+ char *new_unc;
+
+ new_unc = kmalloc(len, GFP_KERNEL);
+ if (!new_unc)
+ return -ENOMEM;
+ snprintf(new_unc, len, "\\%s", tgt);
+
+ kfree(vol->UNC);
+ vol->UNC = new_unc;
+
+ if (fake_vol->prepath) {
+ kfree(vol->prepath);
+ vol->prepath = fake_vol->prepath;
+ fake_vol->prepath = NULL;
+ }
+ memcpy(&vol->dstaddr, &fake_vol->dstaddr, sizeof(vol->dstaddr));
+
+ return 0;
+}
+
+static int setup_dfs_tgt_conn(const char *path,
+ const struct dfs_cache_tgt_iterator *tgt_it,
+ struct cifs_sb_info *cifs_sb,
+ struct smb_vol *vol,
+ unsigned int *xid,
+ struct TCP_Server_Info **server,
+ struct cifs_ses **ses,
+ struct cifs_tcon **tcon)
+{
+ int rc;
+ struct dfs_info3_param ref = {0};
+ char *mdata = NULL, *fake_devname = NULL;
+ struct smb_vol fake_vol = {0};
+
+ cifs_dbg(FYI, "%s: dfs path: %s\n", __func__, path);
+
+ rc = dfs_cache_get_tgt_referral(path, tgt_it, &ref);
+ if (rc)
+ return rc;
+
+ mdata = cifs_compose_mount_options(cifs_sb->mountdata, path, &ref,
+ &fake_devname);
+ free_dfs_info_param(&ref);
+
+ if (IS_ERR(mdata)) {
+ rc = PTR_ERR(mdata);
+ mdata = NULL;
+ } else {
+ cifs_dbg(FYI, "%s: fake_devname: %s\n", __func__, fake_devname);
+ rc = cifs_setup_volume_info(&fake_vol, mdata, fake_devname,
+ false);
+ }
+ kfree(mdata);
+ kfree(fake_devname);
+
+ if (!rc) {
+ /*
+ * We use a 'fake_vol' here because we need pass it down to the
+ * mount_{get,put} functions to test connection against new DFS
+ * targets.
+ */
+ mount_put_conns(cifs_sb, *xid, *server, *ses, *tcon);
+ rc = mount_get_conns(&fake_vol, cifs_sb, xid, server, ses,
+ tcon);
+ if (!rc) {
+ /*
+ * We were able to connect to new target server.
+ * Update current volume info with new target server.
+ */
+ rc = update_vol_info(tgt_it, &fake_vol, vol);
+ }
+ }
+ cifs_cleanup_volume_info_contents(&fake_vol);
+ return rc;
+}
+
+static int mount_do_dfs_failover(const char *path,
+ struct cifs_sb_info *cifs_sb,
+ struct smb_vol *vol,
+ struct cifs_ses *root_ses,
+ unsigned int *xid,
+ struct TCP_Server_Info **server,
+ struct cifs_ses **ses,
+ struct cifs_tcon **tcon)
+{
+ int rc;
+ struct dfs_cache_tgt_list tgt_list;
+ struct dfs_cache_tgt_iterator *tgt_it = NULL;
+
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
+ return -EOPNOTSUPP;
+
+ rc = dfs_cache_noreq_find(path, NULL, &tgt_list);
+ if (rc)
+ return rc;
+
+ for (;;) {
+ /* Get next DFS target server - if any */
+ rc = get_next_dfs_tgt(path, &tgt_list, &tgt_it);
+ if (rc)
+ break;
+ /* Connect to next DFS target */
+ rc = setup_dfs_tgt_conn(path, tgt_it, cifs_sb, vol, xid, server,
+ ses, tcon);
+ if (!rc || rc == -EACCES || rc == -EOPNOTSUPP)
+ break;
+ }
+ if (!rc) {
+ /*
+ * Update DFS target hint in DFS referral cache with the target
+ * server we successfully reconnected to.
+ */
+ rc = dfs_cache_update_tgthint(*xid, root_ses ? root_ses : *ses,
+ cifs_sb->local_nls,
+ cifs_remap(cifs_sb), path,
+ tgt_it);
+ }
+ dfs_cache_free_tgts(&tgt_list);
+ return rc;
+}
#endif
static int
@@ -3954,107 +4390,108 @@ cifs_are_all_path_components_accessible(struct TCP_Server_Info *server,
return rc;
}
-int
-cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info)
+/*
+ * Check if path is remote (e.g. a DFS share). Return -EREMOTE if it is,
+ * otherwise 0.
+ */
+static int is_path_remote(struct cifs_sb_info *cifs_sb, struct smb_vol *vol,
+ const unsigned int xid,
+ struct TCP_Server_Info *server,
+ struct cifs_tcon *tcon)
{
int rc;
- unsigned int xid;
- struct cifs_ses *ses;
- struct cifs_tcon *tcon;
- struct TCP_Server_Info *server;
- char *full_path;
- struct tcon_link *tlink;
-#ifdef CONFIG_CIFS_DFS_UPCALL
- int referral_walks_count = 0;
-#endif
-
-#ifdef CONFIG_CIFS_DFS_UPCALL
-try_mount_again:
- /* cleanup activities if we're chasing a referral */
- if (referral_walks_count) {
- if (tcon)
- cifs_put_tcon(tcon);
- else if (ses)
- cifs_put_smb_ses(ses);
+ char *full_path;
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS;
+ if (!server->ops->is_path_accessible)
+ return -EOPNOTSUPP;
- free_xid(xid);
- }
-#endif
- rc = 0;
- tcon = NULL;
- ses = NULL;
- server = NULL;
- full_path = NULL;
- tlink = NULL;
+ /*
+ * cifs_build_path_to_root works only when we have a valid tcon
+ */
+ full_path = cifs_build_path_to_root(vol, cifs_sb, tcon,
+ tcon->Flags & SMB_SHARE_IS_IN_DFS);
+ if (full_path == NULL)
+ return -ENOMEM;
- xid = get_xid();
+ cifs_dbg(FYI, "%s: full_path: %s\n", __func__, full_path);
- /* get a reference to a tcp session */
- server = cifs_get_tcp_session(volume_info);
- if (IS_ERR(server)) {
- rc = PTR_ERR(server);
- goto out;
- }
- if ((volume_info->max_credits < 20) ||
- (volume_info->max_credits > 60000))
- server->max_credits = SMB2_MAX_CREDITS_AVAILABLE;
- else
- server->max_credits = volume_info->max_credits;
- /* get a reference to a SMB session */
- ses = cifs_get_smb_ses(server, volume_info);
- if (IS_ERR(ses)) {
- rc = PTR_ERR(ses);
- ses = NULL;
- goto mount_fail_check;
+ rc = server->ops->is_path_accessible(xid, tcon, cifs_sb,
+ full_path);
+ if (rc != 0 && rc != -EREMOTE) {
+ kfree(full_path);
+ return rc;
}
- if ((volume_info->persistent == true) && ((ses->server->capabilities &
- SMB2_GLOBAL_CAP_PERSISTENT_HANDLES) == 0)) {
- cifs_dbg(VFS, "persistent handles not supported by server\n");
- rc = -EOPNOTSUPP;
- goto mount_fail_check;
+ if (rc != -EREMOTE) {
+ rc = cifs_are_all_path_components_accessible(server, xid, tcon,
+ cifs_sb,
+ full_path);
+ if (rc != 0) {
+ cifs_dbg(VFS, "cannot query dirs between root and final path, "
+ "enabling CIFS_MOUNT_USE_PREFIX_PATH\n");
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
+ rc = 0;
+ }
}
- /* search for existing tcon to this server share */
- tcon = cifs_get_tcon(ses, volume_info);
- if (IS_ERR(tcon)) {
- rc = PTR_ERR(tcon);
- tcon = NULL;
- if (rc == -EACCES)
- goto mount_fail_check;
-
- goto remote_path_check;
- }
+ kfree(full_path);
+ return rc;
+}
- /* if new SMB3.11 POSIX extensions are supported do not remap / and \ */
- if (tcon->posix_extensions)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS;
+#ifdef CONFIG_CIFS_DFS_UPCALL
+int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol)
+{
+ int rc = 0;
+ unsigned int xid;
+ struct cifs_ses *ses;
+ struct cifs_tcon *root_tcon = NULL;
+ struct cifs_tcon *tcon = NULL;
+ struct TCP_Server_Info *server;
+ char *root_path = NULL, *full_path = NULL;
+ char *old_mountdata;
+ int count;
- /* tell server which Unix caps we support */
- if (cap_unix(tcon->ses)) {
- /* reset of caps checks mount to see if unix extensions
- disabled for just this mount */
- reset_cifs_unix_caps(xid, tcon, cifs_sb, volume_info);
- if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) &&
- (le64_to_cpu(tcon->fsUnixInfo.Capability) &
- CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)) {
- rc = -EACCES;
- goto mount_fail_check;
+ rc = mount_get_conns(vol, cifs_sb, &xid, &server, &ses, &tcon);
+ if (!rc && tcon) {
+ /* If not a standalone DFS root, then check if path is remote */
+ rc = dfs_cache_find(xid, ses, cifs_sb->local_nls,
+ cifs_remap(cifs_sb), vol->UNC + 1, NULL,
+ NULL);
+ if (rc) {
+ rc = is_path_remote(cifs_sb, vol, xid, server, tcon);
+ if (!rc)
+ goto out;
+ if (rc != -EREMOTE)
+ goto error;
}
- } else
- tcon->unix_ext = 0; /* server does not support them */
-
- /* do not care if a following call succeed - informational */
- if (!tcon->pipe && server->ops->qfs_tcon)
- server->ops->qfs_tcon(xid, tcon);
-
- cifs_sb->wsize = server->ops->negotiate_wsize(tcon, volume_info);
- cifs_sb->rsize = server->ops->negotiate_rsize(tcon, volume_info);
+ }
+ /*
+ * If first DFS target server went offline and we failed to connect it,
+ * server and ses pointers are NULL at this point, though we still have
+ * chance to get a cached DFS referral in expand_dfs_referral() and
+ * retry next target available in it.
+ *
+ * If a NULL ses ptr is passed to dfs_cache_find(), a lookup will be
+ * performed against DFS path and *no* requests will be sent to server
+ * for any new DFS referrals. Hence it's safe to skip checking whether
+ * server or ses ptr is NULL.
+ */
+ if (rc == -EACCES || rc == -EOPNOTSUPP)
+ goto error;
+
+ root_path = build_unc_path_to_root(vol, cifs_sb, false);
+ if (IS_ERR(root_path)) {
+ rc = PTR_ERR(root_path);
+ root_path = NULL;
+ goto error;
+ }
-remote_path_check:
-#ifdef CONFIG_CIFS_DFS_UPCALL
+ full_path = build_unc_path_to_root(vol, cifs_sb, true);
+ if (IS_ERR(full_path)) {
+ rc = PTR_ERR(full_path);
+ full_path = NULL;
+ goto error;
+ }
/*
* Perform an unconditional check for whether there are DFS
* referrals for this path without prefix, to provide support
@@ -4062,119 +4499,173 @@ remote_path_check:
* with PATH_NOT_COVERED to requests that include the prefix.
* Chase the referral if found, otherwise continue normally.
*/
- if (referral_walks_count == 0) {
- int refrc = expand_dfs_referral(xid, ses, volume_info, cifs_sb,
- false);
- if (!refrc) {
- referral_walks_count++;
- goto try_mount_again;
- }
+ old_mountdata = cifs_sb->mountdata;
+ (void)expand_dfs_referral(xid, ses, vol, cifs_sb, false);
+
+ if (cifs_sb->mountdata == NULL) {
+ rc = -ENOENT;
+ goto error;
}
-#endif
- /* check if a whole path is not remote */
- if (!rc && tcon) {
- if (!server->ops->is_path_accessible) {
- rc = -ENOSYS;
- goto mount_fail_check;
+ if (cifs_sb->mountdata != old_mountdata) {
+ /* If we were redirected, reconnect to new target server */
+ mount_put_conns(cifs_sb, xid, server, ses, tcon);
+ rc = mount_get_conns(vol, cifs_sb, &xid, &server, &ses, &tcon);
+ }
+ if (rc) {
+ if (rc == -EACCES || rc == -EOPNOTSUPP)
+ goto error;
+ /* Perform DFS failover to any other DFS targets */
+ rc = mount_do_dfs_failover(root_path + 1, cifs_sb, vol, NULL,
+ &xid, &server, &ses, &tcon);
+ if (rc)
+ goto error;
+ }
+
+ kfree(root_path);
+ root_path = build_unc_path_to_root(vol, cifs_sb, false);
+ if (IS_ERR(root_path)) {
+ rc = PTR_ERR(root_path);
+ root_path = NULL;
+ goto error;
+ }
+ /* Cache out resolved root server */
+ (void)dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb),
+ root_path + 1, NULL, NULL);
+ /*
+ * Save root tcon for additional DFS requests to update or create a new
+ * DFS cache entry, or even perform DFS failover.
+ */
+ spin_lock(&cifs_tcp_ses_lock);
+ tcon->tc_count++;
+ tcon->dfs_path = root_path;
+ root_path = NULL;
+ tcon->remap = cifs_remap(cifs_sb);
+ spin_unlock(&cifs_tcp_ses_lock);
+
+ root_tcon = tcon;
+
+ for (count = 1; ;) {
+ if (!rc && tcon) {
+ rc = is_path_remote(cifs_sb, vol, xid, server, tcon);
+ if (!rc || rc != -EREMOTE)
+ break;
}
/*
- * cifs_build_path_to_root works only when we have a valid tcon
+ * BB: when we implement proper loop detection,
+ * we will remove this check. But now we need it
+ * to prevent an indefinite loop if 'DFS tree' is
+ * misconfigured (i.e. has loops).
*/
- full_path = cifs_build_path_to_root(volume_info, cifs_sb, tcon,
- tcon->Flags & SMB_SHARE_IS_IN_DFS);
- if (full_path == NULL) {
- rc = -ENOMEM;
- goto mount_fail_check;
- }
- rc = server->ops->is_path_accessible(xid, tcon, cifs_sb,
- full_path);
- if (rc != 0 && rc != -EREMOTE) {
- kfree(full_path);
- goto mount_fail_check;
+ if (count++ > MAX_NESTED_LINKS) {
+ rc = -ELOOP;
+ break;
}
- if (rc != -EREMOTE) {
- rc = cifs_are_all_path_components_accessible(server,
- xid, tcon, cifs_sb,
- full_path);
- if (rc != 0) {
- cifs_dbg(VFS, "cannot query dirs between root and final path, "
- "enabling CIFS_MOUNT_USE_PREFIX_PATH\n");
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
- rc = 0;
- }
- }
kfree(full_path);
- }
-
- /* get referral if needed */
- if (rc == -EREMOTE) {
-#ifdef CONFIG_CIFS_DFS_UPCALL
- if (referral_walks_count > MAX_NESTED_LINKS) {
- /*
- * BB: when we implement proper loop detection,
- * we will remove this check. But now we need it
- * to prevent an indefinite loop if 'DFS tree' is
- * misconfigured (i.e. has loops).
- */
- rc = -ELOOP;
- goto mount_fail_check;
+ full_path = build_unc_path_to_root(vol, cifs_sb, true);
+ if (IS_ERR(full_path)) {
+ rc = PTR_ERR(full_path);
+ full_path = NULL;
+ break;
}
- rc = expand_dfs_referral(xid, ses, volume_info, cifs_sb, true);
+ old_mountdata = cifs_sb->mountdata;
+ rc = expand_dfs_referral(xid, root_tcon->ses, vol, cifs_sb,
+ true);
+ if (rc)
+ break;
- if (!rc) {
- referral_walks_count++;
- goto try_mount_again;
+ if (cifs_sb->mountdata != old_mountdata) {
+ mount_put_conns(cifs_sb, xid, server, ses, tcon);
+ rc = mount_get_conns(vol, cifs_sb, &xid, &server, &ses,
+ &tcon);
+ }
+ if (rc) {
+ if (rc == -EACCES || rc == -EOPNOTSUPP)
+ break;
+ /* Perform DFS failover to any other DFS targets */
+ rc = mount_do_dfs_failover(full_path + 1, cifs_sb, vol,
+ root_tcon->ses, &xid,
+ &server, &ses, &tcon);
+ if (rc == -EACCES || rc == -EOPNOTSUPP || !server ||
+ !ses)
+ goto error;
}
- goto mount_fail_check;
-#else /* No DFS support, return error on mount */
- rc = -EOPNOTSUPP;
-#endif
}
+ cifs_put_tcon(root_tcon);
if (rc)
- goto mount_fail_check;
+ goto error;
- /* now, hang the tcon off of the superblock */
- tlink = kzalloc(sizeof *tlink, GFP_KERNEL);
- if (tlink == NULL) {
+ spin_lock(&cifs_tcp_ses_lock);
+ if (!tcon->dfs_path) {
+ /* Save full path in new tcon to do failover when reconnecting tcons */
+ tcon->dfs_path = full_path;
+ full_path = NULL;
+ tcon->remap = cifs_remap(cifs_sb);
+ }
+ cifs_sb->origin_fullpath = kstrndup(tcon->dfs_path,
+ strlen(tcon->dfs_path),
+ GFP_ATOMIC);
+ if (!cifs_sb->origin_fullpath) {
+ spin_unlock(&cifs_tcp_ses_lock);
rc = -ENOMEM;
- goto mount_fail_check;
+ goto error;
}
+ spin_unlock(&cifs_tcp_ses_lock);
- tlink->tl_uid = ses->linux_uid;
- tlink->tl_tcon = tcon;
- tlink->tl_time = jiffies;
- set_bit(TCON_LINK_MASTER, &tlink->tl_flags);
- set_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
+ rc = dfs_cache_add_vol(vol, cifs_sb->origin_fullpath);
+ if (rc) {
+ kfree(cifs_sb->origin_fullpath);
+ goto error;
+ }
+ /*
+ * After reconnecting to a different server, unique ids won't
+ * match anymore, so we disable serverino. This prevents
+ * dentry revalidation to think the dentry are stale (ESTALE).
+ */
+ cifs_autodisable_serverino(cifs_sb);
+out:
+ free_xid(xid);
+ return mount_setup_tlink(cifs_sb, ses, tcon);
- cifs_sb->master_tlink = tlink;
- spin_lock(&cifs_sb->tlink_tree_lock);
- tlink_rb_insert(&cifs_sb->tlink_tree, tlink);
- spin_unlock(&cifs_sb->tlink_tree_lock);
+error:
+ kfree(full_path);
+ kfree(root_path);
+ mount_put_conns(cifs_sb, xid, server, ses, tcon);
+ return rc;
+}
+#else
+int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol)
+{
+ int rc = 0;
+ unsigned int xid;
+ struct cifs_ses *ses;
+ struct cifs_tcon *tcon;
+ struct TCP_Server_Info *server;
- queue_delayed_work(cifsiod_wq, &cifs_sb->prune_tlinks,
- TLINK_IDLE_EXPIRE);
+ rc = mount_get_conns(vol, cifs_sb, &xid, &server, &ses, &tcon);
+ if (rc)
+ goto error;
-mount_fail_check:
- /* on error free sesinfo and tcon struct if needed */
- if (rc) {
- /* If find_unc succeeded then rc == 0 so we can not end */
- /* up accidentally freeing someone elses tcon struct */
- if (tcon)
- cifs_put_tcon(tcon);
- else if (ses)
- cifs_put_smb_ses(ses);
- else
- cifs_put_tcp_session(server, 0);
+ if (tcon) {
+ rc = is_path_remote(cifs_sb, vol, xid, server, tcon);
+ if (rc == -EREMOTE)
+ rc = -EOPNOTSUPP;
+ if (rc)
+ goto error;
}
-out:
free_xid(xid);
+
+ return mount_setup_tlink(cifs_sb, ses, tcon);
+
+error:
+ mount_put_conns(cifs_sb, xid, server, ses, tcon);
return rc;
}
+#endif
/*
* Issue a TREE_CONNECT request.
@@ -4370,6 +4861,10 @@ cifs_umount(struct cifs_sb_info *cifs_sb)
kfree(cifs_sb->mountdata);
kfree(cifs_sb->prepath);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ dfs_cache_del_vol(cifs_sb->origin_fullpath);
+ kfree(cifs_sb->origin_fullpath);
+#endif
call_rcu(&cifs_sb->rcu, delayed_free);
}
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
new file mode 100644
index 000000000000..09b7d0d4f6e4
--- /dev/null
+++ b/fs/cifs/dfs_cache.c
@@ -0,0 +1,1368 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DFS referral cache routines
+ *
+ * Copyright (c) 2018 Paulo Alcantara <palcantara@suse.de>
+ */
+
+#include <linux/rcupdate.h>
+#include <linux/rculist.h>
+#include <linux/jhash.h>
+#include <linux/ktime.h>
+#include <linux/slab.h>
+#include <linux/nls.h>
+#include <linux/workqueue.h>
+#include "cifsglob.h"
+#include "smb2pdu.h"
+#include "smb2proto.h"
+#include "cifsproto.h"
+#include "cifs_debug.h"
+#include "cifs_unicode.h"
+#include "smb2glob.h"
+
+#include "dfs_cache.h"
+
+#define DFS_CACHE_HTABLE_SIZE 32
+#define DFS_CACHE_MAX_ENTRIES 64
+
+#define IS_INTERLINK_SET(v) ((v) & (DFSREF_REFERRAL_SERVER | \
+ DFSREF_STORAGE_SERVER))
+
+struct dfs_cache_tgt {
+ char *t_name;
+ struct list_head t_list;
+};
+
+struct dfs_cache_entry {
+ struct hlist_node ce_hlist;
+ const char *ce_path;
+ int ce_ttl;
+ int ce_srvtype;
+ int ce_flags;
+ struct timespec64 ce_etime;
+ int ce_path_consumed;
+ int ce_numtgts;
+ struct list_head ce_tlist;
+ struct dfs_cache_tgt *ce_tgthint;
+ struct rcu_head ce_rcu;
+};
+
+static struct kmem_cache *dfs_cache_slab __read_mostly;
+
+struct dfs_cache_vol_info {
+ char *vi_fullpath;
+ struct smb_vol vi_vol;
+ struct list_head vi_list;
+};
+
+struct dfs_cache {
+ struct mutex dc_lock;
+ struct nls_table *dc_nlsc;
+ struct list_head dc_vol_list;
+ int dc_ttl;
+ struct delayed_work dc_refresh;
+};
+
+static struct dfs_cache dfs_cache;
+
+/*
+ * Number of entries in the cache
+ */
+static size_t dfs_cache_count;
+
+static DEFINE_MUTEX(dfs_cache_list_lock);
+static struct hlist_head dfs_cache_htable[DFS_CACHE_HTABLE_SIZE];
+
+static void refresh_cache_worker(struct work_struct *work);
+
+static inline bool is_path_valid(const char *path)
+{
+ return path && (strchr(path + 1, '\\') || strchr(path + 1, '/'));
+}
+
+static inline int get_normalized_path(const char *path, char **npath)
+{
+ if (*path == '\\') {
+ *npath = (char *)path;
+ } else {
+ *npath = kstrndup(path, strlen(path), GFP_KERNEL);
+ if (!*npath)
+ return -ENOMEM;
+ convert_delimiter(*npath, '\\');
+ }
+ return 0;
+}
+
+static inline void free_normalized_path(const char *path, char *npath)
+{
+ if (path != npath)
+ kfree(npath);
+}
+
+static inline bool cache_entry_expired(const struct dfs_cache_entry *ce)
+{
+ struct timespec64 ts;
+
+ ktime_get_coarse_real_ts64(&ts);
+ return timespec64_compare(&ts, &ce->ce_etime) >= 0;
+}
+
+static inline void free_tgts(struct dfs_cache_entry *ce)
+{
+ struct dfs_cache_tgt *t, *n;
+
+ list_for_each_entry_safe(t, n, &ce->ce_tlist, t_list) {
+ list_del(&t->t_list);
+ kfree(t->t_name);
+ kfree(t);
+ }
+}
+
+static void free_cache_entry(struct rcu_head *rcu)
+{
+ struct dfs_cache_entry *ce = container_of(rcu, struct dfs_cache_entry,
+ ce_rcu);
+ kmem_cache_free(dfs_cache_slab, ce);
+}
+
+static inline void flush_cache_ent(struct dfs_cache_entry *ce)
+{
+ if (hlist_unhashed(&ce->ce_hlist))
+ return;
+
+ hlist_del_init_rcu(&ce->ce_hlist);
+ kfree(ce->ce_path);
+ free_tgts(ce);
+ dfs_cache_count--;
+ call_rcu(&ce->ce_rcu, free_cache_entry);
+}
+
+static void flush_cache_ents(void)
+{
+ int i;
+
+ rcu_read_lock();
+ for (i = 0; i < DFS_CACHE_HTABLE_SIZE; i++) {
+ struct hlist_head *l = &dfs_cache_htable[i];
+ struct dfs_cache_entry *ce;
+
+ hlist_for_each_entry_rcu(ce, l, ce_hlist)
+ flush_cache_ent(ce);
+ }
+ rcu_read_unlock();
+}
+
+/*
+ * dfs cache /proc file
+ */
+static int dfscache_proc_show(struct seq_file *m, void *v)
+{
+ int bucket;
+ struct dfs_cache_entry *ce;
+ struct dfs_cache_tgt *t;
+
+ seq_puts(m, "DFS cache\n---------\n");
+
+ mutex_lock(&dfs_cache_list_lock);
+
+ rcu_read_lock();
+ hash_for_each_rcu(dfs_cache_htable, bucket, ce, ce_hlist) {
+ seq_printf(m,
+ "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,"
+ "interlink=%s,path_consumed=%d,expired=%s\n",
+ ce->ce_path,
+ ce->ce_srvtype == DFS_TYPE_ROOT ? "root" : "link",
+ ce->ce_ttl, ce->ce_etime.tv_nsec,
+ IS_INTERLINK_SET(ce->ce_flags) ? "yes" : "no",
+ ce->ce_path_consumed,
+ cache_entry_expired(ce) ? "yes" : "no");
+
+ list_for_each_entry(t, &ce->ce_tlist, t_list) {
+ seq_printf(m, " %s%s\n",
+ t->t_name,
+ ce->ce_tgthint == t ? " (target hint)" : "");
+ }
+
+ }
+ rcu_read_unlock();
+
+ mutex_unlock(&dfs_cache_list_lock);
+ return 0;
+}
+
+static ssize_t dfscache_proc_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ char c;
+ int rc;
+
+ rc = get_user(c, buffer);
+ if (rc)
+ return rc;
+
+ if (c != '0')
+ return -EINVAL;
+
+ cifs_dbg(FYI, "clearing dfs cache");
+ mutex_lock(&dfs_cache_list_lock);
+ flush_cache_ents();
+ mutex_unlock(&dfs_cache_list_lock);
+
+ return count;
+}
+
+static int dfscache_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, dfscache_proc_show, NULL);
+}
+
+const struct file_operations dfscache_proc_fops = {
+ .open = dfscache_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = dfscache_proc_write,
+};
+
+#ifdef CONFIG_CIFS_DEBUG2
+static inline void dump_tgts(const struct dfs_cache_entry *ce)
+{
+ struct dfs_cache_tgt *t;
+
+ cifs_dbg(FYI, "target list:\n");
+ list_for_each_entry(t, &ce->ce_tlist, t_list) {
+ cifs_dbg(FYI, " %s%s\n", t->t_name,
+ ce->ce_tgthint == t ? " (target hint)" : "");
+ }
+}
+
+static inline void dump_ce(const struct dfs_cache_entry *ce)
+{
+ cifs_dbg(FYI, "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,"
+ "interlink=%s,path_consumed=%d,expired=%s\n", ce->ce_path,
+ ce->ce_srvtype == DFS_TYPE_ROOT ? "root" : "link", ce->ce_ttl,
+ ce->ce_etime.tv_nsec,
+ IS_INTERLINK_SET(ce->ce_flags) ? "yes" : "no",
+ ce->ce_path_consumed,
+ cache_entry_expired(ce) ? "yes" : "no");
+ dump_tgts(ce);
+}
+
+static inline void dump_refs(const struct dfs_info3_param *refs, int numrefs)
+{
+ int i;
+
+ cifs_dbg(FYI, "DFS referrals returned by the server:\n");
+ for (i = 0; i < numrefs; i++) {
+ const struct dfs_info3_param *ref = &refs[i];
+
+ cifs_dbg(FYI,
+ "\n"
+ "flags: 0x%x\n"
+ "path_consumed: %d\n"
+ "server_type: 0x%x\n"
+ "ref_flag: 0x%x\n"
+ "path_name: %s\n"
+ "node_name: %s\n"
+ "ttl: %d (%dm)\n",
+ ref->flags, ref->path_consumed, ref->server_type,
+ ref->ref_flag, ref->path_name, ref->node_name,
+ ref->ttl, ref->ttl / 60);
+ }
+}
+#else
+#define dump_tgts(e)
+#define dump_ce(e)
+#define dump_refs(r, n)
+#endif
+
+/**
+ * dfs_cache_init - Initialize DFS referral cache.
+ *
+ * Return zero if initialized successfully, otherwise non-zero.
+ */
+int dfs_cache_init(void)
+{
+ int i;
+
+ dfs_cache_slab = kmem_cache_create("cifs_dfs_cache",
+ sizeof(struct dfs_cache_entry), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!dfs_cache_slab)
+ return -ENOMEM;
+
+ for (i = 0; i < DFS_CACHE_HTABLE_SIZE; i++)
+ INIT_HLIST_HEAD(&dfs_cache_htable[i]);
+
+ INIT_LIST_HEAD(&dfs_cache.dc_vol_list);
+ mutex_init(&dfs_cache.dc_lock);
+ INIT_DELAYED_WORK(&dfs_cache.dc_refresh, refresh_cache_worker);
+ dfs_cache.dc_ttl = -1;
+ dfs_cache.dc_nlsc = load_nls_default();
+
+ cifs_dbg(FYI, "%s: initialized DFS referral cache\n", __func__);
+ return 0;
+}
+
+static inline unsigned int cache_entry_hash(const void *data, int size)
+{
+ unsigned int h;
+
+ h = jhash(data, size, 0);
+ return h & (DFS_CACHE_HTABLE_SIZE - 1);
+}
+
+/* Check whether second path component of @path is SYSVOL or NETLOGON */
+static inline bool is_sysvol_or_netlogon(const char *path)
+{
+ const char *s;
+ char sep = path[0];
+
+ s = strchr(path + 1, sep) + 1;
+ return !strncasecmp(s, "sysvol", strlen("sysvol")) ||
+ !strncasecmp(s, "netlogon", strlen("netlogon"));
+}
+
+/* Return target hint of a DFS cache entry */
+static inline char *get_tgt_name(const struct dfs_cache_entry *ce)
+{
+ struct dfs_cache_tgt *t = ce->ce_tgthint;
+
+ return t ? t->t_name : ERR_PTR(-ENOENT);
+}
+
+/* Return expire time out of a new entry's TTL */
+static inline struct timespec64 get_expire_time(int ttl)
+{
+ struct timespec64 ts = {
+ .tv_sec = ttl,
+ .tv_nsec = 0,
+ };
+ struct timespec64 now;
+
+ ktime_get_coarse_real_ts64(&now);
+ return timespec64_add(now, ts);
+}
+
+/* Allocate a new DFS target */
+static inline struct dfs_cache_tgt *alloc_tgt(const char *name)
+{
+ struct dfs_cache_tgt *t;
+
+ t = kmalloc(sizeof(*t), GFP_KERNEL);
+ if (!t)
+ return ERR_PTR(-ENOMEM);
+ t->t_name = kstrndup(name, strlen(name), GFP_KERNEL);
+ if (!t->t_name) {
+ kfree(t);
+ return ERR_PTR(-ENOMEM);
+ }
+ INIT_LIST_HEAD(&t->t_list);
+ return t;
+}
+
+/*
+ * Copy DFS referral information to a cache entry and conditionally update
+ * target hint.
+ */
+static int copy_ref_data(const struct dfs_info3_param *refs, int numrefs,
+ struct dfs_cache_entry *ce, const char *tgthint)
+{
+ int i;
+
+ ce->ce_ttl = refs[0].ttl;
+ ce->ce_etime = get_expire_time(ce->ce_ttl);
+ ce->ce_srvtype = refs[0].server_type;
+ ce->ce_flags = refs[0].ref_flag;
+ ce->ce_path_consumed = refs[0].path_consumed;
+
+ for (i = 0; i < numrefs; i++) {
+ struct dfs_cache_tgt *t;
+
+ t = alloc_tgt(refs[i].node_name);
+ if (IS_ERR(t)) {
+ free_tgts(ce);
+ return PTR_ERR(t);
+ }
+ if (tgthint && !strcasecmp(t->t_name, tgthint)) {
+ list_add(&t->t_list, &ce->ce_tlist);
+ tgthint = NULL;
+ } else {
+ list_add_tail(&t->t_list, &ce->ce_tlist);
+ }
+ ce->ce_numtgts++;
+ }
+
+ ce->ce_tgthint = list_first_entry_or_null(&ce->ce_tlist,
+ struct dfs_cache_tgt, t_list);
+
+ return 0;
+}
+
+/* Allocate a new cache entry */
+static struct dfs_cache_entry *
+alloc_cache_entry(const char *path, const struct dfs_info3_param *refs,
+ int numrefs)
+{
+ struct dfs_cache_entry *ce;
+ int rc;
+
+ ce = kmem_cache_zalloc(dfs_cache_slab, GFP_KERNEL);
+ if (!ce)
+ return ERR_PTR(-ENOMEM);
+
+ ce->ce_path = kstrdup_const(path, GFP_KERNEL);
+ if (!ce->ce_path) {
+ kmem_cache_free(dfs_cache_slab, ce);
+ return ERR_PTR(-ENOMEM);
+ }
+ INIT_HLIST_NODE(&ce->ce_hlist);
+ INIT_LIST_HEAD(&ce->ce_tlist);
+
+ rc = copy_ref_data(refs, numrefs, ce, NULL);
+ if (rc) {
+ kfree(ce->ce_path);
+ kmem_cache_free(dfs_cache_slab, ce);
+ ce = ERR_PTR(rc);
+ }
+ return ce;
+}
+
+static void remove_oldest_entry(void)
+{
+ int bucket;
+ struct dfs_cache_entry *ce;
+ struct dfs_cache_entry *to_del = NULL;
+
+ rcu_read_lock();
+ hash_for_each_rcu(dfs_cache_htable, bucket, ce, ce_hlist) {
+ if (!to_del || timespec64_compare(&ce->ce_etime,
+ &to_del->ce_etime) < 0)
+ to_del = ce;
+ }
+ if (!to_del) {
+ cifs_dbg(FYI, "%s: no entry to remove", __func__);
+ goto out;
+ }
+ cifs_dbg(FYI, "%s: removing entry", __func__);
+ dump_ce(to_del);
+ flush_cache_ent(to_del);
+out:
+ rcu_read_unlock();
+}
+
+/* Add a new DFS cache entry */
+static inline struct dfs_cache_entry *
+add_cache_entry(unsigned int hash, const char *path,
+ const struct dfs_info3_param *refs, int numrefs)
+{
+ struct dfs_cache_entry *ce;
+
+ ce = alloc_cache_entry(path, refs, numrefs);
+ if (IS_ERR(ce))
+ return ce;
+
+ hlist_add_head_rcu(&ce->ce_hlist, &dfs_cache_htable[hash]);
+
+ mutex_lock(&dfs_cache.dc_lock);
+ if (dfs_cache.dc_ttl < 0) {
+ dfs_cache.dc_ttl = ce->ce_ttl;
+ queue_delayed_work(cifsiod_wq, &dfs_cache.dc_refresh,
+ dfs_cache.dc_ttl * HZ);
+ } else {
+ dfs_cache.dc_ttl = min_t(int, dfs_cache.dc_ttl, ce->ce_ttl);
+ mod_delayed_work(cifsiod_wq, &dfs_cache.dc_refresh,
+ dfs_cache.dc_ttl * HZ);
+ }
+ mutex_unlock(&dfs_cache.dc_lock);
+
+ return ce;
+}
+
+static struct dfs_cache_entry *__find_cache_entry(unsigned int hash,
+ const char *path)
+{
+ struct dfs_cache_entry *ce;
+ bool found = false;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(ce, &dfs_cache_htable[hash], ce_hlist) {
+ if (!strcasecmp(path, ce->ce_path)) {
+#ifdef CONFIG_CIFS_DEBUG2
+ char *name = get_tgt_name(ce);
+
+ if (unlikely(IS_ERR(name))) {
+ rcu_read_unlock();
+ return ERR_CAST(name);
+ }
+ cifs_dbg(FYI, "%s: cache hit\n", __func__);
+ cifs_dbg(FYI, "%s: target hint: %s\n", __func__, name);
+#endif
+ found = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ return found ? ce : ERR_PTR(-ENOENT);
+}
+
+/*
+ * Find a DFS cache entry in hash table and optionally check prefix path against
+ * @path.
+ * Use whole path components in the match.
+ * Return ERR_PTR(-ENOENT) if the entry is not found.
+ */
+static inline struct dfs_cache_entry *find_cache_entry(const char *path,
+ unsigned int *hash)
+{
+ *hash = cache_entry_hash(path, strlen(path));
+ return __find_cache_entry(*hash, path);
+}
+
+static inline void destroy_slab_cache(void)
+{
+ rcu_barrier();
+ kmem_cache_destroy(dfs_cache_slab);
+}
+
+static inline void free_vol(struct dfs_cache_vol_info *vi)
+{
+ list_del(&vi->vi_list);
+ kfree(vi->vi_fullpath);
+ cifs_cleanup_volume_info_contents(&vi->vi_vol);
+ kfree(vi);
+}
+
+static inline void free_vol_list(void)
+{
+ struct dfs_cache_vol_info *vi, *nvi;
+
+ list_for_each_entry_safe(vi, nvi, &dfs_cache.dc_vol_list, vi_list)
+ free_vol(vi);
+}
+
+/**
+ * dfs_cache_destroy - destroy DFS referral cache
+ */
+void dfs_cache_destroy(void)
+{
+ cancel_delayed_work_sync(&dfs_cache.dc_refresh);
+ unload_nls(dfs_cache.dc_nlsc);
+ free_vol_list();
+ mutex_destroy(&dfs_cache.dc_lock);
+
+ flush_cache_ents();
+ destroy_slab_cache();
+ mutex_destroy(&dfs_cache_list_lock);
+
+ cifs_dbg(FYI, "%s: destroyed DFS referral cache\n", __func__);
+}
+
+static inline struct dfs_cache_entry *
+__update_cache_entry(const char *path, const struct dfs_info3_param *refs,
+ int numrefs)
+{
+ int rc;
+ unsigned int h;
+ struct dfs_cache_entry *ce;
+ char *s, *th = NULL;
+
+ ce = find_cache_entry(path, &h);
+ if (IS_ERR(ce))
+ return ce;
+
+ if (ce->ce_tgthint) {
+ s = ce->ce_tgthint->t_name;
+ th = kstrndup(s, strlen(s), GFP_KERNEL);
+ if (!th)
+ return ERR_PTR(-ENOMEM);
+ }
+
+ free_tgts(ce);
+ ce->ce_numtgts = 0;
+
+ rc = copy_ref_data(refs, numrefs, ce, th);
+ kfree(th);
+
+ if (rc)
+ ce = ERR_PTR(rc);
+
+ return ce;
+}
+
+/* Update an expired cache entry by getting a new DFS referral from server */
+static struct dfs_cache_entry *
+update_cache_entry(const unsigned int xid, struct cifs_ses *ses,
+ const struct nls_table *nls_codepage, int remap,
+ const char *path, struct dfs_cache_entry *ce)
+{
+ int rc;
+ struct dfs_info3_param *refs = NULL;
+ int numrefs = 0;
+
+ cifs_dbg(FYI, "%s: update expired cache entry\n", __func__);
+ /*
+ * Check if caller provided enough parameters to update an expired
+ * entry.
+ */
+ if (!ses || !ses->server || !ses->server->ops->get_dfs_refer)
+ return ERR_PTR(-ETIME);
+ if (unlikely(!nls_codepage))
+ return ERR_PTR(-ETIME);
+
+ cifs_dbg(FYI, "%s: DFS referral request for %s\n", __func__, path);
+
+ rc = ses->server->ops->get_dfs_refer(xid, ses, path, &refs, &numrefs,
+ nls_codepage, remap);
+ if (rc)
+ ce = ERR_PTR(rc);
+ else
+ ce = __update_cache_entry(path, refs, numrefs);
+
+ dump_refs(refs, numrefs);
+ free_dfs_info_array(refs, numrefs);
+
+ return ce;
+}
+
+/*
+ * Find, create or update a DFS cache entry.
+ *
+ * If the entry wasn't found, it will create a new one. Or if it was found but
+ * expired, then it will update the entry accordingly.
+ *
+ * For interlinks, __cifs_dfs_mount() and expand_dfs_referral() are supposed to
+ * handle them properly.
+ */
+static struct dfs_cache_entry *
+do_dfs_cache_find(const unsigned int xid, struct cifs_ses *ses,
+ const struct nls_table *nls_codepage, int remap,
+ const char *path, bool noreq)
+{
+ int rc;
+ unsigned int h;
+ struct dfs_cache_entry *ce;
+ struct dfs_info3_param *nrefs;
+ int numnrefs;
+
+ cifs_dbg(FYI, "%s: search path: %s\n", __func__, path);
+
+ ce = find_cache_entry(path, &h);
+ if (IS_ERR(ce)) {
+ cifs_dbg(FYI, "%s: cache miss\n", __func__);
+ /*
+ * If @noreq is set, no requests will be sent to the server for
+ * either updating or getting a new DFS referral.
+ */
+ if (noreq)
+ return ce;
+ /*
+ * No cache entry was found, so check for valid parameters that
+ * will be required to get a new DFS referral and then create a
+ * new cache entry.
+ */
+ if (!ses || !ses->server || !ses->server->ops->get_dfs_refer) {
+ ce = ERR_PTR(-EOPNOTSUPP);
+ return ce;
+ }
+ if (unlikely(!nls_codepage)) {
+ ce = ERR_PTR(-EINVAL);
+ return ce;
+ }
+
+ nrefs = NULL;
+ numnrefs = 0;
+
+ cifs_dbg(FYI, "%s: DFS referral request for %s\n", __func__,
+ path);
+
+ rc = ses->server->ops->get_dfs_refer(xid, ses, path, &nrefs,
+ &numnrefs, nls_codepage,
+ remap);
+ if (rc) {
+ ce = ERR_PTR(rc);
+ return ce;
+ }
+
+ dump_refs(nrefs, numnrefs);
+
+ cifs_dbg(FYI, "%s: new cache entry\n", __func__);
+
+ if (dfs_cache_count >= DFS_CACHE_MAX_ENTRIES) {
+ cifs_dbg(FYI, "%s: reached max cache size (%d)",
+ __func__, DFS_CACHE_MAX_ENTRIES);
+ remove_oldest_entry();
+ }
+ ce = add_cache_entry(h, path, nrefs, numnrefs);
+ free_dfs_info_array(nrefs, numnrefs);
+
+ if (IS_ERR(ce))
+ return ce;
+
+ dfs_cache_count++;
+ }
+
+ dump_ce(ce);
+
+ /* Just return the found cache entry in case @noreq is set */
+ if (noreq)
+ return ce;
+
+ if (cache_entry_expired(ce)) {
+ cifs_dbg(FYI, "%s: expired cache entry\n", __func__);
+ ce = update_cache_entry(xid, ses, nls_codepage, remap, path,
+ ce);
+ if (IS_ERR(ce)) {
+ cifs_dbg(FYI, "%s: failed to update expired entry\n",
+ __func__);
+ }
+ }
+ return ce;
+}
+
+/* Set up a new DFS referral from a given cache entry */
+static int setup_ref(const char *path, const struct dfs_cache_entry *ce,
+ struct dfs_info3_param *ref, const char *tgt)
+{
+ int rc;
+
+ cifs_dbg(FYI, "%s: set up new ref\n", __func__);
+
+ memset(ref, 0, sizeof(*ref));
+
+ ref->path_name = kstrndup(path, strlen(path), GFP_KERNEL);
+ if (!ref->path_name)
+ return -ENOMEM;
+
+ ref->path_consumed = ce->ce_path_consumed;
+
+ ref->node_name = kstrndup(tgt, strlen(tgt), GFP_KERNEL);
+ if (!ref->node_name) {
+ rc = -ENOMEM;
+ goto err_free_path;
+ }
+
+ ref->ttl = ce->ce_ttl;
+ ref->server_type = ce->ce_srvtype;
+ ref->ref_flag = ce->ce_flags;
+
+ return 0;
+
+err_free_path:
+ kfree(ref->path_name);
+ ref->path_name = NULL;
+ return rc;
+}
+
+/* Return target list of a DFS cache entry */
+static int get_tgt_list(const struct dfs_cache_entry *ce,
+ struct dfs_cache_tgt_list *tl)
+{
+ int rc;
+ struct list_head *head = &tl->tl_list;
+ struct dfs_cache_tgt *t;
+ struct dfs_cache_tgt_iterator *it, *nit;
+
+ memset(tl, 0, sizeof(*tl));
+ INIT_LIST_HEAD(head);
+
+ list_for_each_entry(t, &ce->ce_tlist, t_list) {
+ it = kzalloc(sizeof(*it), GFP_KERNEL);
+ if (!it) {
+ rc = -ENOMEM;
+ goto err_free_it;
+ }
+
+ it->it_name = kstrndup(t->t_name, strlen(t->t_name),
+ GFP_KERNEL);
+ if (!it->it_name) {
+ kfree(it);
+ rc = -ENOMEM;
+ goto err_free_it;
+ }
+
+ if (ce->ce_tgthint == t)
+ list_add(&it->it_list, head);
+ else
+ list_add_tail(&it->it_list, head);
+ }
+ tl->tl_numtgts = ce->ce_numtgts;
+
+ return 0;
+
+err_free_it:
+ list_for_each_entry_safe(it, nit, head, it_list) {
+ kfree(it->it_name);
+ kfree(it);
+ }
+ return rc;
+}
+
+/**
+ * dfs_cache_find - find a DFS cache entry
+ *
+ * If it doesn't find the cache entry, then it will get a DFS referral
+ * for @path and create a new entry.
+ *
+ * In case the cache entry exists but expired, it will get a DFS referral
+ * for @path and then update the respective cache entry.
+ *
+ * These parameters are passed down to the get_dfs_refer() call if it
+ * needs to be issued:
+ * @xid: syscall xid
+ * @ses: smb session to issue the request on
+ * @nls_codepage: charset conversion
+ * @remap: path character remapping type
+ * @path: path to lookup in DFS referral cache.
+ *
+ * @ref: when non-NULL, store single DFS referral result in it.
+ * @tgt_list: when non-NULL, store complete DFS target list in it.
+ *
+ * Return zero if the target was found, otherwise non-zero.
+ */
+int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses,
+ const struct nls_table *nls_codepage, int remap,
+ const char *path, struct dfs_info3_param *ref,
+ struct dfs_cache_tgt_list *tgt_list)
+{
+ int rc;
+ char *npath;
+ struct dfs_cache_entry *ce;
+
+ if (unlikely(!is_path_valid(path)))
+ return -EINVAL;
+
+ rc = get_normalized_path(path, &npath);
+ if (rc)
+ return rc;
+
+ mutex_lock(&dfs_cache_list_lock);
+ ce = do_dfs_cache_find(xid, ses, nls_codepage, remap, npath, false);
+ if (!IS_ERR(ce)) {
+ if (ref)
+ rc = setup_ref(path, ce, ref, get_tgt_name(ce));
+ else
+ rc = 0;
+ if (!rc && tgt_list)
+ rc = get_tgt_list(ce, tgt_list);
+ } else {
+ rc = PTR_ERR(ce);
+ }
+ mutex_unlock(&dfs_cache_list_lock);
+ free_normalized_path(path, npath);
+ return rc;
+}
+
+/**
+ * dfs_cache_noreq_find - find a DFS cache entry without sending any requests to
+ * the currently connected server.
+ *
+ * NOTE: This function will neither update a cache entry in case it was
+ * expired, nor create a new cache entry if @path hasn't been found. It heavily
+ * relies on an existing cache entry.
+ *
+ * @path: path to lookup in the DFS referral cache.
+ * @ref: when non-NULL, store single DFS referral result in it.
+ * @tgt_list: when non-NULL, store complete DFS target list in it.
+ *
+ * Return 0 if successful.
+ * Return -ENOENT if the entry was not found.
+ * Return non-zero for other errors.
+ */
+int dfs_cache_noreq_find(const char *path, struct dfs_info3_param *ref,
+ struct dfs_cache_tgt_list *tgt_list)
+{
+ int rc;
+ char *npath;
+ struct dfs_cache_entry *ce;
+
+ if (unlikely(!is_path_valid(path)))
+ return -EINVAL;
+
+ rc = get_normalized_path(path, &npath);
+ if (rc)
+ return rc;
+
+ mutex_lock(&dfs_cache_list_lock);
+ ce = do_dfs_cache_find(0, NULL, NULL, 0, npath, true);
+ if (IS_ERR(ce)) {
+ rc = PTR_ERR(ce);
+ goto out;
+ }
+
+ if (ref)
+ rc = setup_ref(path, ce, ref, get_tgt_name(ce));
+ else
+ rc = 0;
+ if (!rc && tgt_list)
+ rc = get_tgt_list(ce, tgt_list);
+out:
+ mutex_unlock(&dfs_cache_list_lock);
+ free_normalized_path(path, npath);
+ return rc;
+}
+
+/**
+ * dfs_cache_update_tgthint - update target hint of a DFS cache entry
+ *
+ * If it doesn't find the cache entry, then it will get a DFS referral for @path
+ * and create a new entry.
+ *
+ * In case the cache entry exists but expired, it will get a DFS referral
+ * for @path and then update the respective cache entry.
+ *
+ * @xid: syscall id
+ * @ses: smb session
+ * @nls_codepage: charset conversion
+ * @remap: type of character remapping for paths
+ * @path: path to lookup in DFS referral cache.
+ * @it: DFS target iterator
+ *
+ * Return zero if the target hint was updated successfully, otherwise non-zero.
+ */
+int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses,
+ const struct nls_table *nls_codepage, int remap,
+ const char *path,
+ const struct dfs_cache_tgt_iterator *it)
+{
+ int rc;
+ char *npath;
+ struct dfs_cache_entry *ce;
+ struct dfs_cache_tgt *t;
+
+ if (unlikely(!is_path_valid(path)))
+ return -EINVAL;
+
+ rc = get_normalized_path(path, &npath);
+ if (rc)
+ return rc;
+
+ cifs_dbg(FYI, "%s: path: %s\n", __func__, npath);
+
+ mutex_lock(&dfs_cache_list_lock);
+ ce = do_dfs_cache_find(xid, ses, nls_codepage, remap, npath, false);
+ if (IS_ERR(ce)) {
+ rc = PTR_ERR(ce);
+ goto out;
+ }
+
+ rc = 0;
+
+ t = ce->ce_tgthint;
+
+ if (likely(!strcasecmp(it->it_name, t->t_name)))
+ goto out;
+
+ list_for_each_entry(t, &ce->ce_tlist, t_list) {
+ if (!strcasecmp(t->t_name, it->it_name)) {
+ ce->ce_tgthint = t;
+ cifs_dbg(FYI, "%s: new target hint: %s\n", __func__,
+ it->it_name);
+ break;
+ }
+ }
+
+out:
+ mutex_unlock(&dfs_cache_list_lock);
+ free_normalized_path(path, npath);
+ return rc;
+}
+
+/**
+ * dfs_cache_noreq_update_tgthint - update target hint of a DFS cache entry
+ * without sending any requests to the currently connected server.
+ *
+ * NOTE: This function will neither update a cache entry in case it was
+ * expired, nor create a new cache entry if @path hasn't been found. It heavily
+ * relies on an existing cache entry.
+ *
+ * @path: path to lookup in DFS referral cache.
+ * @it: target iterator which contains the target hint to update the cache
+ * entry with.
+ *
+ * Return zero if the target hint was updated successfully, otherwise non-zero.
+ */
+int dfs_cache_noreq_update_tgthint(const char *path,
+ const struct dfs_cache_tgt_iterator *it)
+{
+ int rc;
+ char *npath;
+ struct dfs_cache_entry *ce;
+ struct dfs_cache_tgt *t;
+
+ if (unlikely(!is_path_valid(path)) || !it)
+ return -EINVAL;
+
+ rc = get_normalized_path(path, &npath);
+ if (rc)
+ return rc;
+
+ cifs_dbg(FYI, "%s: path: %s\n", __func__, npath);
+
+ mutex_lock(&dfs_cache_list_lock);
+
+ ce = do_dfs_cache_find(0, NULL, NULL, 0, npath, true);
+ if (IS_ERR(ce)) {
+ rc = PTR_ERR(ce);
+ goto out;
+ }
+
+ rc = 0;
+
+ t = ce->ce_tgthint;
+
+ if (unlikely(!strcasecmp(it->it_name, t->t_name)))
+ goto out;
+
+ list_for_each_entry(t, &ce->ce_tlist, t_list) {
+ if (!strcasecmp(t->t_name, it->it_name)) {
+ ce->ce_tgthint = t;
+ cifs_dbg(FYI, "%s: new target hint: %s\n", __func__,
+ it->it_name);
+ break;
+ }
+ }
+
+out:
+ mutex_unlock(&dfs_cache_list_lock);
+ free_normalized_path(path, npath);
+ return rc;
+}
+
+/**
+ * dfs_cache_get_tgt_referral - returns a DFS referral (@ref) from a given
+ * target iterator (@it).
+ *
+ * @path: path to lookup in DFS referral cache.
+ * @it: DFS target iterator.
+ * @ref: DFS referral pointer to set up the gathered information.
+ *
+ * Return zero if the DFS referral was set up correctly, otherwise non-zero.
+ */
+int dfs_cache_get_tgt_referral(const char *path,
+ const struct dfs_cache_tgt_iterator *it,
+ struct dfs_info3_param *ref)
+{
+ int rc;
+ char *npath;
+ struct dfs_cache_entry *ce;
+ unsigned int h;
+
+ if (!it || !ref)
+ return -EINVAL;
+ if (unlikely(!is_path_valid(path)))
+ return -EINVAL;
+
+ rc = get_normalized_path(path, &npath);
+ if (rc)
+ return rc;
+
+ cifs_dbg(FYI, "%s: path: %s\n", __func__, npath);
+
+ mutex_lock(&dfs_cache_list_lock);
+
+ ce = find_cache_entry(npath, &h);
+ if (IS_ERR(ce)) {
+ rc = PTR_ERR(ce);
+ goto out;
+ }
+
+ cifs_dbg(FYI, "%s: target name: %s\n", __func__, it->it_name);
+
+ rc = setup_ref(path, ce, ref, it->it_name);
+
+out:
+ mutex_unlock(&dfs_cache_list_lock);
+ free_normalized_path(path, npath);
+ return rc;
+}
+
+static int dup_vol(struct smb_vol *vol, struct smb_vol *new)
+{
+ memcpy(new, vol, sizeof(*new));
+
+ if (vol->username) {
+ new->username = kstrndup(vol->username, strlen(vol->username),
+ GFP_KERNEL);
+ if (!new->username)
+ return -ENOMEM;
+ }
+ if (vol->password) {
+ new->password = kstrndup(vol->password, strlen(vol->password),
+ GFP_KERNEL);
+ if (!new->password)
+ goto err_free_username;
+ }
+ if (vol->UNC) {
+ cifs_dbg(FYI, "%s: vol->UNC: %s\n", __func__, vol->UNC);
+ new->UNC = kstrndup(vol->UNC, strlen(vol->UNC), GFP_KERNEL);
+ if (!new->UNC)
+ goto err_free_password;
+ }
+ if (vol->domainname) {
+ new->domainname = kstrndup(vol->domainname,
+ strlen(vol->domainname), GFP_KERNEL);
+ if (!new->domainname)
+ goto err_free_unc;
+ }
+ if (vol->iocharset) {
+ new->iocharset = kstrndup(vol->iocharset,
+ strlen(vol->iocharset), GFP_KERNEL);
+ if (!new->iocharset)
+ goto err_free_domainname;
+ }
+ if (vol->prepath) {
+ cifs_dbg(FYI, "%s: vol->prepath: %s\n", __func__, vol->prepath);
+ new->prepath = kstrndup(vol->prepath, strlen(vol->prepath),
+ GFP_KERNEL);
+ if (!new->prepath)
+ goto err_free_iocharset;
+ }
+
+ return 0;
+
+err_free_iocharset:
+ kfree(new->iocharset);
+err_free_domainname:
+ kfree(new->domainname);
+err_free_unc:
+ kfree(new->UNC);
+err_free_password:
+ kzfree(new->password);
+err_free_username:
+ kfree(new->username);
+ kfree(new);
+ return -ENOMEM;
+}
+
+/**
+ * dfs_cache_add_vol - add a cifs volume during mount() that will be handled by
+ * DFS cache refresh worker.
+ *
+ * @vol: cifs volume.
+ * @fullpath: origin full path.
+ *
+ * Return zero if volume was set up correctly, otherwise non-zero.
+ */
+int dfs_cache_add_vol(struct smb_vol *vol, const char *fullpath)
+{
+ int rc;
+ struct dfs_cache_vol_info *vi;
+
+ if (!vol || !fullpath)
+ return -EINVAL;
+
+ cifs_dbg(FYI, "%s: fullpath: %s\n", __func__, fullpath);
+
+ vi = kzalloc(sizeof(*vi), GFP_KERNEL);
+ if (!vi)
+ return -ENOMEM;
+
+ vi->vi_fullpath = kstrndup(fullpath, strlen(fullpath), GFP_KERNEL);
+ if (!vi->vi_fullpath) {
+ rc = -ENOMEM;
+ goto err_free_vi;
+ }
+
+ rc = dup_vol(vol, &vi->vi_vol);
+ if (rc)
+ goto err_free_fullpath;
+
+ mutex_lock(&dfs_cache.dc_lock);
+ list_add_tail(&vi->vi_list, &dfs_cache.dc_vol_list);
+ mutex_unlock(&dfs_cache.dc_lock);
+ return 0;
+
+err_free_fullpath:
+ kfree(vi->vi_fullpath);
+err_free_vi:
+ kfree(vi);
+ return rc;
+}
+
+static inline struct dfs_cache_vol_info *find_vol(const char *fullpath)
+{
+ struct dfs_cache_vol_info *vi;
+
+ list_for_each_entry(vi, &dfs_cache.dc_vol_list, vi_list) {
+ cifs_dbg(FYI, "%s: vi->vi_fullpath: %s\n", __func__,
+ vi->vi_fullpath);
+ if (!strcasecmp(vi->vi_fullpath, fullpath))
+ return vi;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
+/**
+ * dfs_cache_update_vol - update vol info in DFS cache after failover
+ *
+ * @fullpath: fullpath to look up in volume list.
+ * @server: TCP ses pointer.
+ *
+ * Return zero if volume was updated, otherwise non-zero.
+ */
+int dfs_cache_update_vol(const char *fullpath, struct TCP_Server_Info *server)
+{
+ int rc;
+ struct dfs_cache_vol_info *vi;
+
+ if (!fullpath || !server)
+ return -EINVAL;
+
+ cifs_dbg(FYI, "%s: fullpath: %s\n", __func__, fullpath);
+
+ mutex_lock(&dfs_cache.dc_lock);
+
+ vi = find_vol(fullpath);
+ if (IS_ERR(vi)) {
+ rc = PTR_ERR(vi);
+ goto out;
+ }
+
+ cifs_dbg(FYI, "%s: updating volume info\n", __func__);
+ memcpy(&vi->vi_vol.dstaddr, &server->dstaddr,
+ sizeof(vi->vi_vol.dstaddr));
+ rc = 0;
+
+out:
+ mutex_unlock(&dfs_cache.dc_lock);
+ return rc;
+}
+
+/**
+ * dfs_cache_del_vol - remove volume info in DFS cache during umount()
+ *
+ * @fullpath: fullpath to look up in volume list.
+ */
+void dfs_cache_del_vol(const char *fullpath)
+{
+ struct dfs_cache_vol_info *vi;
+
+ if (!fullpath || !*fullpath)
+ return;
+
+ cifs_dbg(FYI, "%s: fullpath: %s\n", __func__, fullpath);
+
+ mutex_lock(&dfs_cache.dc_lock);
+ vi = find_vol(fullpath);
+ if (!IS_ERR(vi))
+ free_vol(vi);
+ mutex_unlock(&dfs_cache.dc_lock);
+}
+
+/* Get all tcons that are within a DFS namespace and can be refreshed */
+static void get_tcons(struct TCP_Server_Info *server, struct list_head *head)
+{
+ struct cifs_ses *ses;
+ struct cifs_tcon *tcon;
+
+ INIT_LIST_HEAD(head);
+
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
+ if (!tcon->need_reconnect && !tcon->need_reopen_files &&
+ tcon->dfs_path) {
+ tcon->tc_count++;
+ list_add_tail(&tcon->ulist, head);
+ }
+ }
+ if (ses->tcon_ipc && !ses->tcon_ipc->need_reconnect &&
+ ses->tcon_ipc->dfs_path) {
+ list_add_tail(&ses->tcon_ipc->ulist, head);
+ }
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
+}
+
+/* Refresh DFS cache entry from a given tcon */
+static void do_refresh_tcon(struct dfs_cache *dc, struct cifs_tcon *tcon)
+{
+ int rc = 0;
+ unsigned int xid;
+ char *path, *npath;
+ unsigned int h;
+ struct dfs_cache_entry *ce;
+ struct dfs_info3_param *refs = NULL;
+ int numrefs = 0;
+
+ xid = get_xid();
+
+ path = tcon->dfs_path + 1;
+
+ rc = get_normalized_path(path, &npath);
+ if (rc)
+ goto out;
+
+ mutex_lock(&dfs_cache_list_lock);
+ ce = find_cache_entry(npath, &h);
+ mutex_unlock(&dfs_cache_list_lock);
+
+ if (IS_ERR(ce)) {
+ rc = PTR_ERR(ce);
+ goto out;
+ }
+
+ if (!cache_entry_expired(ce))
+ goto out;
+
+ if (unlikely(!tcon->ses->server->ops->get_dfs_refer)) {
+ rc = -EOPNOTSUPP;
+ } else {
+ rc = tcon->ses->server->ops->get_dfs_refer(xid, tcon->ses, path,
+ &refs, &numrefs,
+ dc->dc_nlsc,
+ tcon->remap);
+ if (!rc) {
+ mutex_lock(&dfs_cache_list_lock);
+ ce = __update_cache_entry(npath, refs, numrefs);
+ mutex_unlock(&dfs_cache_list_lock);
+ dump_refs(refs, numrefs);
+ free_dfs_info_array(refs, numrefs);
+ if (IS_ERR(ce))
+ rc = PTR_ERR(ce);
+ }
+ }
+ if (rc)
+ cifs_dbg(FYI, "%s: failed to update expired entry\n", __func__);
+out:
+ free_xid(xid);
+ free_normalized_path(path, npath);
+}
+
+/*
+ * Worker that will refresh DFS cache based on lowest TTL value from a DFS
+ * referral.
+ *
+ * FIXME: ensure that all requests are sent to DFS root for refreshing the
+ * cache.
+ */
+static void refresh_cache_worker(struct work_struct *work)
+{
+ struct dfs_cache *dc = container_of(work, struct dfs_cache,
+ dc_refresh.work);
+ struct dfs_cache_vol_info *vi;
+ struct TCP_Server_Info *server;
+ LIST_HEAD(list);
+ struct cifs_tcon *tcon, *ntcon;
+
+ mutex_lock(&dc->dc_lock);
+
+ list_for_each_entry(vi, &dc->dc_vol_list, vi_list) {
+ server = cifs_find_tcp_session(&vi->vi_vol);
+ if (IS_ERR_OR_NULL(server))
+ continue;
+ if (server->tcpStatus != CifsGood)
+ goto next;
+ get_tcons(server, &list);
+ list_for_each_entry_safe(tcon, ntcon, &list, ulist) {
+ do_refresh_tcon(dc, tcon);
+ list_del_init(&tcon->ulist);
+ cifs_put_tcon(tcon);
+ }
+next:
+ cifs_put_tcp_session(server, 0);
+ }
+ queue_delayed_work(cifsiod_wq, &dc->dc_refresh, dc->dc_ttl * HZ);
+ mutex_unlock(&dc->dc_lock);
+}
diff --git a/fs/cifs/dfs_cache.h b/fs/cifs/dfs_cache.h
new file mode 100644
index 000000000000..22f366514f3a
--- /dev/null
+++ b/fs/cifs/dfs_cache.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * DFS referral cache routines
+ *
+ * Copyright (c) 2018 Paulo Alcantara <palcantara@suse.de>
+ */
+
+#ifndef _CIFS_DFS_CACHE_H
+#define _CIFS_DFS_CACHE_H
+
+#include <linux/nls.h>
+#include <linux/list.h>
+#include "cifsglob.h"
+
+struct dfs_cache_tgt_list {
+ int tl_numtgts;
+ struct list_head tl_list;
+};
+
+struct dfs_cache_tgt_iterator {
+ char *it_name;
+ struct list_head it_list;
+};
+
+extern int dfs_cache_init(void);
+extern void dfs_cache_destroy(void);
+extern const struct file_operations dfscache_proc_fops;
+
+extern int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses,
+ const struct nls_table *nls_codepage, int remap,
+ const char *path, struct dfs_info3_param *ref,
+ struct dfs_cache_tgt_list *tgt_list);
+extern int dfs_cache_noreq_find(const char *path, struct dfs_info3_param *ref,
+ struct dfs_cache_tgt_list *tgt_list);
+extern int dfs_cache_update_tgthint(const unsigned int xid,
+ struct cifs_ses *ses,
+ const struct nls_table *nls_codepage,
+ int remap, const char *path,
+ const struct dfs_cache_tgt_iterator *it);
+extern int
+dfs_cache_noreq_update_tgthint(const char *path,
+ const struct dfs_cache_tgt_iterator *it);
+extern int dfs_cache_get_tgt_referral(const char *path,
+ const struct dfs_cache_tgt_iterator *it,
+ struct dfs_info3_param *ref);
+extern int dfs_cache_add_vol(struct smb_vol *vol, const char *fullpath);
+extern int dfs_cache_update_vol(const char *fullpath,
+ struct TCP_Server_Info *server);
+extern void dfs_cache_del_vol(const char *fullpath);
+
+static inline struct dfs_cache_tgt_iterator *
+dfs_cache_get_next_tgt(struct dfs_cache_tgt_list *tl,
+ struct dfs_cache_tgt_iterator *it)
+{
+ if (!tl || list_empty(&tl->tl_list) || !it ||
+ list_is_last(&it->it_list, &tl->tl_list))
+ return NULL;
+ return list_next_entry(it, it_list);
+}
+
+static inline struct dfs_cache_tgt_iterator *
+dfs_cache_get_tgt_iterator(struct dfs_cache_tgt_list *tl)
+{
+ if (!tl)
+ return NULL;
+ return list_first_entry_or_null(&tl->tl_list,
+ struct dfs_cache_tgt_iterator,
+ it_list);
+}
+
+static inline void dfs_cache_free_tgts(struct dfs_cache_tgt_list *tl)
+{
+ struct dfs_cache_tgt_iterator *it, *nit;
+
+ if (!tl || list_empty(&tl->tl_list))
+ return;
+ list_for_each_entry_safe(it, nit, &tl->tl_list, it_list) {
+ list_del(&it->it_list);
+ kfree(it->it_name);
+ kfree(it);
+ }
+ tl->tl_numtgts = 0;
+}
+
+static inline const char *
+dfs_cache_get_tgt_name(const struct dfs_cache_tgt_iterator *it)
+{
+ return it ? it->it_name : NULL;
+}
+
+static inline int
+dfs_cache_get_nr_tgts(const struct dfs_cache_tgt_list *tl)
+{
+ return tl ? tl->tl_numtgts : 0;
+}
+
+#endif /* _CIFS_DFS_CACHE_H */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c9bc56b1baac..2c7689f3998d 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -33,6 +33,7 @@
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
+#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
@@ -732,7 +733,8 @@ reopen_success:
if (can_flush) {
rc = filemap_write_and_wait(inode->i_mapping);
- mapping_set_error(inode->i_mapping, rc);
+ if (!is_interrupt_error(rc))
+ mapping_set_error(inode->i_mapping, rc);
if (tcon->unix_ext)
rc = cifs_get_inode_info_unix(&inode, full_path,
@@ -1103,10 +1105,10 @@ try_again:
rc = posix_lock_file(file, flock, NULL);
up_write(&cinode->lock_sem);
if (rc == FILE_LOCK_DEFERRED) {
- rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
+ rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
if (!rc)
goto try_again;
- posix_unblock_lock(flock);
+ locks_delete_block(flock);
}
return rc;
}
@@ -1131,14 +1133,18 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
/*
* Accessing maxBuf is racy with cifs_reconnect - need to store value
- * and check it for zero before using.
+ * and check it before using.
*/
max_buf = tcon->ses->server->maxBuf;
- if (!max_buf) {
+ if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
free_xid(xid);
return -EINVAL;
}
+ BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
+ PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
+ PAGE_SIZE);
max_num = (max_buf - sizeof(struct smb_hdr)) /
sizeof(LOCKING_ANDX_RANGE);
buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
@@ -1471,12 +1477,16 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
/*
* Accessing maxBuf is racy with cifs_reconnect - need to store value
- * and check it for zero before using.
+ * and check it before using.
*/
max_buf = tcon->ses->server->maxBuf;
- if (!max_buf)
+ if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
return -EINVAL;
+ BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
+ PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
+ PAGE_SIZE);
max_num = (max_buf - sizeof(struct smb_hdr)) /
sizeof(LOCKING_ANDX_RANGE);
buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
@@ -2109,6 +2119,7 @@ static int cifs_writepages(struct address_space *mapping,
pgoff_t end, index;
struct cifs_writedata *wdata;
int rc = 0;
+ int saved_rc = 0;
unsigned int xid;
/*
@@ -2137,8 +2148,10 @@ retry:
rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
&wsize, &credits);
- if (rc)
+ if (rc != 0) {
+ done = true;
break;
+ }
tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
@@ -2146,6 +2159,7 @@ retry:
&found_pages);
if (!wdata) {
rc = -ENOMEM;
+ done = true;
add_credits_and_wake_if(server, credits, 0);
break;
}
@@ -2174,7 +2188,7 @@ retry:
if (rc != 0) {
add_credits_and_wake_if(server, wdata->credits, 0);
for (i = 0; i < nr_pages; ++i) {
- if (rc == -EAGAIN)
+ if (is_retryable_error(rc))
redirty_page_for_writepage(wbc,
wdata->pages[i]);
else
@@ -2182,7 +2196,7 @@ retry:
end_page_writeback(wdata->pages[i]);
put_page(wdata->pages[i]);
}
- if (rc != -EAGAIN)
+ if (!is_retryable_error(rc))
mapping_set_error(mapping, rc);
}
kref_put(&wdata->refcount, cifs_writedata_release);
@@ -2192,6 +2206,15 @@ retry:
continue;
}
+ /* Return immediately if we received a signal during writing */
+ if (is_interrupt_error(rc)) {
+ done = true;
+ break;
+ }
+
+ if (rc != 0 && saved_rc == 0)
+ saved_rc = rc;
+
wbc->nr_to_write -= nr_pages;
if (wbc->nr_to_write <= 0)
done = true;
@@ -2209,6 +2232,9 @@ retry:
goto retry;
}
+ if (saved_rc != 0)
+ rc = saved_rc;
+
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
mapping->writeback_index = index;
@@ -2241,8 +2267,8 @@ cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
set_page_writeback(page);
retry_write:
rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
- if (rc == -EAGAIN) {
- if (wbc->sync_mode == WB_SYNC_ALL)
+ if (is_retryable_error(rc)) {
+ if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
goto retry_write;
redirty_page_for_writepage(wbc, page);
} else if (rc != 0) {
@@ -2617,11 +2643,13 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
if (rc)
break;
+ cur_len = min_t(const size_t, len, wsize);
+
if (ctx->direct_io) {
ssize_t result;
result = iov_iter_get_pages_alloc(
- from, &pagevec, wsize, &start);
+ from, &pagevec, cur_len, &start);
if (result < 0) {
cifs_dbg(VFS,
"direct_writev couldn't get user pages "
@@ -2630,6 +2658,9 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
result, from->type,
from->iov_offset, from->count);
dump_stack();
+
+ rc = result;
+ add_credits_and_wake_if(server, credits, 0);
break;
}
cur_len = (size_t)result;
@@ -3313,13 +3344,16 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
cur_len, &start);
if (result < 0) {
cifs_dbg(VFS,
- "couldn't get user pages (cur_len=%zd)"
+ "couldn't get user pages (rc=%zd)"
" iter type %d"
" iov_offset %zd count %zd\n",
result, direct_iov.type,
direct_iov.iov_offset,
direct_iov.count);
dump_stack();
+
+ rc = result;
+ add_credits_and_wake_if(server, credits, 0);
break;
}
cur_len = (size_t)result;
@@ -3956,7 +3990,7 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
INIT_LIST_HEAD(tmplist);
- page = list_entry(page_list->prev, struct page, lru);
+ page = lru_to_page(page_list);
/*
* Lock the page and put it in the cache. Since no one else
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a81a9df997c1..478003644916 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -333,7 +333,7 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb)
fattr->cf_mtime = timespec64_trunc(fattr->cf_mtime, sb->s_time_gran);
fattr->cf_atime = fattr->cf_ctime = fattr->cf_mtime;
fattr->cf_nlink = 2;
- fattr->cf_flags |= CIFS_FATTR_DFS_REFERRAL;
+ fattr->cf_flags = CIFS_FATTR_DFS_REFERRAL;
}
static int
@@ -730,7 +730,6 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
FILE_ALL_INFO *data, struct super_block *sb, int xid,
const struct cifs_fid *fid)
{
- bool validinum = false;
__u16 srchflgs;
int rc = 0, tmprc = ENOSYS;
struct cifs_tcon *tcon;
@@ -821,7 +820,6 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
(FILE_DIRECTORY_INFO *)data, cifs_sb);
fattr.cf_uniqueid = le64_to_cpu(
((SEARCH_ID_FULL_DIR_INFO *)data)->UniqueId);
- validinum = true;
cifs_buf_release(srchinf->ntwrk_buf_start);
}
@@ -840,31 +838,29 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
*/
if (*inode == NULL) {
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
- if (validinum == false) {
- if (server->ops->get_srv_inum)
- tmprc = server->ops->get_srv_inum(xid,
- tcon, cifs_sb, full_path,
- &fattr.cf_uniqueid, data);
- if (tmprc) {
- cifs_dbg(FYI, "GetSrvInodeNum rc %d\n",
- tmprc);
- fattr.cf_uniqueid = iunique(sb, ROOT_I);
- cifs_autodisable_serverino(cifs_sb);
- } else if ((fattr.cf_uniqueid == 0) &&
- strlen(full_path) == 0) {
- /* some servers ret bad root ino ie 0 */
- cifs_dbg(FYI, "Invalid (0) inodenum\n");
- fattr.cf_flags |=
- CIFS_FATTR_FAKE_ROOT_INO;
- fattr.cf_uniqueid =
- simple_hashstr(tcon->treeName);
- }
+ if (server->ops->get_srv_inum)
+ tmprc = server->ops->get_srv_inum(xid,
+ tcon, cifs_sb, full_path,
+ &fattr.cf_uniqueid, data);
+ if (tmprc) {
+ cifs_dbg(FYI, "GetSrvInodeNum rc %d\n",
+ tmprc);
+ fattr.cf_uniqueid = iunique(sb, ROOT_I);
+ cifs_autodisable_serverino(cifs_sb);
+ } else if ((fattr.cf_uniqueid == 0) &&
+ strlen(full_path) == 0) {
+ /* some servers ret bad root ino ie 0 */
+ cifs_dbg(FYI, "Invalid (0) inodenum\n");
+ fattr.cf_flags |=
+ CIFS_FATTR_FAKE_ROOT_INO;
+ fattr.cf_uniqueid =
+ simple_hashstr(tcon->treeName);
}
} else
fattr.cf_uniqueid = iunique(sb, ROOT_I);
} else {
- if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
- validinum == false && server->ops->get_srv_inum) {
+ if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
+ && server->ops->get_srv_inum) {
/*
* Pass a NULL tcon to ensure we don't make a round
* trip to the server. This only works for SMB2+.
@@ -2261,6 +2257,11 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
* the flush returns error?
*/
rc = filemap_write_and_wait(inode->i_mapping);
+ if (is_interrupt_error(rc)) {
+ rc = -ERESTARTSYS;
+ goto out;
+ }
+
mapping_set_error(inode->i_mapping, rc);
rc = 0;
@@ -2404,6 +2405,11 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
* the flush returns error?
*/
rc = filemap_write_and_wait(inode->i_mapping);
+ if (is_interrupt_error(rc)) {
+ rc = -ERESTARTSYS;
+ goto cifs_setattr_exit;
+ }
+
mapping_set_error(inode->i_mapping, rc);
rc = 0;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 8a41f4eba726..bee203055b30 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -111,21 +111,27 @@ struct cifs_tcon *
tconInfoAlloc(void)
{
struct cifs_tcon *ret_buf;
- ret_buf = kzalloc(sizeof(struct cifs_tcon), GFP_KERNEL);
- if (ret_buf) {
- atomic_inc(&tconInfoAllocCount);
- ret_buf->tidStatus = CifsNew;
- ++ret_buf->tc_count;
- INIT_LIST_HEAD(&ret_buf->openFileList);
- INIT_LIST_HEAD(&ret_buf->tcon_list);
- spin_lock_init(&ret_buf->open_file_lock);
- mutex_init(&ret_buf->crfid.fid_mutex);
- ret_buf->crfid.fid = kzalloc(sizeof(struct cifs_fid),
- GFP_KERNEL);
- spin_lock_init(&ret_buf->stat_lock);
- atomic_set(&ret_buf->num_local_opens, 0);
- atomic_set(&ret_buf->num_remote_opens, 0);
+
+ ret_buf = kzalloc(sizeof(*ret_buf), GFP_KERNEL);
+ if (!ret_buf)
+ return NULL;
+ ret_buf->crfid.fid = kzalloc(sizeof(*ret_buf->crfid.fid), GFP_KERNEL);
+ if (!ret_buf->crfid.fid) {
+ kfree(ret_buf);
+ return NULL;
}
+
+ atomic_inc(&tconInfoAllocCount);
+ ret_buf->tidStatus = CifsNew;
+ ++ret_buf->tc_count;
+ INIT_LIST_HEAD(&ret_buf->openFileList);
+ INIT_LIST_HEAD(&ret_buf->tcon_list);
+ spin_lock_init(&ret_buf->open_file_lock);
+ mutex_init(&ret_buf->crfid.fid_mutex);
+ spin_lock_init(&ret_buf->stat_lock);
+ atomic_set(&ret_buf->num_local_opens, 0);
+ atomic_set(&ret_buf->num_remote_opens, 0);
+
return ret_buf;
}
@@ -140,6 +146,9 @@ tconInfoFree(struct cifs_tcon *buf_to_free)
kfree(buf_to_free->nativeFileSystem);
kzfree(buf_to_free->password);
kfree(buf_to_free->crfid.fid);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ kfree(buf_to_free->dfs_path);
+#endif
kfree(buf_to_free);
}
@@ -525,9 +534,17 @@ void
cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb)
{
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
+ struct cifs_tcon *tcon = NULL;
+
+ if (cifs_sb->master_tlink)
+ tcon = cifs_sb_master_tcon(cifs_sb);
+
cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM;
- cifs_dbg(VFS, "Autodisabling the use of server inode numbers on %s. This server doesn't seem to support them properly. Hardlinks will not be recognized on this mount. Consider mounting with the \"noserverino\" option to silence this message.\n",
- cifs_sb_master_tcon(cifs_sb)->treeName);
+ cifs_dbg(VFS, "Autodisabling the use of server inode numbers on %s.\n",
+ tcon ? tcon->treeName : "new server");
+ cifs_dbg(VFS, "The server doesn't seem to support them properly or the files might be on different servers (DFS).\n");
+ cifs_dbg(VFS, "Hardlinks will not be recognized on this mount. Consider mounting with the \"noserverino\" option to silence this message.\n");
+
}
}
@@ -732,6 +749,8 @@ parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size,
goto parse_DFS_referrals_exit;
}
+ node->ttl = le32_to_cpu(ref->TimeToLive);
+
ref++;
}
@@ -933,3 +952,20 @@ void rqst_page_get_length(struct smb_rqst *rqst, unsigned int page,
else if (page == 0)
*len = rqst->rq_pagesz - rqst->rq_offset;
}
+
+void extract_unc_hostname(const char *unc, const char **h, size_t *len)
+{
+ const char *end;
+
+ /* skip initial slashes */
+ while (*unc && (*unc == '\\' || *unc == '/'))
+ unc++;
+
+ end = unc;
+
+ while (*end && !(*end == '\\' || *end == '/'))
+ end++;
+
+ *h = unc;
+ *len = end - unc;
+}
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index e169e1a5fd35..3925a7bfc74d 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -655,7 +655,14 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
/* scan and find it */
int i;
char *cur_ent;
- char *end_of_smb = cfile->srch_inf.ntwrk_buf_start +
+ char *end_of_smb;
+
+ if (cfile->srch_inf.ntwrk_buf_start == NULL) {
+ cifs_dbg(VFS, "ntwrk_buf_start is NULL during readdir\n");
+ return -EIO;
+ }
+
+ end_of_smb = cfile->srch_inf.ntwrk_buf_start +
server->ops->calc_smb_size(
cfile->srch_inf.ntwrk_buf_start,
server);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index aa23c00367ec..dcd49ad60c83 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -534,9 +534,9 @@ cifs_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
if (global_secflags & CIFSSEC_MAY_NTLM)
return NTLM;
default:
- /* Fallthrough to attempt LANMAN authentication next */
break;
}
+ /* Fallthrough - to attempt LANMAN authentication next */
case CIFS_NEGFLAVOR_LANMAN:
switch (requested) {
case LANMAN:
@@ -1154,14 +1154,12 @@ out:
static int
_sess_auth_rawntlmssp_assemble_req(struct sess_data *sess_data)
{
- struct smb_hdr *smb_buf;
SESSION_SETUP_ANDX *pSMB;
struct cifs_ses *ses = sess_data->ses;
__u32 capabilities;
char *bcc_ptr;
pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
- smb_buf = (struct smb_hdr *)pSMB;
capabilities = cifs_ssetup_hdr(ses, pSMB);
if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 378151e09e91..32a6c020478f 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -929,19 +929,18 @@ cifs_unix_dfs_readlink(const unsigned int xid, struct cifs_tcon *tcon,
{
#ifdef CONFIG_CIFS_DFS_UPCALL
int rc;
- unsigned int num_referrals = 0;
- struct dfs_info3_param *referrals = NULL;
+ struct dfs_info3_param referral = {0};
- rc = get_dfs_path(xid, tcon->ses, searchName, nls_codepage,
- &num_referrals, &referrals, 0);
+ rc = get_dfs_path(xid, tcon->ses, searchName, nls_codepage, &referral,
+ 0);
- if (!rc && num_referrals > 0) {
- *symlinkinfo = kstrndup(referrals->node_name,
- strlen(referrals->node_name),
+ if (!rc) {
+ *symlinkinfo = kstrndup(referral.node_name,
+ strlen(referral.node_name),
GFP_KERNEL);
+ free_dfs_info_param(&referral);
if (!*symlinkinfo)
rc = -ENOMEM;
- free_dfs_info_array(referrals, num_referrals);
}
return rc;
#else /* No DFS support */
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 4ed10dd086e6..b204e84b87fb 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -122,12 +122,14 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
/*
* Accessing maxBuf is racy with cifs_reconnect - need to store value
- * and check it for zero before using.
+ * and check it before using.
*/
max_buf = tcon->ses->server->maxBuf;
- if (!max_buf)
+ if (max_buf < sizeof(struct smb2_lock_element))
return -EINVAL;
+ BUILD_BUG_ON(sizeof(struct smb2_lock_element) > PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf, PAGE_SIZE);
max_num = max_buf / sizeof(struct smb2_lock_element);
buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
if (!buf)
@@ -264,6 +266,8 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
return -EINVAL;
}
+ BUILD_BUG_ON(sizeof(struct smb2_lock_element) > PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf, PAGE_SIZE);
max_num = max_buf / sizeof(struct smb2_lock_element);
buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
if (!buf) {
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 9e7ef7ec2d70..f14533da3a93 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -49,7 +49,6 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_open_parms oparms;
struct cifs_fid fid;
struct cifs_ses *ses = tcon->ses;
- struct TCP_Server_Info *server = ses->server;
int num_rqst = 0;
struct smb_rqst rqst[3];
int resp_buftype[3];
@@ -97,7 +96,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
goto finished;
- smb2_set_next_command(server, &rqst[num_rqst++]);
+ smb2_set_next_command(tcon, &rqst[num_rqst++]);
/* Operation */
switch (command) {
@@ -111,7 +110,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
SMB2_O_INFO_FILE, 0,
sizeof(struct smb2_file_all_info) +
PATH_MAX * 2, 0, NULL);
- smb2_set_next_command(server, &rqst[num_rqst]);
+ smb2_set_next_command(tcon, &rqst[num_rqst]);
smb2_set_related(&rqst[num_rqst++]);
break;
case SMB2_OP_DELETE:
@@ -127,14 +126,14 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
rqst[num_rqst].rq_iov = si_iov;
rqst[num_rqst].rq_nvec = 1;
- size[0] = 8;
+ size[0] = 1; /* sizeof __u8 See MS-FSCC section 2.4.11 */
data[0] = &delete_pending[0];
rc = SMB2_set_info_init(tcon, &rqst[num_rqst], COMPOUND_FID,
COMPOUND_FID, current->tgid,
FILE_DISPOSITION_INFORMATION,
SMB2_O_INFO_FILE, 0, data, size);
- smb2_set_next_command(server, &rqst[num_rqst]);
+ smb2_set_next_command(tcon, &rqst[num_rqst]);
smb2_set_related(&rqst[num_rqst++]);
break;
case SMB2_OP_SET_EOF:
@@ -149,7 +148,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
COMPOUND_FID, current->tgid,
FILE_END_OF_FILE_INFORMATION,
SMB2_O_INFO_FILE, 0, data, size);
- smb2_set_next_command(server, &rqst[num_rqst]);
+ smb2_set_next_command(tcon, &rqst[num_rqst]);
smb2_set_related(&rqst[num_rqst++]);
break;
case SMB2_OP_SET_INFO:
@@ -165,7 +164,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
COMPOUND_FID, current->tgid,
FILE_BASIC_INFORMATION,
SMB2_O_INFO_FILE, 0, data, size);
- smb2_set_next_command(server, &rqst[num_rqst]);
+ smb2_set_next_command(tcon, &rqst[num_rqst]);
smb2_set_related(&rqst[num_rqst++]);
break;
case SMB2_OP_RENAME:
@@ -189,7 +188,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
COMPOUND_FID, current->tgid,
FILE_RENAME_INFORMATION,
SMB2_O_INFO_FILE, 0, data, size);
- smb2_set_next_command(server, &rqst[num_rqst]);
+ smb2_set_next_command(tcon, &rqst[num_rqst]);
smb2_set_related(&rqst[num_rqst++]);
break;
case SMB2_OP_HARDLINK:
@@ -213,7 +212,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
COMPOUND_FID, current->tgid,
FILE_LINK_INFORMATION,
SMB2_O_INFO_FILE, 0, data, size);
- smb2_set_next_command(server, &rqst[num_rqst]);
+ smb2_set_next_command(tcon, &rqst[num_rqst]);
smb2_set_related(&rqst[num_rqst++]);
break;
default:
@@ -388,7 +387,6 @@ smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon,
rc = -ENOMEM;
goto smb2_rename_path;
}
-
rc = smb2_compound_op(xid, tcon, cifs_sb, from_name, access,
FILE_OPEN, 0, smb2_to_name, command);
smb2_rename_path:
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index d47b7f5dfa6c..924269cec135 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -379,8 +379,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
{STATUS_NONEXISTENT_EA_ENTRY, -EIO, "STATUS_NONEXISTENT_EA_ENTRY"},
{STATUS_NO_EAS_ON_FILE, -ENODATA, "STATUS_NO_EAS_ON_FILE"},
{STATUS_EA_CORRUPT_ERROR, -EIO, "STATUS_EA_CORRUPT_ERROR"},
- {STATUS_FILE_LOCK_CONFLICT, -EIO, "STATUS_FILE_LOCK_CONFLICT"},
- {STATUS_LOCK_NOT_GRANTED, -EIO, "STATUS_LOCK_NOT_GRANTED"},
+ {STATUS_FILE_LOCK_CONFLICT, -EACCES, "STATUS_FILE_LOCK_CONFLICT"},
+ {STATUS_LOCK_NOT_GRANTED, -EACCES, "STATUS_LOCK_NOT_GRANTED"},
{STATUS_DELETE_PENDING, -ENOENT, "STATUS_DELETE_PENDING"},
{STATUS_CTL_FILE_NOT_SUPPORTED, -ENOSYS,
"STATUS_CTL_FILE_NOT_SUPPORTED"},
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 225fec1cfa67..cf7eb891804f 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -831,72 +831,48 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
{
int rc;
__le16 *utf16_path;
- __u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
- struct cifs_open_parms oparms;
- struct cifs_fid fid;
- struct smb2_file_full_ea_info *smb2_data;
- int ea_buf_size = SMB2_MIN_EA_BUF;
+ struct kvec rsp_iov = {NULL, 0};
+ int buftype = CIFS_NO_BUFFER;
+ struct smb2_query_info_rsp *rsp;
+ struct smb2_file_full_ea_info *info = NULL;
utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
if (!utf16_path)
return -ENOMEM;
- oparms.tcon = tcon;
- oparms.desired_access = FILE_READ_EA;
- oparms.disposition = FILE_OPEN;
- if (backup_cred(cifs_sb))
- oparms.create_options = CREATE_OPEN_BACKUP_INTENT;
- else
- oparms.create_options = 0;
- oparms.fid = &fid;
- oparms.reconnect = false;
-
- rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, NULL);
- kfree(utf16_path);
+ rc = smb2_query_info_compound(xid, tcon, utf16_path,
+ FILE_READ_EA,
+ FILE_FULL_EA_INFORMATION,
+ SMB2_O_INFO_FILE,
+ SMB2_MAX_EA_BUF,
+ &rsp_iov, &buftype, cifs_sb);
if (rc) {
- cifs_dbg(FYI, "open failed rc=%d\n", rc);
- return rc;
- }
-
- while (1) {
- smb2_data = kzalloc(ea_buf_size, GFP_KERNEL);
- if (smb2_data == NULL) {
- SMB2_close(xid, tcon, fid.persistent_fid,
- fid.volatile_fid);
- return -ENOMEM;
- }
-
- rc = SMB2_query_eas(xid, tcon, fid.persistent_fid,
- fid.volatile_fid,
- ea_buf_size, smb2_data);
-
- if (rc != -E2BIG)
- break;
-
- kfree(smb2_data);
- ea_buf_size <<= 1;
-
- if (ea_buf_size > SMB2_MAX_EA_BUF) {
- cifs_dbg(VFS, "EA size is too large\n");
- SMB2_close(xid, tcon, fid.persistent_fid,
- fid.volatile_fid);
- return -ENOMEM;
- }
+ /*
+ * If ea_name is NULL (listxattr) and there are no EAs,
+ * return 0 as it's not an error. Otherwise, the specified
+ * ea_name was not found.
+ */
+ if (!ea_name && rc == -ENODATA)
+ rc = 0;
+ goto qeas_exit;
}
- SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+ rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base;
+ rc = smb2_validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
+ le32_to_cpu(rsp->OutputBufferLength),
+ &rsp_iov,
+ sizeof(struct smb2_file_full_ea_info));
+ if (rc)
+ goto qeas_exit;
- /*
- * If ea_name is NULL (listxattr) and there are no EAs, return 0 as it's
- * not an error. Otherwise, the specified ea_name was not found.
- */
- if (!rc)
- rc = move_smb2_ea_to_cifs(ea_data, buf_size, smb2_data,
- SMB2_MAX_EA_BUF, ea_name);
- else if (!ea_name && rc == -ENODATA)
- rc = 0;
+ info = (struct smb2_file_full_ea_info *)(
+ le16_to_cpu(rsp->OutputBufferOffset) + (char *)rsp);
+ rc = move_smb2_ea_to_cifs(ea_data, buf_size, info,
+ le32_to_cpu(rsp->OutputBufferLength), ea_name);
- kfree(smb2_data);
+ qeas_exit:
+ kfree(utf16_path);
+ free_rsp_buf(buftype, rsp_iov.iov_base);
return rc;
}
@@ -907,14 +883,27 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
const __u16 ea_value_len, const struct nls_table *nls_codepage,
struct cifs_sb_info *cifs_sb)
{
- int rc;
- __le16 *utf16_path;
- __u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
- struct cifs_open_parms oparms;
- struct cifs_fid fid;
- struct smb2_file_full_ea_info *ea;
+ struct cifs_ses *ses = tcon->ses;
+ __le16 *utf16_path = NULL;
int ea_name_len = strlen(ea_name);
+ int flags = 0;
int len;
+ struct smb_rqst rqst[3];
+ int resp_buftype[3];
+ struct kvec rsp_iov[3];
+ struct kvec open_iov[SMB2_CREATE_IOV_SIZE];
+ struct cifs_open_parms oparms;
+ __u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
+ struct cifs_fid fid;
+ struct kvec si_iov[SMB2_SET_INFO_IOV_SIZE];
+ unsigned int size[1];
+ void *data[1];
+ struct smb2_file_full_ea_info *ea = NULL;
+ struct kvec close_iov[1];
+ int rc;
+
+ if (smb3_encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
if (ea_name_len > 255)
return -EINVAL;
@@ -923,6 +912,16 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
if (!utf16_path)
return -ENOMEM;
+ memset(rqst, 0, sizeof(rqst));
+ resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER;
+ memset(rsp_iov, 0, sizeof(rsp_iov));
+
+ /* Open */
+ memset(&open_iov, 0, sizeof(open_iov));
+ rqst[0].rq_iov = open_iov;
+ rqst[0].rq_nvec = SMB2_CREATE_IOV_SIZE;
+
+ memset(&oparms, 0, sizeof(oparms));
oparms.tcon = tcon;
oparms.desired_access = FILE_WRITE_EA;
oparms.disposition = FILE_OPEN;
@@ -933,18 +932,22 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
oparms.fid = &fid;
oparms.reconnect = false;
- rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, NULL);
- kfree(utf16_path);
- if (rc) {
- cifs_dbg(FYI, "open failed rc=%d\n", rc);
- return rc;
- }
+ rc = SMB2_open_init(tcon, &rqst[0], &oplock, &oparms, utf16_path);
+ if (rc)
+ goto sea_exit;
+ smb2_set_next_command(tcon, &rqst[0]);
+
+
+ /* Set Info */
+ memset(&si_iov, 0, sizeof(si_iov));
+ rqst[1].rq_iov = si_iov;
+ rqst[1].rq_nvec = 1;
len = sizeof(ea) + ea_name_len + ea_value_len + 1;
ea = kzalloc(len, GFP_KERNEL);
if (ea == NULL) {
- SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
- return -ENOMEM;
+ rc = -ENOMEM;
+ goto sea_exit;
}
ea->ea_name_length = ea_name_len;
@@ -952,12 +955,36 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
memcpy(ea->ea_data, ea_name, ea_name_len + 1);
memcpy(ea->ea_data + ea_name_len + 1, ea_value, ea_value_len);
- rc = SMB2_set_ea(xid, tcon, fid.persistent_fid, fid.volatile_fid, ea,
- len);
- kfree(ea);
+ size[0] = len;
+ data[0] = ea;
+
+ rc = SMB2_set_info_init(tcon, &rqst[1], COMPOUND_FID,
+ COMPOUND_FID, current->tgid,
+ FILE_FULL_EA_INFORMATION,
+ SMB2_O_INFO_FILE, 0, data, size);
+ smb2_set_next_command(tcon, &rqst[1]);
+ smb2_set_related(&rqst[1]);
- SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+ /* Close */
+ memset(&close_iov, 0, sizeof(close_iov));
+ rqst[2].rq_iov = close_iov;
+ rqst[2].rq_nvec = 1;
+ rc = SMB2_close_init(tcon, &rqst[2], COMPOUND_FID, COMPOUND_FID);
+ smb2_set_related(&rqst[2]);
+
+ rc = compound_send_recv(xid, ses, flags, 3, rqst,
+ resp_buftype, rsp_iov);
+
+ sea_exit:
+ kfree(ea);
+ kfree(utf16_path);
+ SMB2_open_free(&rqst[0]);
+ SMB2_set_info_free(&rqst[1]);
+ SMB2_close_free(&rqst[2]);
+ free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
+ free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
+ free_rsp_buf(resp_buftype[2], rsp_iov[2].iov_base);
return rc;
}
#endif
@@ -1194,7 +1221,7 @@ smb2_ioctl_query_info(const unsigned int xid,
rc = SMB2_open_init(tcon, &rqst[0], &oplock, &oparms, path);
if (rc)
goto iqinf_exit;
- smb2_set_next_command(ses->server, &rqst[0]);
+ smb2_set_next_command(tcon, &rqst[0]);
/* Query */
memset(&qi_iov, 0, sizeof(qi_iov));
@@ -1208,7 +1235,7 @@ smb2_ioctl_query_info(const unsigned int xid,
qi.output_buffer_length, buffer);
if (rc)
goto iqinf_exit;
- smb2_set_next_command(ses->server, &rqst[1]);
+ smb2_set_next_command(tcon, &rqst[1]);
smb2_set_related(&rqst[1]);
/* Close */
@@ -1761,42 +1788,79 @@ smb2_set_related(struct smb_rqst *rqst)
char smb2_padding[7] = {0, 0, 0, 0, 0, 0, 0};
void
-smb2_set_next_command(struct TCP_Server_Info *server, struct smb_rqst *rqst)
+smb2_set_next_command(struct cifs_tcon *tcon, struct smb_rqst *rqst)
{
struct smb2_sync_hdr *shdr;
+ struct cifs_ses *ses = tcon->ses;
+ struct TCP_Server_Info *server = ses->server;
unsigned long len = smb_rqst_len(server, rqst);
+ int i, num_padding;
/* SMB headers in a compound are 8 byte aligned. */
- if (len & 7) {
+
+ /* No padding needed */
+ if (!(len & 7))
+ goto finished;
+
+ num_padding = 8 - (len & 7);
+ if (!smb3_encryption_required(tcon)) {
+ /*
+ * If we do not have encryption then we can just add an extra
+ * iov for the padding.
+ */
rqst->rq_iov[rqst->rq_nvec].iov_base = smb2_padding;
- rqst->rq_iov[rqst->rq_nvec].iov_len = 8 - (len & 7);
+ rqst->rq_iov[rqst->rq_nvec].iov_len = num_padding;
rqst->rq_nvec++;
- len = smb_rqst_len(server, rqst);
+ len += num_padding;
+ } else {
+ /*
+ * We can not add a small padding iov for the encryption case
+ * because the encryption framework can not handle the padding
+ * iovs.
+ * We have to flatten this into a single buffer and add
+ * the padding to it.
+ */
+ for (i = 1; i < rqst->rq_nvec; i++) {
+ memcpy(rqst->rq_iov[0].iov_base +
+ rqst->rq_iov[0].iov_len,
+ rqst->rq_iov[i].iov_base,
+ rqst->rq_iov[i].iov_len);
+ rqst->rq_iov[0].iov_len += rqst->rq_iov[i].iov_len;
+ }
+ memset(rqst->rq_iov[0].iov_base + rqst->rq_iov[0].iov_len,
+ 0, num_padding);
+ rqst->rq_iov[0].iov_len += num_padding;
+ len += num_padding;
+ rqst->rq_nvec = 1;
}
+ finished:
shdr = (struct smb2_sync_hdr *)(rqst->rq_iov[0].iov_base);
shdr->NextCommand = cpu_to_le32(len);
}
-static int
-smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
- struct kstatfs *buf)
+/*
+ * Passes the query info response back to the caller on success.
+ * Caller need to free this with free_rsp_buf().
+ */
+int
+smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon,
+ __le16 *utf16_path, u32 desired_access,
+ u32 class, u32 type, u32 output_len,
+ struct kvec *rsp, int *buftype,
+ struct cifs_sb_info *cifs_sb)
{
- struct smb2_query_info_rsp *rsp;
- struct smb2_fs_full_size_info *info = NULL;
+ struct cifs_ses *ses = tcon->ses;
+ int flags = 0;
struct smb_rqst rqst[3];
int resp_buftype[3];
struct kvec rsp_iov[3];
struct kvec open_iov[SMB2_CREATE_IOV_SIZE];
struct kvec qi_iov[1];
struct kvec close_iov[1];
- struct cifs_ses *ses = tcon->ses;
- struct TCP_Server_Info *server = ses->server;
- __le16 srch_path = 0; /* Null - open root of share */
u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
struct cifs_open_parms oparms;
struct cifs_fid fid;
- int flags = 0;
int rc;
if (smb3_encryption_required(tcon))
@@ -1811,29 +1875,31 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
rqst[0].rq_nvec = SMB2_CREATE_IOV_SIZE;
oparms.tcon = tcon;
- oparms.desired_access = FILE_READ_ATTRIBUTES;
+ oparms.desired_access = desired_access;
oparms.disposition = FILE_OPEN;
- oparms.create_options = 0;
+ if (cifs_sb && backup_cred(cifs_sb))
+ oparms.create_options = CREATE_OPEN_BACKUP_INTENT;
+ else
+ oparms.create_options = 0;
oparms.fid = &fid;
oparms.reconnect = false;
- rc = SMB2_open_init(tcon, &rqst[0], &oplock, &oparms, &srch_path);
+ rc = SMB2_open_init(tcon, &rqst[0], &oplock, &oparms, utf16_path);
if (rc)
- goto qfs_exit;
- smb2_set_next_command(server, &rqst[0]);
+ goto qic_exit;
+ smb2_set_next_command(tcon, &rqst[0]);
memset(&qi_iov, 0, sizeof(qi_iov));
rqst[1].rq_iov = qi_iov;
rqst[1].rq_nvec = 1;
rc = SMB2_query_info_init(tcon, &rqst[1], COMPOUND_FID, COMPOUND_FID,
- FS_FULL_SIZE_INFORMATION,
- SMB2_O_INFO_FILESYSTEM, 0,
- sizeof(struct smb2_fs_full_size_info), 0,
+ class, type, 0,
+ output_len, 0,
NULL);
if (rc)
- goto qfs_exit;
- smb2_set_next_command(server, &rqst[1]);
+ goto qic_exit;
+ smb2_set_next_command(tcon, &rqst[1]);
smb2_set_related(&rqst[1]);
memset(&close_iov, 0, sizeof(close_iov));
@@ -1842,32 +1908,61 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_close_init(tcon, &rqst[2], COMPOUND_FID, COMPOUND_FID);
if (rc)
- goto qfs_exit;
+ goto qic_exit;
smb2_set_related(&rqst[2]);
rc = compound_send_recv(xid, ses, flags, 3, rqst,
resp_buftype, rsp_iov);
+ if (rc) {
+ free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
+ goto qic_exit;
+ }
+ *rsp = rsp_iov[1];
+ *buftype = resp_buftype[1];
+
+ qic_exit:
+ SMB2_open_free(&rqst[0]);
+ SMB2_query_info_free(&rqst[1]);
+ SMB2_close_free(&rqst[2]);
+ free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
+ free_rsp_buf(resp_buftype[2], rsp_iov[2].iov_base);
+ return rc;
+}
+
+static int
+smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
+ struct kstatfs *buf)
+{
+ struct smb2_query_info_rsp *rsp;
+ struct smb2_fs_full_size_info *info = NULL;
+ __le16 utf16_path = 0; /* Null - open root of share */
+ struct kvec rsp_iov = {NULL, 0};
+ int buftype = CIFS_NO_BUFFER;
+ int rc;
+
+
+ rc = smb2_query_info_compound(xid, tcon, &utf16_path,
+ FILE_READ_ATTRIBUTES,
+ FS_FULL_SIZE_INFORMATION,
+ SMB2_O_INFO_FILESYSTEM,
+ sizeof(struct smb2_fs_full_size_info),
+ &rsp_iov, &buftype, NULL);
if (rc)
goto qfs_exit;
- rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base;
+ rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base;
buf->f_type = SMB2_MAGIC_NUMBER;
info = (struct smb2_fs_full_size_info *)(
le16_to_cpu(rsp->OutputBufferOffset) + (char *)rsp);
rc = smb2_validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
le32_to_cpu(rsp->OutputBufferLength),
- &rsp_iov[1],
+ &rsp_iov,
sizeof(struct smb2_fs_full_size_info));
if (!rc)
smb2_copy_fs_info_to_kstatfs(info, buf);
qfs_exit:
- SMB2_open_free(&rqst[0]);
- SMB2_query_info_free(&rqst[1]);
- SMB2_close_free(&rqst[2]);
- free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
- free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
- free_rsp_buf(resp_buftype[2], rsp_iov[2].iov_base);
+ free_rsp_buf(buftype, rsp_iov.iov_base);
return rc;
}
@@ -2736,7 +2831,7 @@ init_sg(int num_rqst, struct smb_rqst *rqst, u8 *sign)
smb2_sg_set_buf(&sg[idx++],
rqst[i].rq_iov[j].iov_base + skip,
rqst[i].rq_iov[j].iov_len - skip);
- }
+ }
for (j = 0; j < rqst[i].rq_npages; j++) {
unsigned int len, offset;
@@ -3377,8 +3472,10 @@ smb3_receive_transform(struct TCP_Server_Info *server,
}
/* TODO: add support for compounds containing READ. */
- if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server))
+ if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server)) {
+ *num_mids = 1;
return receive_encrypted_read(server, &mids[0]);
+ }
return receive_encrypted_standard(server, mids, bufs, num_mids);
}
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 27f86537a5d1..50811a7dc0e0 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -50,6 +50,9 @@
#include "cifs_spnego.h"
#include "smbdirect.h"
#include "trace.h"
+#ifdef CONFIG_CIFS_DFS_UPCALL
+#include "dfs_cache.h"
+#endif
/*
* The following table defines the expected "StructureSize" of SMB2 requests
@@ -152,6 +155,86 @@ out:
return;
}
+#ifdef CONFIG_CIFS_DFS_UPCALL
+static int __smb2_reconnect(const struct nls_table *nlsc,
+ struct cifs_tcon *tcon)
+{
+ int rc;
+ struct dfs_cache_tgt_list tl;
+ struct dfs_cache_tgt_iterator *it = NULL;
+ char *tree;
+ const char *tcp_host;
+ size_t tcp_host_len;
+ const char *dfs_host;
+ size_t dfs_host_len;
+
+ tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL);
+ if (!tree)
+ return -ENOMEM;
+
+ if (tcon->ipc) {
+ snprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$",
+ tcon->ses->server->hostname);
+ rc = SMB2_tcon(0, tcon->ses, tree, tcon, nlsc);
+ goto out;
+ }
+
+ if (!tcon->dfs_path) {
+ rc = SMB2_tcon(0, tcon->ses, tcon->treeName, tcon, nlsc);
+ goto out;
+ }
+
+ rc = dfs_cache_noreq_find(tcon->dfs_path + 1, NULL, &tl);
+ if (rc)
+ goto out;
+
+ extract_unc_hostname(tcon->ses->server->hostname, &tcp_host,
+ &tcp_host_len);
+
+ for (it = dfs_cache_get_tgt_iterator(&tl); it;
+ it = dfs_cache_get_next_tgt(&tl, it)) {
+ const char *tgt = dfs_cache_get_tgt_name(it);
+
+ extract_unc_hostname(tgt, &dfs_host, &dfs_host_len);
+
+ if (dfs_host_len != tcp_host_len
+ || strncasecmp(dfs_host, tcp_host, dfs_host_len) != 0) {
+ cifs_dbg(FYI, "%s: skipping %.*s, doesn't match %.*s",
+ __func__,
+ (int)dfs_host_len, dfs_host,
+ (int)tcp_host_len, tcp_host);
+ continue;
+ }
+
+ snprintf(tree, MAX_TREE_SIZE, "\\%s", tgt);
+
+ rc = SMB2_tcon(0, tcon->ses, tree, tcon, nlsc);
+ if (!rc)
+ break;
+ if (rc == -EREMOTE)
+ break;
+ }
+
+ if (!rc) {
+ if (it)
+ rc = dfs_cache_noreq_update_tgthint(tcon->dfs_path + 1,
+ it);
+ else
+ rc = -ENOENT;
+ }
+ dfs_cache_free_tgts(&tl);
+out:
+ kfree(tree);
+ return rc;
+}
+#else
+static inline int __smb2_reconnect(const struct nls_table *nlsc,
+ struct cifs_tcon *tcon)
+{
+ return SMB2_tcon(0, tcon->ses, tcon->treeName, tcon, nlsc);
+}
+#endif
+
static int
smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
{
@@ -159,6 +242,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
struct nls_table *nls_codepage;
struct cifs_ses *ses;
struct TCP_Server_Info *server;
+ int retries;
/*
* SMB2s NegProt, SessSetup, Logoff do not have tcon yet so
@@ -192,9 +276,12 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
ses = tcon->ses;
server = ses->server;
+ retries = server->nr_targets;
+
/*
- * Give demultiplex thread up to 10 seconds to reconnect, should be
- * greater than cifs socket timeout which is 7 seconds
+ * Give demultiplex thread up to 10 seconds to each target available for
+ * reconnect -- should be greater than cifs socket timeout which is 7
+ * seconds.
*/
while (server->tcpStatus == CifsNeedReconnect) {
/*
@@ -225,6 +312,9 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
if (server->tcpStatus != CifsNeedReconnect)
break;
+ if (--retries)
+ continue;
+
/*
* on "soft" mounts we wait once. Hard mounts keep
* retrying until process is killed or server comes
@@ -234,6 +324,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
cifs_dbg(FYI, "gave up waiting on reconnect in smb_init\n");
return -EHOSTDOWN;
}
+ retries = server->nr_targets;
}
if (!tcon->ses->need_reconnect && !tcon->need_reconnect)
@@ -271,7 +362,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
if (tcon->use_persistent)
tcon->need_reopen_files = true;
- rc = SMB2_tcon(0, tcon->ses, tcon->treeName, tcon, nls_codepage);
+ rc = __smb2_reconnect(nls_codepage, tcon);
mutex_unlock(&tcon->ses->session_mutex);
cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc);
@@ -369,10 +460,6 @@ smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon,
}
-/* offset is sizeof smb2_negotiate_req but rounded up to 8 bytes */
-#define OFFSET_OF_NEG_CONTEXT 0x68 /* sizeof(struct smb2_negotiate_req) */
-
-
#define SMB2_PREAUTH_INTEGRITY_CAPABILITIES cpu_to_le16(1)
#define SMB2_ENCRYPTION_CAPABILITIES cpu_to_le16(2)
#define SMB2_POSIX_EXTENSIONS_AVAILABLE cpu_to_le16(0x100)
@@ -409,10 +496,24 @@ static void
assemble_neg_contexts(struct smb2_negotiate_req *req,
unsigned int *total_len)
{
- char *pneg_ctxt = (char *)req + OFFSET_OF_NEG_CONTEXT;
+ char *pneg_ctxt = (char *)req;
unsigned int ctxt_len;
- *total_len += 2; /* Add 2 due to round to 8 byte boundary for 1st ctxt */
+ if (*total_len > 200) {
+ /* In case length corrupted don't want to overrun smb buffer */
+ cifs_dbg(VFS, "Bad frame length assembling neg contexts\n");
+ return;
+ }
+
+ /*
+ * round up total_len of fixed part of SMB3 negotiate request to 8
+ * byte boundary before adding negotiate contexts
+ */
+ *total_len = roundup(*total_len, 8);
+
+ pneg_ctxt = (*total_len) + (char *)req;
+ req->NegotiateContextOffset = cpu_to_le32(*total_len);
+
build_preauth_ctxt((struct smb2_preauth_neg_context *)pneg_ctxt);
ctxt_len = DIV_ROUND_UP(sizeof(struct smb2_preauth_neg_context), 8) * 8;
*total_len += ctxt_len;
@@ -426,7 +527,6 @@ assemble_neg_contexts(struct smb2_negotiate_req *req,
build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt);
*total_len += sizeof(struct smb2_posix_neg_context);
- req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
req->NegotiateContextCount = cpu_to_le16(3);
}
@@ -642,8 +742,9 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
req->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
req->Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
req->Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
- req->DialectCount = cpu_to_le16(3);
- total_len += 6;
+ req->Dialects[3] = cpu_to_le16(SMB311_PROT_ID);
+ req->DialectCount = cpu_to_le16(4);
+ total_len += 8;
} else {
/* otherwise send specific dialect */
req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id);
@@ -667,7 +768,9 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
else {
memcpy(req->ClientGUID, server->client_guid,
SMB2_CLIENT_GUID_SIZE);
- if (ses->server->vals->protocol_id == SMB311_PROT_ID)
+ if ((ses->server->vals->protocol_id == SMB311_PROT_ID) ||
+ (strcmp(ses->server->vals->version_string,
+ SMBDEFAULT_VERSION_STRING) == 0))
assemble_neg_contexts(req, &total_len);
}
iov[0].iov_base = (char *)req;
@@ -712,7 +815,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
} else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) {
/* ops set to 3.0 by default for default so update */
ses->server->ops = &smb21_operations;
- }
+ } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID))
+ ses->server->ops = &smb311_operations;
} else if (le16_to_cpu(rsp->DialectRevision) !=
ses->server->vals->protocol_id) {
/* if requested single dialect ensure returned dialect matched */
@@ -859,13 +963,14 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
pneg_inbuf->DialectCount = cpu_to_le16(2);
/* structure is big enough for 3 dialects, sending only 2 */
inbuflen = sizeof(*pneg_inbuf) -
- sizeof(pneg_inbuf->Dialects[0]);
+ (2 * sizeof(pneg_inbuf->Dialects[0]));
} else if (strcmp(tcon->ses->server->vals->version_string,
SMBDEFAULT_VERSION_STRING) == 0) {
pneg_inbuf->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
pneg_inbuf->Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
pneg_inbuf->Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
- pneg_inbuf->DialectCount = cpu_to_le16(3);
+ pneg_inbuf->Dialects[3] = cpu_to_le16(SMB311_PROT_ID);
+ pneg_inbuf->DialectCount = cpu_to_le16(4);
/* structure is big enough for 3 dialects */
inbuflen = sizeof(*pneg_inbuf);
} else {
@@ -1955,7 +2060,6 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode,
struct smb_rqst rqst;
struct smb2_create_req *req;
struct smb2_create_rsp *rsp = NULL;
- struct TCP_Server_Info *server;
struct cifs_ses *ses = tcon->ses;
struct kvec iov[3]; /* make sure at least one for each open context */
struct kvec rsp_iov = {NULL, 0};
@@ -1978,9 +2082,7 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode,
if (!utf16_path)
return -ENOMEM;
- if (ses && (ses->server))
- server = ses->server;
- else {
+ if (!ses || !(ses->server)) {
rc = -EIO;
goto err_free_path;
}
@@ -2768,18 +2870,6 @@ qinf_exit:
return rc;
}
-int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid,
- int ea_buf_size, struct smb2_file_full_ea_info *data)
-{
- return query_info(xid, tcon, persistent_fid, volatile_fid,
- FILE_FULL_EA_INFORMATION, SMB2_O_INFO_FILE, 0,
- ea_buf_size,
- sizeof(struct smb2_file_full_ea_info),
- (void **)&data,
- NULL);
-}
-
int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, struct smb2_file_all_info *data)
{
@@ -3197,12 +3287,14 @@ smb2_async_readv(struct cifs_readdata *rdata)
if (rdata->credits) {
shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes,
SMB2_MAX_BUFFER_SIZE));
- shdr->CreditRequest = shdr->CreditCharge;
+ shdr->CreditRequest =
+ cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 1);
spin_lock(&server->req_lock);
server->credits += rdata->credits -
le16_to_cpu(shdr->CreditCharge);
spin_unlock(&server->req_lock);
wake_up(&server->request_q);
+ rdata->credits = le16_to_cpu(shdr->CreditCharge);
flags |= CIFS_HAS_CREDITS;
}
@@ -3474,12 +3566,14 @@ smb2_async_writev(struct cifs_writedata *wdata,
if (wdata->credits) {
shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes,
SMB2_MAX_BUFFER_SIZE));
- shdr->CreditRequest = shdr->CreditCharge;
+ shdr->CreditRequest =
+ cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 1);
spin_lock(&server->req_lock);
server->credits += wdata->credits -
le16_to_cpu(shdr->CreditCharge);
spin_unlock(&server->req_lock);
wake_up(&server->request_q);
+ wdata->credits = le16_to_cpu(shdr->CreditCharge);
flags |= CIFS_HAS_CREDITS;
}
@@ -3994,7 +4088,6 @@ static int
build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level,
int outbuf_len, u64 persistent_fid, u64 volatile_fid)
{
- struct TCP_Server_Info *server;
int rc;
struct smb2_query_info_req *req;
unsigned int total_len;
@@ -4004,8 +4097,6 @@ build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level,
if ((tcon->ses == NULL) || (tcon->ses->server == NULL))
return -EIO;
- server = tcon->ses->server;
-
rc = smb2_plain_req_init(SMB2_QUERY_INFO, tcon, (void **) &req,
&total_len);
if (rc)
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 5671d5ee7f58..7a2d0a2255e6 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -898,7 +898,7 @@ struct validate_negotiate_info_req {
__u8 Guid[SMB2_CLIENT_GUID_SIZE];
__le16 SecurityMode;
__le16 DialectCount;
- __le16 Dialects[3]; /* BB expand this if autonegotiate > 3 dialects */
+ __le16 Dialects[4]; /* BB expand this if autonegotiate > 4 dialects */
} __packed;
struct validate_negotiate_info_rsp {
@@ -1398,7 +1398,6 @@ struct smb2_file_link_info { /* encoding of request for level 11 */
char FileName[0]; /* Name to be assigned to new link */
} __packed; /* level 11 Set */
-#define SMB2_MIN_EA_BUF 2048
#define SMB2_MAX_EA_BUF 65536
struct smb2_file_full_ea_info { /* encoding of response for level 15 */
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 9f4e9ed9ce53..87733b27a65f 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -116,7 +116,7 @@ extern void smb2_reconnect_server(struct work_struct *work);
extern int smb3_crypto_aead_allocate(struct TCP_Server_Info *server);
extern unsigned long smb_rqst_len(struct TCP_Server_Info *server,
struct smb_rqst *rqst);
-extern void smb2_set_next_command(struct TCP_Server_Info *server,
+extern void smb2_set_next_command(struct cifs_tcon *tcon,
struct smb_rqst *rqst);
extern void smb2_set_related(struct smb_rqst *rqst);
@@ -153,10 +153,6 @@ extern int SMB2_close_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
extern void SMB2_close_free(struct smb_rqst *rqst);
extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id);
-extern int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_file_id, u64 volatile_file_id,
- int ea_buf_size,
- struct smb2_file_full_ea_info *data);
extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id,
struct smb2_file_all_info *data);
@@ -240,4 +236,10 @@ extern void smb2_copy_fs_info_to_kstatfs(
extern int smb311_crypto_shash_allocate(struct TCP_Server_Info *server);
extern int smb311_update_preauth_hash(struct cifs_ses *ses,
struct kvec *iov, int nvec);
+extern int smb2_query_info_compound(const unsigned int xid,
+ struct cifs_tcon *tcon,
+ __le16 *utf16_path, u32 desired_access,
+ u32 class, u32 type, u32 output_len,
+ struct kvec *rsp, int *buftype,
+ struct cifs_sb_info *cifs_sb);
#endif /* _SMB2PROTO_H */
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index e94a8d1d08a3..a568dac7b3a1 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -1724,7 +1724,7 @@ static struct smbd_connection *_smbd_get_connection(
info->responder_resources);
/* Need to send IRD/ORD in private data for iWARP */
- info->id->device->get_port_immutable(
+ info->id->device->ops.get_port_immutable(
info->id->device, info->id->port_num, &port_immutable);
if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) {
ird_ord_hdr[0] = info->responder_resources;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 83ff0c25710d..202e0e84efdd 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -126,9 +126,11 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
if ((slow_rsp_threshold != 0) &&
time_after(now, midEntry->when_alloc + (slow_rsp_threshold * HZ)) &&
(midEntry->command != command)) {
- /* smb2slowcmd[NUMBER_OF_SMB2_COMMANDS] counts by command */
- if ((le16_to_cpu(midEntry->command) < NUMBER_OF_SMB2_COMMANDS) &&
- (le16_to_cpu(midEntry->command) >= 0))
+ /*
+ * smb2slowcmd[NUMBER_OF_SMB2_COMMANDS] counts by command
+ * NB: le16_to_cpu returns unsigned so can not be negative below
+ */
+ if (le16_to_cpu(midEntry->command) < NUMBER_OF_SMB2_COMMANDS)
cifs_stats_inc(&midEntry->server->smb2slowcmd[le16_to_cpu(midEntry->command)]);
trace_smb3_slow_rsp(le16_to_cpu(midEntry->command),
@@ -385,7 +387,7 @@ smbd_done:
if (rc < 0 && rc != -EINTR)
cifs_dbg(VFS, "Error %d sending data on socket to server\n",
rc);
- else
+ else if (rc > 0)
rc = 0;
return rc;
@@ -781,8 +783,34 @@ cifs_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst)
}
static void
-cifs_noop_callback(struct mid_q_entry *mid)
+cifs_compound_callback(struct mid_q_entry *mid)
+{
+ struct TCP_Server_Info *server = mid->server;
+ unsigned int optype = mid->optype;
+ unsigned int credits_received = 0;
+
+ if (mid->mid_state == MID_RESPONSE_RECEIVED) {
+ if (mid->resp_buf)
+ credits_received = server->ops->get_credits(mid);
+ else
+ cifs_dbg(FYI, "Bad state for cancelled MID\n");
+ }
+
+ add_credits(server, credits_received, optype);
+}
+
+static void
+cifs_compound_last_callback(struct mid_q_entry *mid)
+{
+ cifs_compound_callback(mid);
+ cifs_wake_up_task(mid);
+}
+
+static void
+cifs_cancelled_callback(struct mid_q_entry *mid)
{
+ cifs_compound_callback(mid);
+ DeleteMidQEntry(mid);
}
int
@@ -793,7 +821,8 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
int i, j, rc = 0;
int timeout, optype;
struct mid_q_entry *midQ[MAX_COMPOUND];
- unsigned int credits = 0;
+ bool cancelled_mid[MAX_COMPOUND] = {false};
+ unsigned int credits[MAX_COMPOUND] = {0};
char *buf;
timeout = flags & CIFS_TIMEOUT_MASK;
@@ -811,13 +840,31 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
return -ENOENT;
/*
- * Ensure that we do not send more than 50 overlapping requests
- * to the same server. We may make this configurable later or
- * use ses->maxReq.
+ * Ensure we obtain 1 credit per request in the compound chain.
+ * It can be optimized further by waiting for all the credits
+ * at once but this can wait long enough if we don't have enough
+ * credits due to some heavy operations in progress or the server
+ * not granting us much, so a fallback to the current approach is
+ * needed anyway.
*/
- rc = wait_for_free_request(ses->server, timeout, optype);
- if (rc)
- return rc;
+ for (i = 0; i < num_rqst; i++) {
+ rc = wait_for_free_request(ses->server, timeout, optype);
+ if (rc) {
+ /*
+ * We haven't sent an SMB packet to the server yet but
+ * we already obtained credits for i requests in the
+ * compound chain - need to return those credits back
+ * for future use. Note that we need to call add_credits
+ * multiple times to match the way we obtained credits
+ * in the first place and to account for in flight
+ * requests correctly.
+ */
+ for (j = 0; j < i; j++)
+ add_credits(ses->server, 1, optype);
+ return rc;
+ }
+ credits[i] = 1;
+ }
/*
* Make sure that we sign in the same order that we send on this socket
@@ -833,18 +880,24 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
for (j = 0; j < i; j++)
cifs_delete_mid(midQ[j]);
mutex_unlock(&ses->server->srv_mutex);
+
/* Update # of requests on wire to server */
- add_credits(ses->server, 1, optype);
+ for (j = 0; j < num_rqst; j++)
+ add_credits(ses->server, credits[j], optype);
return PTR_ERR(midQ[i]);
}
midQ[i]->mid_state = MID_REQUEST_SUBMITTED;
+ midQ[i]->optype = optype;
/*
- * We don't invoke the callback compounds unless it is the last
- * request.
+ * Invoke callback for every part of the compound chain
+ * to calculate credits properly. Wake up this thread only when
+ * the last element is received.
*/
if (i < num_rqst - 1)
- midQ[i]->callback = cifs_noop_callback;
+ midQ[i]->callback = cifs_compound_callback;
+ else
+ midQ[i]->callback = cifs_compound_last_callback;
}
cifs_in_send_inc(ses->server);
rc = smb_send_rqst(ses->server, num_rqst, rqst, flags);
@@ -858,8 +911,20 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
mutex_unlock(&ses->server->srv_mutex);
- if (rc < 0)
+ if (rc < 0) {
+ /* Sending failed for some reason - return credits back */
+ for (i = 0; i < num_rqst; i++)
+ add_credits(ses->server, credits[i], optype);
goto out;
+ }
+
+ /*
+ * At this point the request is passed to the network stack - we assume
+ * that any credits taken from the server structure on the client have
+ * been spent and we can't return them back. Once we receive responses
+ * we will collect credits granted by the server in the mid callbacks
+ * and add those credits to the server structure.
+ */
/*
* Compounding is never used during session establish.
@@ -873,36 +938,34 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
for (i = 0; i < num_rqst; i++) {
rc = wait_for_response(ses->server, midQ[i]);
- if (rc != 0) {
+ if (rc != 0)
+ break;
+ }
+ if (rc != 0) {
+ for (; i < num_rqst; i++) {
cifs_dbg(VFS, "Cancelling wait for mid %llu cmd: %d\n",
midQ[i]->mid, le16_to_cpu(midQ[i]->command));
send_cancel(ses->server, &rqst[i], midQ[i]);
spin_lock(&GlobalMid_Lock);
if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED) {
midQ[i]->mid_flags |= MID_WAIT_CANCELLED;
- midQ[i]->callback = DeleteMidQEntry;
- spin_unlock(&GlobalMid_Lock);
- add_credits(ses->server, 1, optype);
- return rc;
+ midQ[i]->callback = cifs_cancelled_callback;
+ cancelled_mid[i] = true;
+ credits[i] = 0;
}
spin_unlock(&GlobalMid_Lock);
}
}
- for (i = 0; i < num_rqst; i++)
- if (midQ[i]->resp_buf)
- credits += ses->server->ops->get_credits(midQ[i]);
- if (!credits)
- credits = 1;
-
for (i = 0; i < num_rqst; i++) {
if (rc < 0)
goto out;
rc = cifs_sync_mid_result(midQ[i], ses->server);
if (rc != 0) {
- add_credits(ses->server, credits, optype);
- return rc;
+ /* mark this mid as cancelled to not free it below */
+ cancelled_mid[i] = true;
+ goto out;
}
if (!midQ[i]->resp_buf ||
@@ -949,9 +1012,10 @@ out:
* This is prevented above by using a noop callback that will not
* wake this thread except for the very last PDU.
*/
- for (i = 0; i < num_rqst; i++)
- cifs_delete_mid(midQ[i]);
- add_credits(ses->server, credits, optype);
+ for (i = 0; i < num_rqst; i++) {
+ if (!cancelled_mid[i])
+ cifs_delete_mid(midQ[i]);
+ }
return rc;
}
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 0f46cf550907..4dc788e3bc96 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -133,15 +133,25 @@ struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, gfp_t gfp_flags)
}
EXPORT_SYMBOL(fscrypt_get_ctx);
+void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
+ const struct fscrypt_info *ci)
+{
+ memset(iv, 0, ci->ci_mode->ivsize);
+ iv->lblk_num = cpu_to_le64(lblk_num);
+
+ if (ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY)
+ memcpy(iv->nonce, ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE);
+
+ if (ci->ci_essiv_tfm != NULL)
+ crypto_cipher_encrypt_one(ci->ci_essiv_tfm, iv->raw, iv->raw);
+}
+
int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
u64 lblk_num, struct page *src_page,
struct page *dest_page, unsigned int len,
unsigned int offs, gfp_t gfp_flags)
{
- struct {
- __le64 index;
- u8 padding[FS_IV_SIZE - sizeof(__le64)];
- } iv;
+ union fscrypt_iv iv;
struct skcipher_request *req = NULL;
DECLARE_CRYPTO_WAIT(wait);
struct scatterlist dst, src;
@@ -151,15 +161,7 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
BUG_ON(len == 0);
- BUILD_BUG_ON(sizeof(iv) != FS_IV_SIZE);
- BUILD_BUG_ON(AES_BLOCK_SIZE != FS_IV_SIZE);
- iv.index = cpu_to_le64(lblk_num);
- memset(iv.padding, 0, sizeof(iv.padding));
-
- if (ci->ci_essiv_tfm != NULL) {
- crypto_cipher_encrypt_one(ci->ci_essiv_tfm, (u8 *)&iv,
- (u8 *)&iv);
- }
+ fscrypt_generate_iv(&iv, lblk_num, ci);
req = skcipher_request_alloc(tfm, gfp_flags);
if (!req)
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index d7a0f682ca12..7ff40a73dbec 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -40,10 +40,11 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname,
{
struct skcipher_request *req = NULL;
DECLARE_CRYPTO_WAIT(wait);
- struct crypto_skcipher *tfm = inode->i_crypt_info->ci_ctfm;
- int res = 0;
- char iv[FS_CRYPTO_BLOCK_SIZE];
+ struct fscrypt_info *ci = inode->i_crypt_info;
+ struct crypto_skcipher *tfm = ci->ci_ctfm;
+ union fscrypt_iv iv;
struct scatterlist sg;
+ int res;
/*
* Copy the filename to the output buffer for encrypting in-place and
@@ -55,7 +56,7 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname,
memset(out + iname->len, 0, olen - iname->len);
/* Initialize the IV */
- memset(iv, 0, FS_CRYPTO_BLOCK_SIZE);
+ fscrypt_generate_iv(&iv, 0, ci);
/* Set up the encryption request */
req = skcipher_request_alloc(tfm, GFP_NOFS);
@@ -65,7 +66,7 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
crypto_req_done, &wait);
sg_init_one(&sg, out, olen);
- skcipher_request_set_crypt(req, &sg, &sg, olen, iv);
+ skcipher_request_set_crypt(req, &sg, &sg, olen, &iv);
/* Do the encryption */
res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
@@ -94,9 +95,10 @@ static int fname_decrypt(struct inode *inode,
struct skcipher_request *req = NULL;
DECLARE_CRYPTO_WAIT(wait);
struct scatterlist src_sg, dst_sg;
- struct crypto_skcipher *tfm = inode->i_crypt_info->ci_ctfm;
- int res = 0;
- char iv[FS_CRYPTO_BLOCK_SIZE];
+ struct fscrypt_info *ci = inode->i_crypt_info;
+ struct crypto_skcipher *tfm = ci->ci_ctfm;
+ union fscrypt_iv iv;
+ int res;
/* Allocate request */
req = skcipher_request_alloc(tfm, GFP_NOFS);
@@ -107,12 +109,12 @@ static int fname_decrypt(struct inode *inode,
crypto_req_done, &wait);
/* Initialize IV */
- memset(iv, 0, FS_CRYPTO_BLOCK_SIZE);
+ fscrypt_generate_iv(&iv, 0, ci);
/* Create decryption request */
sg_init_one(&src_sg, iname->name, iname->len);
sg_init_one(&dst_sg, oname->name, oname->len);
- skcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv);
+ skcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, &iv);
res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait);
skcipher_request_free(req);
if (res < 0) {
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 79debfc9cef9..7424f851eb5c 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -17,7 +17,6 @@
#include <crypto/hash.h>
/* Encryption parameters */
-#define FS_IV_SIZE 16
#define FS_KEY_DERIVATION_NONCE_SIZE 16
/**
@@ -52,16 +51,42 @@ struct fscrypt_symlink_data {
} __packed;
/*
- * A pointer to this structure is stored in the file system's in-core
- * representation of an inode.
+ * fscrypt_info - the "encryption key" for an inode
+ *
+ * When an encrypted file's key is made available, an instance of this struct is
+ * allocated and stored in ->i_crypt_info. Once created, it remains until the
+ * inode is evicted.
*/
struct fscrypt_info {
+
+ /* The actual crypto transform used for encryption and decryption */
+ struct crypto_skcipher *ci_ctfm;
+
+ /*
+ * Cipher for ESSIV IV generation. Only set for CBC contents
+ * encryption, otherwise is NULL.
+ */
+ struct crypto_cipher *ci_essiv_tfm;
+
+ /*
+ * Encryption mode used for this inode. It corresponds to either
+ * ci_data_mode or ci_filename_mode, depending on the inode type.
+ */
+ struct fscrypt_mode *ci_mode;
+
+ /*
+ * If non-NULL, then this inode uses a master key directly rather than a
+ * derived key, and ci_ctfm will equal ci_master_key->mk_ctfm.
+ * Otherwise, this inode uses a derived key.
+ */
+ struct fscrypt_master_key *ci_master_key;
+
+ /* fields from the fscrypt_context */
u8 ci_data_mode;
u8 ci_filename_mode;
u8 ci_flags;
- struct crypto_skcipher *ci_ctfm;
- struct crypto_cipher *ci_essiv_tfm;
- u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE];
+ u8 ci_master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+ u8 ci_nonce[FS_KEY_DERIVATION_NONCE_SIZE];
};
typedef enum {
@@ -83,6 +108,10 @@ static inline bool fscrypt_valid_enc_modes(u32 contents_mode,
filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS)
return true;
+ if (contents_mode == FS_ENCRYPTION_MODE_ADIANTUM &&
+ filenames_mode == FS_ENCRYPTION_MODE_ADIANTUM)
+ return true;
+
return false;
}
@@ -107,6 +136,22 @@ fscrypt_msg(struct super_block *sb, const char *level, const char *fmt, ...);
#define fscrypt_err(sb, fmt, ...) \
fscrypt_msg(sb, KERN_ERR, fmt, ##__VA_ARGS__)
+#define FSCRYPT_MAX_IV_SIZE 32
+
+union fscrypt_iv {
+ struct {
+ /* logical block number within the file */
+ __le64 lblk_num;
+
+ /* per-file nonce; only set in DIRECT_KEY mode */
+ u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
+ };
+ u8 raw[FSCRYPT_MAX_IV_SIZE];
+};
+
+void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
+ const struct fscrypt_info *ci);
+
/* fname.c */
extern int fname_encrypt(struct inode *inode, const struct qstr *iname,
u8 *out, unsigned int olen);
@@ -115,6 +160,16 @@ extern bool fscrypt_fname_encrypted_size(const struct inode *inode,
u32 *encrypted_len_ret);
/* keyinfo.c */
+
+struct fscrypt_mode {
+ const char *friendly_name;
+ const char *cipher_str;
+ int keysize;
+ int ivsize;
+ bool logged_impl_name;
+ bool needs_essiv;
+};
+
extern void __exit fscrypt_essiv_cleanup(void);
#endif /* _FSCRYPT_PRIVATE_H */
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 7874c9bb2fc5..1e11a683f63d 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -10,15 +10,21 @@
*/
#include <keys/user-type.h>
+#include <linux/hashtable.h>
#include <linux/scatterlist.h>
#include <linux/ratelimit.h>
#include <crypto/aes.h>
+#include <crypto/algapi.h>
#include <crypto/sha.h>
#include <crypto/skcipher.h>
#include "fscrypt_private.h"
static struct crypto_shash *essiv_hash_tfm;
+/* Table of keys referenced by FS_POLICY_FLAG_DIRECT_KEY policies */
+static DEFINE_HASHTABLE(fscrypt_master_keys, 6); /* 6 bits = 64 buckets */
+static DEFINE_SPINLOCK(fscrypt_master_keys_lock);
+
/*
* Key derivation function. This generates the derived key by encrypting the
* master key with AES-128-ECB using the inode's nonce as the AES key.
@@ -123,56 +129,37 @@ invalid:
return ERR_PTR(-ENOKEY);
}
-/* Find the master key, then derive the inode's actual encryption key */
-static int find_and_derive_key(const struct inode *inode,
- const struct fscrypt_context *ctx,
- u8 *derived_key, unsigned int derived_keysize)
-{
- struct key *key;
- const struct fscrypt_key *payload;
- int err;
-
- key = find_and_lock_process_key(FS_KEY_DESC_PREFIX,
- ctx->master_key_descriptor,
- derived_keysize, &payload);
- if (key == ERR_PTR(-ENOKEY) && inode->i_sb->s_cop->key_prefix) {
- key = find_and_lock_process_key(inode->i_sb->s_cop->key_prefix,
- ctx->master_key_descriptor,
- derived_keysize, &payload);
- }
- if (IS_ERR(key))
- return PTR_ERR(key);
- err = derive_key_aes(payload->raw, ctx, derived_key, derived_keysize);
- up_read(&key->sem);
- key_put(key);
- return err;
-}
-
-static struct fscrypt_mode {
- const char *friendly_name;
- const char *cipher_str;
- int keysize;
- bool logged_impl_name;
-} available_modes[] = {
+static struct fscrypt_mode available_modes[] = {
[FS_ENCRYPTION_MODE_AES_256_XTS] = {
.friendly_name = "AES-256-XTS",
.cipher_str = "xts(aes)",
.keysize = 64,
+ .ivsize = 16,
},
[FS_ENCRYPTION_MODE_AES_256_CTS] = {
.friendly_name = "AES-256-CTS-CBC",
.cipher_str = "cts(cbc(aes))",
.keysize = 32,
+ .ivsize = 16,
},
[FS_ENCRYPTION_MODE_AES_128_CBC] = {
.friendly_name = "AES-128-CBC",
.cipher_str = "cbc(aes)",
.keysize = 16,
+ .ivsize = 16,
+ .needs_essiv = true,
},
[FS_ENCRYPTION_MODE_AES_128_CTS] = {
.friendly_name = "AES-128-CTS-CBC",
.cipher_str = "cts(cbc(aes))",
.keysize = 16,
+ .ivsize = 16,
+ },
+ [FS_ENCRYPTION_MODE_ADIANTUM] = {
+ .friendly_name = "Adiantum",
+ .cipher_str = "adiantum(xchacha12,aes)",
+ .keysize = 32,
+ .ivsize = 32,
},
};
@@ -198,14 +185,196 @@ select_encryption_mode(const struct fscrypt_info *ci, const struct inode *inode)
return ERR_PTR(-EINVAL);
}
-static void put_crypt_info(struct fscrypt_info *ci)
+/* Find the master key, then derive the inode's actual encryption key */
+static int find_and_derive_key(const struct inode *inode,
+ const struct fscrypt_context *ctx,
+ u8 *derived_key, const struct fscrypt_mode *mode)
{
- if (!ci)
+ struct key *key;
+ const struct fscrypt_key *payload;
+ int err;
+
+ key = find_and_lock_process_key(FS_KEY_DESC_PREFIX,
+ ctx->master_key_descriptor,
+ mode->keysize, &payload);
+ if (key == ERR_PTR(-ENOKEY) && inode->i_sb->s_cop->key_prefix) {
+ key = find_and_lock_process_key(inode->i_sb->s_cop->key_prefix,
+ ctx->master_key_descriptor,
+ mode->keysize, &payload);
+ }
+ if (IS_ERR(key))
+ return PTR_ERR(key);
+
+ if (ctx->flags & FS_POLICY_FLAG_DIRECT_KEY) {
+ if (mode->ivsize < offsetofend(union fscrypt_iv, nonce)) {
+ fscrypt_warn(inode->i_sb,
+ "direct key mode not allowed with %s",
+ mode->friendly_name);
+ err = -EINVAL;
+ } else if (ctx->contents_encryption_mode !=
+ ctx->filenames_encryption_mode) {
+ fscrypt_warn(inode->i_sb,
+ "direct key mode not allowed with different contents and filenames modes");
+ err = -EINVAL;
+ } else {
+ memcpy(derived_key, payload->raw, mode->keysize);
+ err = 0;
+ }
+ } else {
+ err = derive_key_aes(payload->raw, ctx, derived_key,
+ mode->keysize);
+ }
+ up_read(&key->sem);
+ key_put(key);
+ return err;
+}
+
+/* Allocate and key a symmetric cipher object for the given encryption mode */
+static struct crypto_skcipher *
+allocate_skcipher_for_mode(struct fscrypt_mode *mode, const u8 *raw_key,
+ const struct inode *inode)
+{
+ struct crypto_skcipher *tfm;
+ int err;
+
+ tfm = crypto_alloc_skcipher(mode->cipher_str, 0, 0);
+ if (IS_ERR(tfm)) {
+ fscrypt_warn(inode->i_sb,
+ "error allocating '%s' transform for inode %lu: %ld",
+ mode->cipher_str, inode->i_ino, PTR_ERR(tfm));
+ return tfm;
+ }
+ if (unlikely(!mode->logged_impl_name)) {
+ /*
+ * fscrypt performance can vary greatly depending on which
+ * crypto algorithm implementation is used. Help people debug
+ * performance problems by logging the ->cra_driver_name the
+ * first time a mode is used. Note that multiple threads can
+ * race here, but it doesn't really matter.
+ */
+ mode->logged_impl_name = true;
+ pr_info("fscrypt: %s using implementation \"%s\"\n",
+ mode->friendly_name,
+ crypto_skcipher_alg(tfm)->base.cra_driver_name);
+ }
+ crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+ err = crypto_skcipher_setkey(tfm, raw_key, mode->keysize);
+ if (err)
+ goto err_free_tfm;
+
+ return tfm;
+
+err_free_tfm:
+ crypto_free_skcipher(tfm);
+ return ERR_PTR(err);
+}
+
+/* Master key referenced by FS_POLICY_FLAG_DIRECT_KEY policy */
+struct fscrypt_master_key {
+ struct hlist_node mk_node;
+ refcount_t mk_refcount;
+ const struct fscrypt_mode *mk_mode;
+ struct crypto_skcipher *mk_ctfm;
+ u8 mk_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+ u8 mk_raw[FS_MAX_KEY_SIZE];
+};
+
+static void free_master_key(struct fscrypt_master_key *mk)
+{
+ if (mk) {
+ crypto_free_skcipher(mk->mk_ctfm);
+ kzfree(mk);
+ }
+}
+
+static void put_master_key(struct fscrypt_master_key *mk)
+{
+ if (!refcount_dec_and_lock(&mk->mk_refcount, &fscrypt_master_keys_lock))
return;
+ hash_del(&mk->mk_node);
+ spin_unlock(&fscrypt_master_keys_lock);
- crypto_free_skcipher(ci->ci_ctfm);
- crypto_free_cipher(ci->ci_essiv_tfm);
- kmem_cache_free(fscrypt_info_cachep, ci);
+ free_master_key(mk);
+}
+
+/*
+ * Find/insert the given master key into the fscrypt_master_keys table. If
+ * found, it is returned with elevated refcount, and 'to_insert' is freed if
+ * non-NULL. If not found, 'to_insert' is inserted and returned if it's
+ * non-NULL; otherwise NULL is returned.
+ */
+static struct fscrypt_master_key *
+find_or_insert_master_key(struct fscrypt_master_key *to_insert,
+ const u8 *raw_key, const struct fscrypt_mode *mode,
+ const struct fscrypt_info *ci)
+{
+ unsigned long hash_key;
+ struct fscrypt_master_key *mk;
+
+ /*
+ * Careful: to avoid potentially leaking secret key bytes via timing
+ * information, we must key the hash table by descriptor rather than by
+ * raw key, and use crypto_memneq() when comparing raw keys.
+ */
+
+ BUILD_BUG_ON(sizeof(hash_key) > FS_KEY_DESCRIPTOR_SIZE);
+ memcpy(&hash_key, ci->ci_master_key_descriptor, sizeof(hash_key));
+
+ spin_lock(&fscrypt_master_keys_lock);
+ hash_for_each_possible(fscrypt_master_keys, mk, mk_node, hash_key) {
+ if (memcmp(ci->ci_master_key_descriptor, mk->mk_descriptor,
+ FS_KEY_DESCRIPTOR_SIZE) != 0)
+ continue;
+ if (mode != mk->mk_mode)
+ continue;
+ if (crypto_memneq(raw_key, mk->mk_raw, mode->keysize))
+ continue;
+ /* using existing tfm with same (descriptor, mode, raw_key) */
+ refcount_inc(&mk->mk_refcount);
+ spin_unlock(&fscrypt_master_keys_lock);
+ free_master_key(to_insert);
+ return mk;
+ }
+ if (to_insert)
+ hash_add(fscrypt_master_keys, &to_insert->mk_node, hash_key);
+ spin_unlock(&fscrypt_master_keys_lock);
+ return to_insert;
+}
+
+/* Prepare to encrypt directly using the master key in the given mode */
+static struct fscrypt_master_key *
+fscrypt_get_master_key(const struct fscrypt_info *ci, struct fscrypt_mode *mode,
+ const u8 *raw_key, const struct inode *inode)
+{
+ struct fscrypt_master_key *mk;
+ int err;
+
+ /* Is there already a tfm for this key? */
+ mk = find_or_insert_master_key(NULL, raw_key, mode, ci);
+ if (mk)
+ return mk;
+
+ /* Nope, allocate one. */
+ mk = kzalloc(sizeof(*mk), GFP_NOFS);
+ if (!mk)
+ return ERR_PTR(-ENOMEM);
+ refcount_set(&mk->mk_refcount, 1);
+ mk->mk_mode = mode;
+ mk->mk_ctfm = allocate_skcipher_for_mode(mode, raw_key, inode);
+ if (IS_ERR(mk->mk_ctfm)) {
+ err = PTR_ERR(mk->mk_ctfm);
+ mk->mk_ctfm = NULL;
+ goto err_free_mk;
+ }
+ memcpy(mk->mk_descriptor, ci->ci_master_key_descriptor,
+ FS_KEY_DESCRIPTOR_SIZE);
+ memcpy(mk->mk_raw, raw_key, mode->keysize);
+
+ return find_or_insert_master_key(mk, raw_key, mode, ci);
+
+err_free_mk:
+ free_master_key(mk);
+ return ERR_PTR(err);
}
static int derive_essiv_salt(const u8 *key, int keysize, u8 *salt)
@@ -275,11 +444,67 @@ void __exit fscrypt_essiv_cleanup(void)
crypto_free_shash(essiv_hash_tfm);
}
+/*
+ * Given the encryption mode and key (normally the derived key, but for
+ * FS_POLICY_FLAG_DIRECT_KEY mode it's the master key), set up the inode's
+ * symmetric cipher transform object(s).
+ */
+static int setup_crypto_transform(struct fscrypt_info *ci,
+ struct fscrypt_mode *mode,
+ const u8 *raw_key, const struct inode *inode)
+{
+ struct fscrypt_master_key *mk;
+ struct crypto_skcipher *ctfm;
+ int err;
+
+ if (ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY) {
+ mk = fscrypt_get_master_key(ci, mode, raw_key, inode);
+ if (IS_ERR(mk))
+ return PTR_ERR(mk);
+ ctfm = mk->mk_ctfm;
+ } else {
+ mk = NULL;
+ ctfm = allocate_skcipher_for_mode(mode, raw_key, inode);
+ if (IS_ERR(ctfm))
+ return PTR_ERR(ctfm);
+ }
+ ci->ci_master_key = mk;
+ ci->ci_ctfm = ctfm;
+
+ if (mode->needs_essiv) {
+ /* ESSIV implies 16-byte IVs which implies !DIRECT_KEY */
+ WARN_ON(mode->ivsize != AES_BLOCK_SIZE);
+ WARN_ON(ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY);
+
+ err = init_essiv_generator(ci, raw_key, mode->keysize);
+ if (err) {
+ fscrypt_warn(inode->i_sb,
+ "error initializing ESSIV generator for inode %lu: %d",
+ inode->i_ino, err);
+ return err;
+ }
+ }
+ return 0;
+}
+
+static void put_crypt_info(struct fscrypt_info *ci)
+{
+ if (!ci)
+ return;
+
+ if (ci->ci_master_key) {
+ put_master_key(ci->ci_master_key);
+ } else {
+ crypto_free_skcipher(ci->ci_ctfm);
+ crypto_free_cipher(ci->ci_essiv_tfm);
+ }
+ kmem_cache_free(fscrypt_info_cachep, ci);
+}
+
int fscrypt_get_encryption_info(struct inode *inode)
{
struct fscrypt_info *crypt_info;
struct fscrypt_context ctx;
- struct crypto_skcipher *ctfm;
struct fscrypt_mode *mode;
u8 *raw_key = NULL;
int res;
@@ -312,74 +537,42 @@ int fscrypt_get_encryption_info(struct inode *inode)
if (ctx.flags & ~FS_POLICY_FLAGS_VALID)
return -EINVAL;
- crypt_info = kmem_cache_alloc(fscrypt_info_cachep, GFP_NOFS);
+ crypt_info = kmem_cache_zalloc(fscrypt_info_cachep, GFP_NOFS);
if (!crypt_info)
return -ENOMEM;
crypt_info->ci_flags = ctx.flags;
crypt_info->ci_data_mode = ctx.contents_encryption_mode;
crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
- crypt_info->ci_ctfm = NULL;
- crypt_info->ci_essiv_tfm = NULL;
- memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
- sizeof(crypt_info->ci_master_key));
+ memcpy(crypt_info->ci_master_key_descriptor, ctx.master_key_descriptor,
+ FS_KEY_DESCRIPTOR_SIZE);
+ memcpy(crypt_info->ci_nonce, ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
mode = select_encryption_mode(crypt_info, inode);
if (IS_ERR(mode)) {
res = PTR_ERR(mode);
goto out;
}
+ WARN_ON(mode->ivsize > FSCRYPT_MAX_IV_SIZE);
+ crypt_info->ci_mode = mode;
/*
- * This cannot be a stack buffer because it is passed to the scatterlist
- * crypto API as part of key derivation.
+ * This cannot be a stack buffer because it may be passed to the
+ * scatterlist crypto API as part of key derivation.
*/
res = -ENOMEM;
raw_key = kmalloc(mode->keysize, GFP_NOFS);
if (!raw_key)
goto out;
- res = find_and_derive_key(inode, &ctx, raw_key, mode->keysize);
+ res = find_and_derive_key(inode, &ctx, raw_key, mode);
if (res)
goto out;
- ctfm = crypto_alloc_skcipher(mode->cipher_str, 0, 0);
- if (IS_ERR(ctfm)) {
- res = PTR_ERR(ctfm);
- fscrypt_warn(inode->i_sb,
- "error allocating '%s' transform for inode %lu: %d",
- mode->cipher_str, inode->i_ino, res);
- goto out;
- }
- if (unlikely(!mode->logged_impl_name)) {
- /*
- * fscrypt performance can vary greatly depending on which
- * crypto algorithm implementation is used. Help people debug
- * performance problems by logging the ->cra_driver_name the
- * first time a mode is used. Note that multiple threads can
- * race here, but it doesn't really matter.
- */
- mode->logged_impl_name = true;
- pr_info("fscrypt: %s using implementation \"%s\"\n",
- mode->friendly_name,
- crypto_skcipher_alg(ctfm)->base.cra_driver_name);
- }
- crypt_info->ci_ctfm = ctfm;
- crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_REQ_WEAK_KEY);
- res = crypto_skcipher_setkey(ctfm, raw_key, mode->keysize);
+ res = setup_crypto_transform(crypt_info, mode, raw_key, inode);
if (res)
goto out;
- if (S_ISREG(inode->i_mode) &&
- crypt_info->ci_data_mode == FS_ENCRYPTION_MODE_AES_128_CBC) {
- res = init_essiv_generator(crypt_info, raw_key, mode->keysize);
- if (res) {
- fscrypt_warn(inode->i_sb,
- "error initializing ESSIV generator for inode %lu: %d",
- inode->i_ino, res);
- goto out;
- }
- }
if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) == NULL)
crypt_info = NULL;
out:
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index c6d431a5cce9..f490de921ce8 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -199,7 +199,8 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child)
child_ci = child->i_crypt_info;
if (parent_ci && child_ci) {
- return memcmp(parent_ci->ci_master_key, child_ci->ci_master_key,
+ return memcmp(parent_ci->ci_master_key_descriptor,
+ child_ci->ci_master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE) == 0 &&
(parent_ci->ci_data_mode == child_ci->ci_data_mode) &&
(parent_ci->ci_filename_mode ==
@@ -254,7 +255,7 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child,
ctx.contents_encryption_mode = ci->ci_data_mode;
ctx.filenames_encryption_mode = ci->ci_filename_mode;
ctx.flags = ci->ci_flags;
- memcpy(ctx.master_key_descriptor, ci->ci_master_key,
+ memcpy(ctx.master_key_descriptor, ci->ci_master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE);
get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
BUILD_BUG_ON(sizeof(ctx) != FSCRYPT_SET_CONTEXT_MAX_SIZE);
diff --git a/fs/dax.c b/fs/dax.c
index 48132eca3761..6959837cc465 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -246,18 +246,16 @@ static void wait_entry_unlocked(struct xa_state *xas, void *entry)
ewait.wait.func = wake_exceptional_entry_func;
wq = dax_entry_waitqueue(xas, entry, &ewait.key);
- prepare_to_wait_exclusive(wq, &ewait.wait, TASK_UNINTERRUPTIBLE);
+ /*
+ * Unlike get_unlocked_entry() there is no guarantee that this
+ * path ever successfully retrieves an unlocked entry before an
+ * inode dies. Perform a non-exclusive wait in case this path
+ * never successfully performs its own wake up.
+ */
+ prepare_to_wait(wq, &ewait.wait, TASK_UNINTERRUPTIBLE);
xas_unlock_irq(xas);
schedule();
finish_wait(wq, &ewait.wait);
-
- /*
- * Entry lock waits are exclusive. Wake up the next waiter since
- * we aren't sure we will acquire the entry lock and thus wake
- * the next waiter up on unlock.
- */
- if (waitqueue_active(wq))
- __wake_up(wq, TASK_NORMAL, 1, &ewait.key);
}
static void put_unlocked_entry(struct xa_state *xas, void *entry)
@@ -779,7 +777,8 @@ static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index,
i_mmap_lock_read(mapping);
vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) {
- unsigned long address, start, end;
+ struct mmu_notifier_range range;
+ unsigned long address;
cond_resched();
@@ -793,7 +792,8 @@ static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index,
* call mmu_notifier_invalidate_range_start() on our behalf
* before taking any lock.
*/
- if (follow_pte_pmd(vma->vm_mm, address, &start, &end, &ptep, &pmdp, &ptl))
+ if (follow_pte_pmd(vma->vm_mm, address, &range,
+ &ptep, &pmdp, &ptl))
continue;
/*
@@ -835,7 +835,7 @@ unlock_pte:
pte_unmap_unlock(ptep, ptl);
}
- mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
+ mmu_notifier_invalidate_range_end(&range);
}
i_mmap_unlock_read(mapping);
}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 41a0e97252ae..dbc1a1f080ce 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -518,7 +518,7 @@ static struct bio *dio_await_one(struct dio *dio)
dio->waiter = current;
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (!(dio->iocb->ki_flags & IOCB_HIPRI) ||
- !blk_poll(dio->bio_disk->queue, dio->bio_cookie))
+ !blk_poll(dio->bio_disk->queue, dio->bio_cookie, true))
io_schedule();
/* wake up sets us TASK_RUNNING */
spin_lock_irqsave(&dio->bio_lock, flags);
@@ -1265,6 +1265,8 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
} else {
dio->op = REQ_OP_READ;
}
+ if (iocb->ki_flags & IOCB_HIPRI)
+ dio->op_flags |= REQ_HIPRI;
/*
* For AIO O_(D)SYNC writes we need to defer completions to a workqueue
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index 562fa8c3edff..47ee66d70109 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -292,6 +292,8 @@ void dlm_callback_suspend(struct dlm_ls *ls)
flush_workqueue(ls->ls_callback_wq);
}
+#define MAX_CB_QUEUE 25
+
void dlm_callback_resume(struct dlm_ls *ls)
{
struct dlm_lkb *lkb, *safe;
@@ -302,15 +304,23 @@ void dlm_callback_resume(struct dlm_ls *ls)
if (!ls->ls_callback_wq)
return;
+more:
mutex_lock(&ls->ls_cb_mutex);
list_for_each_entry_safe(lkb, safe, &ls->ls_cb_delay, lkb_cb_list) {
list_del_init(&lkb->lkb_cb_list);
queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
count++;
+ if (count == MAX_CB_QUEUE)
+ break;
}
mutex_unlock(&ls->ls_cb_mutex);
if (count)
log_rinfo(ls, "dlm_callback_resume %d", count);
+ if (count == MAX_CB_QUEUE) {
+ count = 0;
+ cond_resched();
+ goto more;
+ }
}
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index cc91963683de..a928ba008d7d 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1209,6 +1209,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
if (rv < 0) {
log_error(ls, "create_lkb idr error %d", rv);
+ dlm_free_lkb(lkb);
return rv;
}
@@ -4179,6 +4180,7 @@ static int receive_convert(struct dlm_ls *ls, struct dlm_message *ms)
(unsigned long long)lkb->lkb_recover_seq,
ms->m_header.h_nodeid, ms->m_lkid);
error = -ENOENT;
+ dlm_put_lkb(lkb);
goto fail;
}
@@ -4232,6 +4234,7 @@ static int receive_unlock(struct dlm_ls *ls, struct dlm_message *ms)
lkb->lkb_id, lkb->lkb_remid,
ms->m_header.h_nodeid, ms->m_lkid);
error = -ENOENT;
+ dlm_put_lkb(lkb);
goto fail;
}
@@ -5792,20 +5795,20 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
goto out;
}
}
-
- /* After ua is attached to lkb it will be freed by dlm_free_lkb().
- When DLM_IFL_USER is set, the dlm knows that this is a userspace
- lock and that lkb_astparam is the dlm_user_args structure. */
-
error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs,
fake_astfn, ua, fake_bastfn, &args);
- lkb->lkb_flags |= DLM_IFL_USER;
-
if (error) {
+ kfree(ua->lksb.sb_lvbptr);
+ ua->lksb.sb_lvbptr = NULL;
+ kfree(ua);
__put_lkb(ls, lkb);
goto out;
}
+ /* After ua is attached to lkb it will be freed by dlm_free_lkb().
+ When DLM_IFL_USER is set, the dlm knows that this is a userspace
+ lock and that lkb_astparam is the dlm_user_args structure. */
+ lkb->lkb_flags |= DLM_IFL_USER;
error = request_lock(ls, lkb, name, namelen, &args);
switch (error) {
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 5ba94be006ee..db43b98c4d64 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -431,7 +431,7 @@ static int new_lockspace(const char *name, const char *cluster,
int do_unreg = 0;
int namelen = strlen(name);
- if (namelen > DLM_LOCKSPACE_LEN)
+ if (namelen > DLM_LOCKSPACE_LEN || namelen == 0)
return -EINVAL;
if (!lvblen || (lvblen % 8))
@@ -680,11 +680,9 @@ static int new_lockspace(const char *name, const char *cluster,
kfree(ls->ls_recover_buf);
out_lkbidr:
idr_destroy(&ls->ls_lkbidr);
- for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) {
- if (ls->ls_remove_names[i])
- kfree(ls->ls_remove_names[i]);
- }
out_rsbtbl:
+ for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++)
+ kfree(ls->ls_remove_names[i]);
vfree(ls->ls_rsbtbl);
out_lsfree:
if (do_unreg)
@@ -807,6 +805,7 @@ static int release_lockspace(struct dlm_ls *ls, int force)
dlm_delete_debug_file(ls);
+ idr_destroy(&ls->ls_recover_idr);
kfree(ls->ls_recover_buf);
/*
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 3fda3832cf6a..0bc43b35d2c5 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -671,7 +671,7 @@ int dlm_ls_stop(struct dlm_ls *ls)
int dlm_ls_start(struct dlm_ls *ls)
{
struct dlm_recover *rv, *rv_old;
- struct dlm_config_node *nodes;
+ struct dlm_config_node *nodes = NULL;
int error, count;
rv = kzalloc(sizeof(*rv), GFP_NOFS);
@@ -680,7 +680,7 @@ int dlm_ls_start(struct dlm_ls *ls)
error = dlm_config_nodes(ls->ls_name, &nodes, &count);
if (error < 0)
- goto fail;
+ goto fail_rv;
spin_lock(&ls->ls_recover_lock);
@@ -712,8 +712,9 @@ int dlm_ls_start(struct dlm_ls *ls)
return 0;
fail:
- kfree(rv);
kfree(nodes);
+ fail_rv:
+ kfree(rv);
return error;
}
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index 7cd24bccd4fe..37be29f21d04 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -38,10 +38,8 @@ int __init dlm_memory_init(void)
void dlm_memory_exit(void)
{
- if (lkb_cache)
- kmem_cache_destroy(lkb_cache);
- if (rsb_cache)
- kmem_cache_destroy(rsb_cache);
+ kmem_cache_destroy(lkb_cache);
+ kmem_cache_destroy(rsb_cache);
}
char *dlm_allocate_lvb(struct dlm_ls *ls)
@@ -86,8 +84,7 @@ void dlm_free_lkb(struct dlm_lkb *lkb)
struct dlm_user_args *ua;
ua = lkb->lkb_ua;
if (ua) {
- if (ua->lksb.sb_lvbptr)
- kfree(ua->lksb.sb_lvbptr);
+ kfree(ua->lksb.sb_lvbptr);
kfree(ua);
}
}
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 2a669390cd7f..3c84c62dadb7 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -25,6 +25,7 @@
#include "lvb_table.h"
#include "user.h"
#include "ast.h"
+#include "config.h"
static const char name_prefix[] = "dlm";
static const struct file_operations device_fops;
@@ -404,7 +405,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- error = dlm_new_lockspace(params->name, NULL, params->flags,
+ error = dlm_new_lockspace(params->name, dlm_config.ci_cluster_name, params->flags,
DLM_USER_LVB_LEN, NULL, NULL, NULL,
&lockspace);
if (error)
@@ -702,7 +703,7 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat,
result.version[0] = DLM_DEVICE_VERSION_MAJOR;
result.version[1] = DLM_DEVICE_VERSION_MINOR;
result.version[2] = DLM_DEVICE_VERSION_PATCH;
- memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb));
+ memcpy(&result.lksb, &ua->lksb, offsetof(struct dlm_lksb, sb_lvbptr));
result.user_lksb = ua->user_lksb;
/* FIXME: dlm1 provides for the user's bastparam/addr to not be updated
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 42bbe6824b4b..a5d219d920e7 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -381,7 +381,8 @@ static void ep_nested_calls_init(struct nested_calls *ncalls)
*/
static inline int ep_events_available(struct eventpoll *ep)
{
- return !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR;
+ return !list_empty_careful(&ep->rdllist) ||
+ READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR;
}
#ifdef CONFIG_NET_RX_BUSY_POLL
@@ -471,7 +472,6 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
* no re-entered.
*
* @ncalls: Pointer to the nested_calls structure to be used for this call.
- * @max_nests: Maximum number of allowed nesting calls.
* @nproc: Nested call core function pointer.
* @priv: Opaque data to be passed to the @nproc callback.
* @cookie: Cookie to be used to identify this nested call.
@@ -480,7 +480,7 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
* Returns: Returns the code returned by the @nproc callback, or -1 if
* the maximum recursion limit has been exceeded.
*/
-static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
+static int ep_call_nested(struct nested_calls *ncalls,
int (*nproc)(void *, void *, int), void *priv,
void *cookie, void *ctx)
{
@@ -499,7 +499,7 @@ static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
*/
list_for_each_entry(tncur, lsthead, llink) {
if (tncur->ctx == ctx &&
- (tncur->cookie == cookie || ++call_nests > max_nests)) {
+ (tncur->cookie == cookie || ++call_nests > EP_MAX_NESTS)) {
/*
* Ops ... loop detected or maximum nest level reached.
* We abort this wake by breaking the cycle itself.
@@ -573,7 +573,7 @@ static void ep_poll_safewake(wait_queue_head_t *wq)
{
int this_cpu = get_cpu();
- ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
+ ep_call_nested(&poll_safewake_ncalls,
ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
put_cpu();
@@ -699,7 +699,7 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep,
*/
spin_lock_irq(&ep->wq.lock);
list_splice_init(&ep->rdllist, &txlist);
- ep->ovflist = NULL;
+ WRITE_ONCE(ep->ovflist, NULL);
spin_unlock_irq(&ep->wq.lock);
/*
@@ -713,7 +713,7 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep,
* other events might have been queued by the poll callback.
* We re-insert them inside the main ready-list here.
*/
- for (nepi = ep->ovflist; (epi = nepi) != NULL;
+ for (nepi = READ_ONCE(ep->ovflist); (epi = nepi) != NULL;
nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
/*
* We need to check if the item is already in the list.
@@ -731,7 +731,7 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep,
* releasing the lock, events will be queued in the normal way inside
* ep->rdllist.
*/
- ep->ovflist = EP_UNACTIVE_PTR;
+ WRITE_ONCE(ep->ovflist, EP_UNACTIVE_PTR);
/*
* Quickly re-inject items left on "txlist".
@@ -1154,10 +1154,10 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
* semantics). All the events that happen during that period of time are
* chained in ep->ovflist and requeued later on.
*/
- if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
+ if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
if (epi->next == EP_UNACTIVE_PTR) {
- epi->next = ep->ovflist;
- ep->ovflist = epi;
+ epi->next = READ_ONCE(ep->ovflist);
+ WRITE_ONCE(ep->ovflist, epi);
if (epi->ws) {
/*
* Activate ep->ws since epi->ws may get
@@ -1333,7 +1333,6 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
}
} else {
error = ep_call_nested(&poll_loop_ncalls,
- EP_MAX_NESTS,
reverse_path_check_proc,
child_file, child_file,
current);
@@ -1367,7 +1366,7 @@ static int reverse_path_check(void)
/* let's call this for all tfiles */
list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
path_count_init();
- error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ error = ep_call_nested(&poll_loop_ncalls,
reverse_path_check_proc, current_file,
current_file, current);
if (error)
@@ -1626,21 +1625,24 @@ static __poll_t ep_send_events_proc(struct eventpoll *ep, struct list_head *head
{
struct ep_send_events_data *esed = priv;
__poll_t revents;
- struct epitem *epi;
- struct epoll_event __user *uevent;
+ struct epitem *epi, *tmp;
+ struct epoll_event __user *uevent = esed->events;
struct wakeup_source *ws;
poll_table pt;
init_poll_funcptr(&pt, NULL);
+ esed->res = 0;
/*
* We can loop without lock because we are passed a task private list.
* Items cannot vanish during the loop because ep_scan_ready_list() is
* holding "mtx" during this call.
*/
- for (esed->res = 0, uevent = esed->events;
- !list_empty(head) && esed->res < esed->maxevents;) {
- epi = list_first_entry(head, struct epitem, rdllink);
+ lockdep_assert_held(&ep->mtx);
+
+ list_for_each_entry_safe(epi, tmp, head, rdllink) {
+ if (esed->res >= esed->maxevents)
+ break;
/*
* Activate ep->ws before deactivating epi->ws to prevent
@@ -1660,42 +1662,42 @@ static __poll_t ep_send_events_proc(struct eventpoll *ep, struct list_head *head
list_del_init(&epi->rdllink);
- revents = ep_item_poll(epi, &pt, 1);
-
/*
* If the event mask intersect the caller-requested one,
* deliver the event to userspace. Again, ep_scan_ready_list()
- * is holding "mtx", so no operations coming from userspace
+ * is holding ep->mtx, so no operations coming from userspace
* can change the item.
*/
- if (revents) {
- if (__put_user(revents, &uevent->events) ||
- __put_user(epi->event.data, &uevent->data)) {
- list_add(&epi->rdllink, head);
- ep_pm_stay_awake(epi);
- if (!esed->res)
- esed->res = -EFAULT;
- return 0;
- }
- esed->res++;
- uevent++;
- if (epi->event.events & EPOLLONESHOT)
- epi->event.events &= EP_PRIVATE_BITS;
- else if (!(epi->event.events & EPOLLET)) {
- /*
- * If this file has been added with Level
- * Trigger mode, we need to insert back inside
- * the ready list, so that the next call to
- * epoll_wait() will check again the events
- * availability. At this point, no one can insert
- * into ep->rdllist besides us. The epoll_ctl()
- * callers are locked out by
- * ep_scan_ready_list() holding "mtx" and the
- * poll callback will queue them in ep->ovflist.
- */
- list_add_tail(&epi->rdllink, &ep->rdllist);
- ep_pm_stay_awake(epi);
- }
+ revents = ep_item_poll(epi, &pt, 1);
+ if (!revents)
+ continue;
+
+ if (__put_user(revents, &uevent->events) ||
+ __put_user(epi->event.data, &uevent->data)) {
+ list_add(&epi->rdllink, head);
+ ep_pm_stay_awake(epi);
+ if (!esed->res)
+ esed->res = -EFAULT;
+ return 0;
+ }
+ esed->res++;
+ uevent++;
+ if (epi->event.events & EPOLLONESHOT)
+ epi->event.events &= EP_PRIVATE_BITS;
+ else if (!(epi->event.events & EPOLLET)) {
+ /*
+ * If this file has been added with Level
+ * Trigger mode, we need to insert back inside
+ * the ready list, so that the next call to
+ * epoll_wait() will check again the events
+ * availability. At this point, no one can insert
+ * into ep->rdllist besides us. The epoll_ctl()
+ * callers are locked out by
+ * ep_scan_ready_list() holding "mtx" and the
+ * poll callback will queue them in ep->ovflist.
+ */
+ list_add_tail(&epi->rdllink, &ep->rdllist);
+ ep_pm_stay_awake(epi);
}
}
@@ -1747,6 +1749,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
{
int res = 0, eavail, timed_out = 0;
u64 slack = 0;
+ bool waiter = false;
wait_queue_entry_t wait;
ktime_t expires, *to = NULL;
@@ -1761,11 +1764,18 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
} else if (timeout == 0) {
/*
* Avoid the unnecessary trip to the wait queue loop, if the
- * caller specified a non blocking operation.
+ * caller specified a non blocking operation. We still need
+ * lock because we could race and not see an epi being added
+ * to the ready list while in irq callback. Thus incorrectly
+ * returning 0 back to userspace.
*/
timed_out = 1;
+
spin_lock_irq(&ep->wq.lock);
- goto check_events;
+ eavail = ep_events_available(ep);
+ spin_unlock_irq(&ep->wq.lock);
+
+ goto send_events;
}
fetch_events:
@@ -1773,64 +1783,66 @@ fetch_events:
if (!ep_events_available(ep))
ep_busy_loop(ep, timed_out);
- spin_lock_irq(&ep->wq.lock);
+ eavail = ep_events_available(ep);
+ if (eavail)
+ goto send_events;
- if (!ep_events_available(ep)) {
- /*
- * Busy poll timed out. Drop NAPI ID for now, we can add
- * it back in when we have moved a socket with a valid NAPI
- * ID onto the ready list.
- */
- ep_reset_busy_poll_napi_id(ep);
+ /*
+ * Busy poll timed out. Drop NAPI ID for now, we can add
+ * it back in when we have moved a socket with a valid NAPI
+ * ID onto the ready list.
+ */
+ ep_reset_busy_poll_napi_id(ep);
- /*
- * We don't have any available event to return to the caller.
- * We need to sleep here, and we will be wake up by
- * ep_poll_callback() when events will become available.
- */
+ /*
+ * We don't have any available event to return to the caller. We need
+ * to sleep here, and we will be woken by ep_poll_callback() when events
+ * become available.
+ */
+ if (!waiter) {
+ waiter = true;
init_waitqueue_entry(&wait, current);
- __add_wait_queue_exclusive(&ep->wq, &wait);
- for (;;) {
- /*
- * We don't want to sleep if the ep_poll_callback() sends us
- * a wakeup in between. That's why we set the task state
- * to TASK_INTERRUPTIBLE before doing the checks.
- */
- set_current_state(TASK_INTERRUPTIBLE);
- /*
- * Always short-circuit for fatal signals to allow
- * threads to make a timely exit without the chance of
- * finding more events available and fetching
- * repeatedly.
- */
- if (fatal_signal_pending(current)) {
- res = -EINTR;
- break;
- }
- if (ep_events_available(ep) || timed_out)
- break;
- if (signal_pending(current)) {
- res = -EINTR;
- break;
- }
+ spin_lock_irq(&ep->wq.lock);
+ __add_wait_queue_exclusive(&ep->wq, &wait);
+ spin_unlock_irq(&ep->wq.lock);
+ }
- spin_unlock_irq(&ep->wq.lock);
- if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
- timed_out = 1;
+ for (;;) {
+ /*
+ * We don't want to sleep if the ep_poll_callback() sends us
+ * a wakeup in between. That's why we set the task state
+ * to TASK_INTERRUPTIBLE before doing the checks.
+ */
+ set_current_state(TASK_INTERRUPTIBLE);
+ /*
+ * Always short-circuit for fatal signals to allow
+ * threads to make a timely exit without the chance of
+ * finding more events available and fetching
+ * repeatedly.
+ */
+ if (fatal_signal_pending(current)) {
+ res = -EINTR;
+ break;
+ }
- spin_lock_irq(&ep->wq.lock);
+ eavail = ep_events_available(ep);
+ if (eavail)
+ break;
+ if (signal_pending(current)) {
+ res = -EINTR;
+ break;
}
- __remove_wait_queue(&ep->wq, &wait);
- __set_current_state(TASK_RUNNING);
+ if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) {
+ timed_out = 1;
+ break;
+ }
}
-check_events:
- /* Is it worth to try to dig for events ? */
- eavail = ep_events_available(ep);
- spin_unlock_irq(&ep->wq.lock);
+ __set_current_state(TASK_RUNNING);
+send_events:
/*
* Try to transfer events to user space. In case we get 0 events and
* there's still timeout left over, we go trying again in search of
@@ -1840,6 +1852,12 @@ check_events:
!(res = ep_send_events(ep, events, maxevents)) && !timed_out)
goto fetch_events;
+ if (waiter) {
+ spin_lock_irq(&ep->wq.lock);
+ __remove_wait_queue(&ep->wq, &wait);
+ spin_unlock_irq(&ep->wq.lock);
+ }
+
return res;
}
@@ -1876,7 +1894,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
ep_tovisit = epi->ffd.file->private_data;
if (ep_tovisit->visited)
continue;
- error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ error = ep_call_nested(&poll_loop_ncalls,
ep_loop_check_proc, epi->ffd.file,
ep_tovisit, current);
if (error != 0)
@@ -1916,7 +1934,7 @@ static int ep_loop_check(struct eventpoll *ep, struct file *file)
int ret;
struct eventpoll *ep_cur, *ep_next;
- ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ ret = ep_call_nested(&poll_loop_ncalls,
ep_loop_check_proc, file, ep, current);
/* clear visited list */
list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
@@ -2172,7 +2190,7 @@ static int do_epoll_wait(int epfd, struct epoll_event __user *events,
return -EINVAL;
/* Verify that the area passed by the user is writeable */
- if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event)))
+ if (!access_ok(events, maxevents * sizeof(struct epoll_event)))
return -EFAULT;
/* Get the "struct file *" for the eventpoll file */
@@ -2223,31 +2241,13 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
* If the caller wants a certain signal mask to be set during the wait,
* we apply it here.
*/
- if (sigmask) {
- if (sigsetsize != sizeof(sigset_t))
- return -EINVAL;
- if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
- return -EFAULT;
- sigsaved = current->blocked;
- set_current_blocked(&ksigmask);
- }
+ error = set_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize);
+ if (error)
+ return error;
error = do_epoll_wait(epfd, events, maxevents, timeout);
- /*
- * If we changed the signal mask, we need to restore the original one.
- * In case we've got a signal while waiting, we do not restore the
- * signal mask yet, and we allow do_signal() to deliver the signal on
- * the way back to userspace, before the signal mask is restored.
- */
- if (sigmask) {
- if (error == -EINTR) {
- memcpy(&current->saved_sigmask, &sigsaved,
- sizeof(sigsaved));
- set_restore_sigmask();
- } else
- set_current_blocked(&sigsaved);
- }
+ restore_user_sigmask(sigmask, &sigsaved);
return error;
}
@@ -2266,31 +2266,13 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
* If the caller wants a certain signal mask to be set during the wait,
* we apply it here.
*/
- if (sigmask) {
- if (sigsetsize != sizeof(compat_sigset_t))
- return -EINVAL;
- if (get_compat_sigset(&ksigmask, sigmask))
- return -EFAULT;
- sigsaved = current->blocked;
- set_current_blocked(&ksigmask);
- }
+ err = set_compat_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize);
+ if (err)
+ return err;
err = do_epoll_wait(epfd, events, maxevents, timeout);
- /*
- * If we changed the signal mask, we need to restore the original one.
- * In case we've got a signal while waiting, we do not restore the
- * signal mask yet, and we allow do_signal() to deliver the signal on
- * the way back to userspace, before the signal mask is restored.
- */
- if (sigmask) {
- if (err == -EINTR) {
- memcpy(&current->saved_sigmask, &sigsaved,
- sizeof(sigsaved));
- set_restore_sigmask();
- } else
- set_current_blocked(&sigsaved);
- }
+ restore_user_sigmask(sigmask, &sigsaved);
return err;
}
diff --git a/fs/exec.c b/fs/exec.c
index fc281b738a98..fb72d36f7823 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -218,55 +218,10 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
if (ret <= 0)
return NULL;
- if (write) {
- unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
- unsigned long ptr_size, limit;
-
- /*
- * Since the stack will hold pointers to the strings, we
- * must account for them as well.
- *
- * The size calculation is the entire vma while each arg page is
- * built, so each time we get here it's calculating how far it
- * is currently (rather than each call being just the newly
- * added size from the arg page). As a result, we need to
- * always add the entire size of the pointers, so that on the
- * last call to get_arg_page() we'll actually have the entire
- * correct size.
- */
- ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
- if (ptr_size > ULONG_MAX - size)
- goto fail;
- size += ptr_size;
-
- acct_arg_size(bprm, size / PAGE_SIZE);
-
- /*
- * We've historically supported up to 32 pages (ARG_MAX)
- * of argument strings even with small stacks
- */
- if (size <= ARG_MAX)
- return page;
-
- /*
- * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
- * (whichever is smaller) for the argv+env strings.
- * This ensures that:
- * - the remaining binfmt code will not run out of stack space,
- * - the program will have a reasonable amount of stack left
- * to work from.
- */
- limit = _STK_LIM / 4 * 3;
- limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
- if (size > limit)
- goto fail;
- }
+ if (write)
+ acct_arg_size(bprm, vma_pages(bprm->vma));
return page;
-
-fail:
- put_page(page);
- return NULL;
}
static void put_arg_page(struct page *page)
@@ -492,6 +447,50 @@ static int count(struct user_arg_ptr argv, int max)
return i;
}
+static int prepare_arg_pages(struct linux_binprm *bprm,
+ struct user_arg_ptr argv, struct user_arg_ptr envp)
+{
+ unsigned long limit, ptr_size;
+
+ bprm->argc = count(argv, MAX_ARG_STRINGS);
+ if (bprm->argc < 0)
+ return bprm->argc;
+
+ bprm->envc = count(envp, MAX_ARG_STRINGS);
+ if (bprm->envc < 0)
+ return bprm->envc;
+
+ /*
+ * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
+ * (whichever is smaller) for the argv+env strings.
+ * This ensures that:
+ * - the remaining binfmt code will not run out of stack space,
+ * - the program will have a reasonable amount of stack left
+ * to work from.
+ */
+ limit = _STK_LIM / 4 * 3;
+ limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
+ /*
+ * We've historically supported up to 32 pages (ARG_MAX)
+ * of argument strings even with small stacks
+ */
+ limit = max_t(unsigned long, limit, ARG_MAX);
+ /*
+ * We must account for the size of all the argv and envp pointers to
+ * the argv and envp strings, since they will also take up space in
+ * the stack. They aren't stored until much later when we can't
+ * signal to the parent that the child has run out of stack space.
+ * Instead, calculate it here so it's possible to fail gracefully.
+ */
+ ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
+ if (limit <= ptr_size)
+ return -E2BIG;
+ limit -= ptr_size;
+
+ bprm->argmin = bprm->p - limit;
+ return 0;
+}
+
/*
* 'copy_strings()' copies argument/environment strings from the old
* processes's memory to the new process's stack. The call to get_user_pages()
@@ -527,6 +526,10 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
pos = bprm->p;
str += len;
bprm->p -= len;
+#ifdef CONFIG_MMU
+ if (bprm->p < bprm->argmin)
+ goto out;
+#endif
while (len > 0) {
int offset, bytes_to_copy;
@@ -1084,7 +1087,7 @@ static int de_thread(struct task_struct *tsk)
__set_current_state(TASK_KILLABLE);
spin_unlock_irq(lock);
schedule();
- if (unlikely(__fatal_signal_pending(tsk)))
+ if (__fatal_signal_pending(tsk))
goto killed;
spin_lock_irq(lock);
}
@@ -1112,7 +1115,7 @@ static int de_thread(struct task_struct *tsk)
write_unlock_irq(&tasklist_lock);
cgroup_threadgroup_change_end(tsk);
schedule();
- if (unlikely(__fatal_signal_pending(tsk)))
+ if (__fatal_signal_pending(tsk))
goto killed;
}
@@ -1399,7 +1402,7 @@ EXPORT_SYMBOL(finalize_exec);
* Or, if exec fails before, free_bprm() should release ->cred and
* and unlock.
*/
-int prepare_bprm_creds(struct linux_binprm *bprm)
+static int prepare_bprm_creds(struct linux_binprm *bprm)
{
if (mutex_lock_interruptible(&current->signal->cred_guard_mutex))
return -ERESTARTNOINTR;
@@ -1789,12 +1792,8 @@ static int __do_execve_file(int fd, struct filename *filename,
if (retval)
goto out_unmark;
- bprm->argc = count(argv, MAX_ARG_STRINGS);
- if ((retval = bprm->argc) < 0)
- goto out;
-
- bprm->envc = count(envp, MAX_ARG_STRINGS);
- if ((retval = bprm->envc) < 0)
+ retval = prepare_arg_pages(bprm, argv, envp);
+ if (retval < 0)
goto out;
retval = prepare_binprm(bprm);
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 906839a4da8f..fc80c7233fa5 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -705,21 +705,18 @@ out:
/*
* Read the superblock from the OSD and fill in the fields
*/
-static int exofs_fill_super(struct super_block *sb, void *data, int silent)
+static int exofs_fill_super(struct super_block *sb,
+ struct exofs_mountopt *opts,
+ struct exofs_sb_info *sbi,
+ int silent)
{
struct inode *root;
- struct exofs_mountopt *opts = data;
- struct exofs_sb_info *sbi; /*extended info */
struct osd_dev *od; /* Master device */
struct exofs_fscb fscb; /*on-disk superblock info */
struct ore_comp comp;
unsigned table_count;
int ret;
- sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
- if (!sbi)
- return -ENOMEM;
-
/* use mount options to fill superblock */
if (opts->is_osdname) {
struct osd_dev_info odi = {.systemid_len = 0};
@@ -863,7 +860,9 @@ static struct dentry *exofs_mount(struct file_system_type *type,
int flags, const char *dev_name,
void *data)
{
+ struct super_block *s;
struct exofs_mountopt opts;
+ struct exofs_sb_info *sbi;
int ret;
ret = parse_options(data, &opts);
@@ -872,9 +871,31 @@ static struct dentry *exofs_mount(struct file_system_type *type,
return ERR_PTR(ret);
}
+ sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+ if (!sbi) {
+ kfree(opts.dev_name);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ s = sget(type, NULL, set_anon_super, flags, NULL);
+
+ if (IS_ERR(s)) {
+ kfree(opts.dev_name);
+ kfree(sbi);
+ return ERR_CAST(s);
+ }
+
if (!opts.dev_name)
opts.dev_name = dev_name;
- return mount_nodev(type, flags, &opts, exofs_fill_super);
+
+
+ ret = exofs_fill_super(s, &opts, sbi, flags & SB_SILENT ? 1 : 0);
+ if (ret) {
+ deactivate_locked_super(s);
+ return ERR_PTR(ret);
+ }
+ s->s_flags |= SB_ACTIVE;
+ return dget(s->s_root);
}
/*
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index eb11502e3fcd..73b2d528237f 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -73,7 +73,7 @@ void ext2_error(struct super_block *sb, const char *function,
if (test_opt(sb, ERRORS_PANIC))
panic("EXT2-fs: panic from previous error\n");
- if (test_opt(sb, ERRORS_RO)) {
+ if (!sb_rdonly(sb) && test_opt(sb, ERRORS_RO)) {
ext2_msg(sb, KERN_CRIT,
"error: remounting filesystem read-only");
sb->s_flags |= SB_RDONLY;
@@ -148,10 +148,9 @@ static void ext2_put_super (struct super_block * sb)
ext2_quota_off_umount(sb);
- if (sbi->s_ea_block_cache) {
- ext2_xattr_destroy_cache(sbi->s_ea_block_cache);
- sbi->s_ea_block_cache = NULL;
- }
+ ext2_xattr_destroy_cache(sbi->s_ea_block_cache);
+ sbi->s_ea_block_cache = NULL;
+
if (!sb_rdonly(sb)) {
struct ext2_super_block *es = sbi->s_es;
@@ -1198,8 +1197,7 @@ cantfind_ext2:
sb->s_id);
goto failed_mount;
failed_mount3:
- if (sbi->s_ea_block_cache)
- ext2_xattr_destroy_cache(sbi->s_ea_block_cache);
+ ext2_xattr_destroy_cache(sbi->s_ea_block_cache);
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index dd8f10db82e9..4f30876ee325 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -835,7 +835,8 @@ ext2_xattr_cache_insert(struct mb_cache *cache, struct buffer_head *bh)
__u32 hash = le32_to_cpu(HDR(bh)->h_hash);
int error;
- error = mb_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr, 1);
+ error = mb_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr,
+ true);
if (error) {
if (error == -EBUSY) {
ea_bdebug(bh, "already in cache (%d cache entries)",
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index c1d570ee1d9f..8c7bbf3e566d 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -248,7 +248,8 @@ retry:
error = posix_acl_update_mode(inode, &mode, &acl);
if (error)
goto out_stop;
- update_mode = 1;
+ if (mode != inode->i_mode)
+ update_mode = 1;
}
error = __ext4_set_acl(handle, inode, type, acl, 0 /* xattr_flags */);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3f89d0ab08fc..185a05d3257e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2454,8 +2454,19 @@ int do_journal_get_write_access(handle_t *handle,
#define FALL_BACK_TO_NONDELALLOC 1
#define CONVERT_INLINE_DATA 2
-extern struct inode *ext4_iget(struct super_block *, unsigned long);
-extern struct inode *ext4_iget_normal(struct super_block *, unsigned long);
+typedef enum {
+ EXT4_IGET_NORMAL = 0,
+ EXT4_IGET_SPECIAL = 0x0001, /* OK to iget a system inode */
+ EXT4_IGET_HANDLE = 0x0002 /* Inode # is from a handle */
+} ext4_iget_flags;
+
+extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
+ ext4_iget_flags flags, const char *function,
+ unsigned int line);
+
+#define ext4_iget(sb, ino, flags) \
+ __ext4_iget((sb), (ino), (flags), __func__, __LINE__)
+
extern int ext4_write_inode(struct inode *, struct writeback_control *);
extern int ext4_setattr(struct dentry *, struct iattr *);
extern int ext4_getattr(const struct path *, struct kstat *, u32, unsigned int);
@@ -2538,6 +2549,8 @@ extern int ext4_group_extend(struct super_block *sb,
extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
/* super.c */
+extern struct buffer_head *ext4_sb_bread(struct super_block *sb,
+ sector_t block, int op_flags);
extern int ext4_seq_options_show(struct seq_file *seq, void *offset);
extern int ext4_calculate_overhead(struct super_block *sb);
extern void ext4_superblock_csum_set(struct super_block *sb);
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 26a7fe5c4fd3..712f00995390 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -116,8 +116,16 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
goto out;
}
+ ret = file_write_and_wait_range(file, start, end);
+ if (ret)
+ return ret;
+
if (!journal) {
- ret = __generic_file_fsync(file, start, end, datasync);
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL
+ };
+
+ ret = ext4_write_inode(inode, &wbc);
if (!ret)
ret = ext4_sync_parent(inode);
if (test_opt(inode->i_sb, BARRIER))
@@ -125,9 +133,6 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
goto out;
}
- ret = file_write_and_wait_range(file, start, end);
- if (ret)
- return ret;
/*
* data=writeback,ordered:
* The caller's filemap_fdatawrite()/wait will sync the data.
@@ -159,6 +164,9 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
ret = err;
}
out:
+ err = file_check_and_advance_wb_err(file);
+ if (ret == 0)
+ ret = err;
trace_ext4_sync_file_exit(inode, ret);
return ret;
}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 014f6a698cb7..7ff14a1adba3 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1225,7 +1225,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
if (!ext4_test_bit(bit, bitmap_bh->b_data))
goto bad_orphan;
- inode = ext4_iget(sb, ino);
+ inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
ext4_error(sb, "couldn't read orphan inode %lu (err %d)",
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 9c4bac18cc6c..56f6e1782d5f 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -705,8 +705,11 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
if (!PageUptodate(page)) {
ret = ext4_read_inline_page(inode, page);
- if (ret < 0)
+ if (ret < 0) {
+ unlock_page(page);
+ put_page(page);
goto out_up_read;
+ }
}
ret = 1;
@@ -1887,12 +1890,12 @@ int ext4_inline_data_fiemap(struct inode *inode,
physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data;
physical += offsetof(struct ext4_inode, i_block);
- if (physical)
- error = fiemap_fill_next_extent(fieinfo, start, physical,
- inline_len, flags);
brelse(iloc.bh);
out:
up_read(&EXT4_I(inode)->xattr_sem);
+ if (physical)
+ error = fiemap_fill_next_extent(fieinfo, start, physical,
+ inline_len, flags);
return (error < 0 ? error : 0);
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 22a9d8159720..34d7e0703cc6 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2778,7 +2778,8 @@ static int ext4_writepages(struct address_space *mapping,
* We may need to convert up to one extent per block in
* the page and we may dirty the inode.
*/
- rsv_blocks = 1 + (PAGE_SIZE >> inode->i_blkbits);
+ rsv_blocks = 1 + ext4_chunk_trans_blocks(inode,
+ PAGE_SIZE >> inode->i_blkbits);
}
/*
@@ -4817,7 +4818,9 @@ static inline u64 ext4_inode_peek_iversion(const struct inode *inode)
return inode_peek_iversion(inode);
}
-struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
+struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
+ ext4_iget_flags flags, const char *function,
+ unsigned int line)
{
struct ext4_iloc iloc;
struct ext4_inode *raw_inode;
@@ -4831,6 +4834,18 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
gid_t i_gid;
projid_t i_projid;
+ if ((!(flags & EXT4_IGET_SPECIAL) &&
+ (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)) ||
+ (ino < EXT4_ROOT_INO) ||
+ (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) {
+ if (flags & EXT4_IGET_HANDLE)
+ return ERR_PTR(-ESTALE);
+ __ext4_error(sb, function, line,
+ "inode #%lu: comm %s: iget: illegal inode #",
+ ino, current->comm);
+ return ERR_PTR(-EFSCORRUPTED);
+ }
+
inode = iget_locked(sb, ino);
if (!inode)
return ERR_PTR(-ENOMEM);
@@ -4846,18 +4861,26 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
raw_inode = ext4_raw_inode(&iloc);
if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) {
- EXT4_ERROR_INODE(inode, "root inode unallocated");
+ ext4_error_inode(inode, function, line, 0,
+ "iget: root inode unallocated");
ret = -EFSCORRUPTED;
goto bad_inode;
}
+ if ((flags & EXT4_IGET_HANDLE) &&
+ (raw_inode->i_links_count == 0) && (raw_inode->i_mode == 0)) {
+ ret = -ESTALE;
+ goto bad_inode;
+ }
+
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
EXT4_INODE_SIZE(inode->i_sb) ||
(ei->i_extra_isize & 3)) {
- EXT4_ERROR_INODE(inode,
- "bad extra_isize %u (inode size %u)",
+ ext4_error_inode(inode, function, line, 0,
+ "iget: bad extra_isize %u "
+ "(inode size %u)",
ei->i_extra_isize,
EXT4_INODE_SIZE(inode->i_sb));
ret = -EFSCORRUPTED;
@@ -4879,7 +4902,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
}
if (!ext4_inode_csum_verify(inode, raw_inode, ei)) {
- EXT4_ERROR_INODE(inode, "checksum invalid");
+ ext4_error_inode(inode, function, line, 0,
+ "iget: checksum invalid");
ret = -EFSBADCRC;
goto bad_inode;
}
@@ -4936,7 +4960,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
inode->i_size = ext4_isize(sb, raw_inode);
if ((size = i_size_read(inode)) < 0) {
- EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
+ ext4_error_inode(inode, function, line, 0,
+ "iget: bad i_size value: %lld", size);
ret = -EFSCORRUPTED;
goto bad_inode;
}
@@ -5012,7 +5037,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ret = 0;
if (ei->i_file_acl &&
!ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
- EXT4_ERROR_INODE(inode, "bad extended attribute block %llu",
+ ext4_error_inode(inode, function, line, 0,
+ "iget: bad extended attribute block %llu",
ei->i_file_acl);
ret = -EFSCORRUPTED;
goto bad_inode;
@@ -5040,8 +5066,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
} else if (S_ISLNK(inode->i_mode)) {
/* VFS does not allow setting these so must be corruption */
if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
- EXT4_ERROR_INODE(inode,
- "immutable or append flags not allowed on symlinks");
+ ext4_error_inode(inode, function, line, 0,
+ "iget: immutable or append flags "
+ "not allowed on symlinks");
ret = -EFSCORRUPTED;
goto bad_inode;
}
@@ -5071,7 +5098,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
make_bad_inode(inode);
} else {
ret = -EFSCORRUPTED;
- EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);
+ ext4_error_inode(inode, function, line, 0,
+ "iget: bogus i_mode (%o)", inode->i_mode);
goto bad_inode;
}
brelse(iloc.bh);
@@ -5085,13 +5113,6 @@ bad_inode:
return ERR_PTR(ret);
}
-struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino)
-{
- if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
- return ERR_PTR(-EFSCORRUPTED);
- return ext4_iget(sb, ino);
-}
-
static int ext4_inode_blocks_set(handle_t *handle,
struct ext4_inode *raw_inode,
struct ext4_inode_info *ei)
@@ -5380,9 +5401,13 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
{
int err;
- if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
+ if (WARN_ON_ONCE(current->flags & PF_MEMALLOC) ||
+ sb_rdonly(inode->i_sb))
return 0;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
if (EXT4_SB(inode->i_sb)->s_journal) {
if (ext4_journal_current_handle()) {
jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n");
@@ -5398,7 +5423,8 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync)
return 0;
- err = ext4_force_commit(inode->i_sb);
+ err = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
+ EXT4_I(inode)->i_sync_tid);
} else {
struct ext4_iloc iloc;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 0edee31913d1..d37dafa1d133 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -125,7 +125,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN))
return -EPERM;
- inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO);
+ inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO, EXT4_IGET_SPECIAL);
if (IS_ERR(inode_bl))
return PTR_ERR(inode_bl);
ei_bl = EXT4_I(inode_bl);
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 61a9d1927817..b1e4d359f73b 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -116,9 +116,9 @@ static int update_ind_extent_range(handle_t *handle, struct inode *inode,
int i, retval = 0;
unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
- bh = sb_bread(inode->i_sb, pblock);
- if (!bh)
- return -EIO;
+ bh = ext4_sb_bread(inode->i_sb, pblock, 0);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
i_data = (__le32 *)bh->b_data;
for (i = 0; i < max_entries; i++) {
@@ -145,9 +145,9 @@ static int update_dind_extent_range(handle_t *handle, struct inode *inode,
int i, retval = 0;
unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
- bh = sb_bread(inode->i_sb, pblock);
- if (!bh)
- return -EIO;
+ bh = ext4_sb_bread(inode->i_sb, pblock, 0);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
i_data = (__le32 *)bh->b_data;
for (i = 0; i < max_entries; i++) {
@@ -175,9 +175,9 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
int i, retval = 0;
unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
- bh = sb_bread(inode->i_sb, pblock);
- if (!bh)
- return -EIO;
+ bh = ext4_sb_bread(inode->i_sb, pblock, 0);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
i_data = (__le32 *)bh->b_data;
for (i = 0; i < max_entries; i++) {
@@ -224,9 +224,9 @@ static int free_dind_blocks(handle_t *handle,
struct buffer_head *bh;
unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
- bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
- if (!bh)
- return -EIO;
+ bh = ext4_sb_bread(inode->i_sb, le32_to_cpu(i_data), 0);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
tmp_idata = (__le32 *)bh->b_data;
for (i = 0; i < max_entries; i++) {
@@ -254,9 +254,9 @@ static int free_tind_blocks(handle_t *handle,
struct buffer_head *bh;
unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
- bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
- if (!bh)
- return -EIO;
+ bh = ext4_sb_bread(inode->i_sb, le32_to_cpu(i_data), 0);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
tmp_idata = (__le32 *)bh->b_data;
for (i = 0; i < max_entries; i++) {
@@ -382,9 +382,9 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
struct ext4_extent_header *eh;
block = ext4_idx_pblock(ix);
- bh = sb_bread(inode->i_sb, block);
- if (!bh)
- return -EIO;
+ bh = ext4_sb_bread(inode->i_sb, block, 0);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
eh = (struct ext4_extent_header *)bh->b_data;
if (eh->eh_depth != 0) {
@@ -535,22 +535,22 @@ int ext4_ext_migrate(struct inode *inode)
if (i_data[EXT4_IND_BLOCK]) {
retval = update_ind_extent_range(handle, tmp_inode,
le32_to_cpu(i_data[EXT4_IND_BLOCK]), &lb);
- if (retval)
- goto err_out;
+ if (retval)
+ goto err_out;
} else
lb.curr_block += max_entries;
if (i_data[EXT4_DIND_BLOCK]) {
retval = update_dind_extent_range(handle, tmp_inode,
le32_to_cpu(i_data[EXT4_DIND_BLOCK]), &lb);
- if (retval)
- goto err_out;
+ if (retval)
+ goto err_out;
} else
lb.curr_block += max_entries * max_entries;
if (i_data[EXT4_TIND_BLOCK]) {
retval = update_tind_extent_range(handle, tmp_inode,
le32_to_cpu(i_data[EXT4_TIND_BLOCK]), &lb);
- if (retval)
- goto err_out;
+ if (retval)
+ goto err_out;
}
/*
* Build the last extent
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 437f71fe83ae..2b928eb07fa2 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1571,7 +1571,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
dentry);
return ERR_PTR(-EFSCORRUPTED);
}
- inode = ext4_iget_normal(dir->i_sb, ino);
+ inode = ext4_iget(dir->i_sb, ino, EXT4_IGET_NORMAL);
if (inode == ERR_PTR(-ESTALE)) {
EXT4_ERROR_INODE(dir,
"deleted inode referenced: %u",
@@ -1613,7 +1613,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
return ERR_PTR(-EFSCORRUPTED);
}
- return d_obtain_alias(ext4_iget_normal(child->d_sb, ino));
+ return d_obtain_alias(ext4_iget(child->d_sb, ino, EXT4_IGET_NORMAL));
}
/*
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index db7590178dfc..2aa62d58d8dd 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -374,13 +374,13 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
if (!bio)
return -ENOMEM;
- wbc_init_bio(io->io_wbc, bio);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio_set_dev(bio, bh->b_bdev);
bio->bi_end_io = ext4_end_bio;
bio->bi_private = ext4_get_io_end(io->io_end);
io->io_bio = bio;
io->io_next_block = bh->b_blocknr;
+ wbc_init_bio(io->io_wbc, bio);
return 0;
}
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index f461d75ac049..6aa282ee455a 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -128,7 +128,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
prefetchw(&page->flags);
if (pages) {
- page = list_entry(pages->prev, struct page, lru);
+ page = lru_to_page(pages);
list_del(&page->lru);
if (add_to_page_cache_lru(page, mapping, page->index,
readahead_gfp_mask(mapping)))
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index a5efee34415f..48421de803b7 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -127,10 +127,12 @@ static int verify_group_input(struct super_block *sb,
else if (free_blocks_count < 0)
ext4_warning(sb, "Bad blocks count %u",
input->blocks_count);
- else if (!(bh = sb_bread(sb, end - 1)))
+ else if (IS_ERR(bh = ext4_sb_bread(sb, end - 1, 0))) {
+ err = PTR_ERR(bh);
+ bh = NULL;
ext4_warning(sb, "Cannot read last block (%llu)",
end - 1);
- else if (outside(input->block_bitmap, start, end))
+ } else if (outside(input->block_bitmap, start, end))
ext4_warning(sb, "Block bitmap not in group (block %llu)",
(unsigned long long)input->block_bitmap);
else if (outside(input->inode_bitmap, start, end))
@@ -781,11 +783,11 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
- struct buffer_head **o_group_desc, **n_group_desc;
- struct buffer_head *dind;
- struct buffer_head *gdb_bh;
+ struct buffer_head **o_group_desc, **n_group_desc = NULL;
+ struct buffer_head *dind = NULL;
+ struct buffer_head *gdb_bh = NULL;
int gdbackups;
- struct ext4_iloc iloc;
+ struct ext4_iloc iloc = { .bh = NULL };
__le32 *data;
int err;
@@ -794,21 +796,22 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
"EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
gdb_num);
- gdb_bh = sb_bread(sb, gdblock);
- if (!gdb_bh)
- return -EIO;
+ gdb_bh = ext4_sb_bread(sb, gdblock, 0);
+ if (IS_ERR(gdb_bh))
+ return PTR_ERR(gdb_bh);
gdbackups = verify_reserved_gdb(sb, group, gdb_bh);
if (gdbackups < 0) {
err = gdbackups;
- goto exit_bh;
+ goto errout;
}
data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK;
- dind = sb_bread(sb, le32_to_cpu(*data));
- if (!dind) {
- err = -EIO;
- goto exit_bh;
+ dind = ext4_sb_bread(sb, le32_to_cpu(*data), 0);
+ if (IS_ERR(dind)) {
+ err = PTR_ERR(dind);
+ dind = NULL;
+ goto errout;
}
data = (__le32 *)dind->b_data;
@@ -816,18 +819,18 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
ext4_warning(sb, "new group %u GDT block %llu not reserved",
group, gdblock);
err = -EINVAL;
- goto exit_dind;
+ goto errout;
}
BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
if (unlikely(err))
- goto exit_dind;
+ goto errout;
BUFFER_TRACE(gdb_bh, "get_write_access");
err = ext4_journal_get_write_access(handle, gdb_bh);
if (unlikely(err))
- goto exit_dind;
+ goto errout;
BUFFER_TRACE(dind, "get_write_access");
err = ext4_journal_get_write_access(handle, dind);
@@ -837,7 +840,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
/* ext4_reserve_inode_write() gets a reference on the iloc */
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (unlikely(err))
- goto exit_dind;
+ goto errout;
n_group_desc = ext4_kvmalloc((gdb_num + 1) *
sizeof(struct buffer_head *),
@@ -846,7 +849,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
err = -ENOMEM;
ext4_warning(sb, "not enough memory for %lu groups",
gdb_num + 1);
- goto exit_inode;
+ goto errout;
}
/*
@@ -862,7 +865,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
err = ext4_handle_dirty_metadata(handle, NULL, dind);
if (unlikely(err)) {
ext4_std_error(sb, err);
- goto exit_inode;
+ goto errout;
}
inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >>
(9 - EXT4_SB(sb)->s_cluster_bits);
@@ -871,8 +874,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
if (unlikely(err)) {
ext4_std_error(sb, err);
- iloc.bh = NULL;
- goto exit_inode;
+ goto errout;
}
brelse(dind);
@@ -888,15 +890,11 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
err = ext4_handle_dirty_super(handle, sb);
if (err)
ext4_std_error(sb, err);
-
return err;
-
-exit_inode:
+errout:
kvfree(n_group_desc);
brelse(iloc.bh);
-exit_dind:
brelse(dind);
-exit_bh:
brelse(gdb_bh);
ext4_debug("leaving with error %d\n", err);
@@ -916,9 +914,9 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
gdblock = ext4_meta_bg_first_block_no(sb, group) +
ext4_bg_has_super(sb, group);
- gdb_bh = sb_bread(sb, gdblock);
- if (!gdb_bh)
- return -EIO;
+ gdb_bh = ext4_sb_bread(sb, gdblock, 0);
+ if (IS_ERR(gdb_bh))
+ return PTR_ERR(gdb_bh);
n_group_desc = ext4_kvmalloc((gdb_num + 1) *
sizeof(struct buffer_head *),
GFP_NOFS);
@@ -975,9 +973,10 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
return -ENOMEM;
data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK;
- dind = sb_bread(sb, le32_to_cpu(*data));
- if (!dind) {
- err = -EIO;
+ dind = ext4_sb_bread(sb, le32_to_cpu(*data), 0);
+ if (IS_ERR(dind)) {
+ err = PTR_ERR(dind);
+ dind = NULL;
goto exit_free;
}
@@ -996,9 +995,10 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
err = -EINVAL;
goto exit_bh;
}
- primary[res] = sb_bread(sb, blk);
- if (!primary[res]) {
- err = -EIO;
+ primary[res] = ext4_sb_bread(sb, blk, 0);
+ if (IS_ERR(primary[res])) {
+ err = PTR_ERR(primary[res]);
+ primary[res] = NULL;
goto exit_bh;
}
gdbackups = verify_reserved_gdb(sb, group, primary[res]);
@@ -1631,13 +1631,13 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
}
if (reserved_gdb || gdb_off == 0) {
- if (ext4_has_feature_resize_inode(sb) ||
+ if (!ext4_has_feature_resize_inode(sb) ||
!le16_to_cpu(es->s_reserved_gdt_blocks)) {
ext4_warning(sb,
"No reserved GDT blocks, can't resize");
return -EPERM;
}
- inode = ext4_iget(sb, EXT4_RESIZE_INO);
+ inode = ext4_iget(sb, EXT4_RESIZE_INO, EXT4_IGET_SPECIAL);
if (IS_ERR(inode)) {
ext4_warning(sb, "Error opening resize inode");
return PTR_ERR(inode);
@@ -1965,7 +1965,8 @@ retry:
}
if (!resize_inode)
- resize_inode = ext4_iget(sb, EXT4_RESIZE_INO);
+ resize_inode = ext4_iget(sb, EXT4_RESIZE_INO,
+ EXT4_IGET_SPECIAL);
if (IS_ERR(resize_inode)) {
ext4_warning(sb, "Error opening resize inode");
return PTR_ERR(resize_inode);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 53ff6c2a26ed..fb12d3c17c1b 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -140,6 +140,29 @@ MODULE_ALIAS_FS("ext3");
MODULE_ALIAS("ext3");
#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
+/*
+ * This works like sb_bread() except it uses ERR_PTR for error
+ * returns. Currently with sb_bread it's impossible to distinguish
+ * between ENOMEM and EIO situations (since both result in a NULL
+ * return.
+ */
+struct buffer_head *
+ext4_sb_bread(struct super_block *sb, sector_t block, int op_flags)
+{
+ struct buffer_head *bh = sb_getblk(sb, block);
+
+ if (bh == NULL)
+ return ERR_PTR(-ENOMEM);
+ if (buffer_uptodate(bh))
+ return bh;
+ ll_rw_block(REQ_OP_READ, REQ_META | op_flags, 1, &bh);
+ wait_on_buffer(bh);
+ if (buffer_uptodate(bh))
+ return bh;
+ put_bh(bh);
+ return ERR_PTR(-EIO);
+}
+
static int ext4_verify_csum_type(struct super_block *sb,
struct ext4_super_block *es)
{
@@ -1000,14 +1023,13 @@ static void ext4_put_super(struct super_block *sb)
invalidate_bdev(sbi->journal_bdev);
ext4_blkdev_remove(sbi);
}
- if (sbi->s_ea_inode_cache) {
- ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
- sbi->s_ea_inode_cache = NULL;
- }
- if (sbi->s_ea_block_cache) {
- ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
- sbi->s_ea_block_cache = NULL;
- }
+
+ ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
+ sbi->s_ea_inode_cache = NULL;
+
+ ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
+ sbi->s_ea_block_cache = NULL;
+
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
brelse(sbi->s_sbh);
@@ -1151,20 +1173,11 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb,
{
struct inode *inode;
- if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
- return ERR_PTR(-ESTALE);
- if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
- return ERR_PTR(-ESTALE);
-
- /* iget isn't really right if the inode is currently unallocated!!
- *
- * ext4_read_inode will return a bad_inode if the inode had been
- * deleted, so we should be safe.
- *
+ /*
* Currently we don't know the generation for parent directory, so
* a generation of 0 means "accept any"
*/
- inode = ext4_iget_normal(sb, ino);
+ inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE);
if (IS_ERR(inode))
return ERR_CAST(inode);
if (generation && inode->i_generation != generation) {
@@ -1189,6 +1202,16 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
ext4_nfs_get_inode);
}
+static int ext4_nfs_commit_metadata(struct inode *inode)
+{
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL
+ };
+
+ trace_ext4_nfs_commit_metadata(inode);
+ return ext4_write_inode(inode, &wbc);
+}
+
/*
* Try to release metadata pages (indirect blocks, directories) which are
* mapped via the block device. Since these pages could have journal heads
@@ -1393,6 +1416,7 @@ static const struct export_operations ext4_export_ops = {
.fh_to_dentry = ext4_fh_to_dentry,
.fh_to_parent = ext4_fh_to_parent,
.get_parent = ext4_get_parent,
+ .commit_metadata = ext4_nfs_commit_metadata,
};
enum {
@@ -1939,7 +1963,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
#ifdef CONFIG_FS_DAX
ext4_msg(sb, KERN_WARNING,
"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
- sbi->s_mount_opt |= m->mount_opt;
+ sbi->s_mount_opt |= m->mount_opt;
#else
ext4_msg(sb, KERN_INFO, "dax option not supported");
return -1;
@@ -3842,12 +3866,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (ext4_has_feature_inline_data(sb)) {
ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
" that may contain inline data");
- sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
+ goto failed_mount;
}
if (!bdev_dax_supported(sb->s_bdev, blocksize)) {
ext4_msg(sb, KERN_ERR,
- "DAX unsupported by block device. Turning off DAX.");
- sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
+ "DAX unsupported by block device.");
+ goto failed_mount;
}
}
@@ -4328,7 +4352,7 @@ no_journal:
* so we can safely mount the rest of the filesystem now.
*/
- root = ext4_iget(sb, EXT4_ROOT_INO);
+ root = ext4_iget(sb, EXT4_ROOT_INO, EXT4_IGET_SPECIAL);
if (IS_ERR(root)) {
ext4_msg(sb, KERN_ERR, "get root inode failed");
ret = PTR_ERR(root);
@@ -4522,14 +4546,12 @@ failed_mount4:
if (EXT4_SB(sb)->rsv_conversion_wq)
destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
failed_mount_wq:
- if (sbi->s_ea_inode_cache) {
- ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
- sbi->s_ea_inode_cache = NULL;
- }
- if (sbi->s_ea_block_cache) {
- ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
- sbi->s_ea_block_cache = NULL;
- }
+ ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
+ sbi->s_ea_inode_cache = NULL;
+
+ ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
+ sbi->s_ea_block_cache = NULL;
+
if (sbi->s_journal) {
jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
@@ -4598,7 +4620,7 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
* happen if we iget() an unused inode, as the subsequent iput()
* will try to delete it.
*/
- journal_inode = ext4_iget(sb, journal_inum);
+ journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
if (IS_ERR(journal_inode)) {
ext4_msg(sb, KERN_ERR, "no journal found");
return NULL;
@@ -4880,7 +4902,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
ext4_superblock_csum_set(sb);
if (sync)
lock_buffer(sbh);
- if (buffer_write_io_error(sbh)) {
+ if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
/*
* Oh, dear. A previous attempt to write the
* superblock failed. This could happen because the
@@ -5680,7 +5702,7 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
if (!qf_inums[type])
return -EPERM;
- qf_inode = ext4_iget(sb, qf_inums[type]);
+ qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
if (IS_ERR(qf_inode)) {
ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
return PTR_ERR(qf_inode);
@@ -5690,9 +5712,9 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
qf_inode->i_flags |= S_NOQUOTA;
lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
err = dquot_enable(qf_inode, type, format_id, flags);
- iput(qf_inode);
if (err)
lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
+ iput(qf_inode);
return err;
}
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 7643d52c776c..86ed9c686249 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -384,7 +384,7 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
struct inode *inode;
int err;
- inode = ext4_iget(parent->i_sb, ea_ino);
+ inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_NORMAL);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
ext4_error(parent->i_sb,
@@ -522,14 +522,13 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
name_index, name, buffer, (long)buffer_size);
- error = -ENODATA;
if (!EXT4_I(inode)->i_file_acl)
- goto cleanup;
+ return -ENODATA;
ea_idebug(inode, "reading block %llu",
(unsigned long long)EXT4_I(inode)->i_file_acl);
- bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
- if (!bh)
- goto cleanup;
+ bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
error = ext4_xattr_check_block(inode, bh);
@@ -696,26 +695,23 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
ea_idebug(inode, "buffer=%p, buffer_size=%ld",
buffer, (long)buffer_size);
- error = 0;
if (!EXT4_I(inode)->i_file_acl)
- goto cleanup;
+ return 0;
ea_idebug(inode, "reading block %llu",
(unsigned long long)EXT4_I(inode)->i_file_acl);
- bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
- error = -EIO;
- if (!bh)
- goto cleanup;
+ bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
error = ext4_xattr_check_block(inode, bh);
if (error)
goto cleanup;
ext4_xattr_block_cache_insert(EA_BLOCK_CACHE(inode), bh);
- error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
-
+ error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer,
+ buffer_size);
cleanup:
brelse(bh);
-
return error;
}
@@ -830,9 +826,9 @@ int ext4_get_inode_usage(struct inode *inode, qsize_t *usage)
}
if (EXT4_I(inode)->i_file_acl) {
- bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
- if (!bh) {
- ret = -EIO;
+ bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
+ if (IS_ERR(bh)) {
+ ret = PTR_ERR(bh);
goto out;
}
@@ -1486,7 +1482,8 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
}
while (ce) {
- ea_inode = ext4_iget(inode->i_sb, ce->e_value);
+ ea_inode = ext4_iget(inode->i_sb, ce->e_value,
+ EXT4_IGET_NORMAL);
if (!IS_ERR(ea_inode) &&
!is_bad_inode(ea_inode) &&
(EXT4_I(ea_inode)->i_flags & EXT4_EA_INODE_FL) &&
@@ -1821,16 +1818,15 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
if (EXT4_I(inode)->i_file_acl) {
/* The inode already has an extended attribute block. */
- bs->bh = sb_bread(sb, EXT4_I(inode)->i_file_acl);
- error = -EIO;
- if (!bs->bh)
- goto cleanup;
+ bs->bh = ext4_sb_bread(sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
+ if (IS_ERR(bs->bh))
+ return PTR_ERR(bs->bh);
ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
atomic_read(&(bs->bh->b_count)),
le32_to_cpu(BHDR(bs->bh)->h_refcount));
error = ext4_xattr_check_block(inode, bs->bh);
if (error)
- goto cleanup;
+ return error;
/* Find the named attribute. */
bs->s.base = BHDR(bs->bh);
bs->s.first = BFIRST(bs->bh);
@@ -1839,13 +1835,10 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
error = xattr_find_entry(inode, &bs->s.here, bs->s.end,
i->name_index, i->name, 1);
if (error && error != -ENODATA)
- goto cleanup;
+ return error;
bs->s.not_found = error;
}
- error = 0;
-
-cleanup:
- return error;
+ return 0;
}
static int
@@ -2274,9 +2267,9 @@ static struct buffer_head *ext4_xattr_get_block(struct inode *inode)
if (!EXT4_I(inode)->i_file_acl)
return NULL;
- bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
- if (!bh)
- return ERR_PTR(-EIO);
+ bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
+ if (IS_ERR(bh))
+ return bh;
error = ext4_xattr_check_block(inode, bh);
if (error) {
brelse(bh);
@@ -2729,7 +2722,7 @@ retry:
base = IFIRST(header);
end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
min_offs = end - base;
- total_ino = sizeof(struct ext4_xattr_ibody_header);
+ total_ino = sizeof(struct ext4_xattr_ibody_header) + sizeof(u32);
error = xattr_check_inode(inode, header, end);
if (error)
@@ -2746,10 +2739,11 @@ retry:
if (EXT4_I(inode)->i_file_acl) {
struct buffer_head *bh;
- bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
- error = -EIO;
- if (!bh)
+ bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
+ if (IS_ERR(bh)) {
+ error = PTR_ERR(bh);
goto cleanup;
+ }
error = ext4_xattr_check_block(inode, bh);
if (error) {
brelse(bh);
@@ -2903,11 +2897,12 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
}
if (EXT4_I(inode)->i_file_acl) {
- bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
- if (!bh) {
- EXT4_ERROR_INODE(inode, "block %llu read error",
- EXT4_I(inode)->i_file_acl);
- error = -EIO;
+ bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
+ if (IS_ERR(bh)) {
+ error = PTR_ERR(bh);
+ if (error == -EIO)
+ EXT4_ERROR_INODE(inode, "block %llu read error",
+ EXT4_I(inode)->i_file_acl);
goto cleanup;
}
error = ext4_xattr_check_block(inode, bh);
@@ -3060,8 +3055,10 @@ ext4_xattr_block_cache_find(struct inode *inode,
while (ce) {
struct buffer_head *bh;
- bh = sb_bread(inode->i_sb, ce->e_value);
- if (!bh) {
+ bh = ext4_sb_bread(inode->i_sb, ce->e_value, REQ_PRIO);
+ if (IS_ERR(bh)) {
+ if (PTR_ERR(bh) == -ENOMEM)
+ return NULL;
EXT4_ERROR_INODE(inode, "block %lu read error",
(unsigned long)ce->e_value);
} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index fa707cdd4120..63e599524085 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -160,7 +160,7 @@ static void *f2fs_acl_to_disk(struct f2fs_sb_info *sbi,
return (void *)f2fs_acl;
fail:
- kfree(f2fs_acl);
+ kvfree(f2fs_acl);
return ERR_PTR(-EINVAL);
}
@@ -190,7 +190,7 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type,
acl = NULL;
else
acl = ERR_PTR(retval);
- kfree(value);
+ kvfree(value);
return acl;
}
@@ -240,7 +240,7 @@ static int __f2fs_set_acl(struct inode *inode, int type,
error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0);
- kfree(value);
+ kvfree(value);
if (!error)
set_cached_acl(inode, type, acl);
@@ -352,12 +352,14 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode,
return PTR_ERR(p);
clone = f2fs_acl_clone(p, GFP_NOFS);
- if (!clone)
- goto no_mem;
+ if (!clone) {
+ ret = -ENOMEM;
+ goto release_acl;
+ }
ret = f2fs_acl_create_masq(clone, mode);
if (ret < 0)
- goto no_mem_clone;
+ goto release_clone;
if (ret == 0)
posix_acl_release(clone);
@@ -371,11 +373,11 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode,
return 0;
-no_mem_clone:
+release_clone:
posix_acl_release(clone);
-no_mem:
+release_acl:
posix_acl_release(p);
- return -ENOMEM;
+ return ret;
}
int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 9c28ea439e0b..f955cd3e0677 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -44,7 +44,7 @@ repeat:
cond_resched();
goto repeat;
}
- f2fs_wait_on_page_writeback(page, META, true);
+ f2fs_wait_on_page_writeback(page, META, true, true);
if (!PageUptodate(page))
SetPageUptodate(page);
return page;
@@ -370,9 +370,8 @@ continue_unlock:
goto continue_unlock;
}
- f2fs_wait_on_page_writeback(page, META, true);
+ f2fs_wait_on_page_writeback(page, META, true, true);
- BUG_ON(PageWriteback(page));
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
@@ -911,7 +910,7 @@ free_fail_no_cp:
f2fs_put_page(cp1, 1);
f2fs_put_page(cp2, 1);
fail_no_cp:
- kfree(sbi->ckpt);
+ kvfree(sbi->ckpt);
return -EINVAL;
}
@@ -1290,11 +1289,11 @@ static void commit_checkpoint(struct f2fs_sb_info *sbi,
struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
int err;
+ f2fs_wait_on_page_writeback(page, META, true, true);
+
memcpy(page_address(page), src, PAGE_SIZE);
- set_page_dirty(page);
- f2fs_wait_on_page_writeback(page, META, true);
- f2fs_bug_on(sbi, PageWriteback(page));
+ set_page_dirty(page);
if (unlikely(!clear_page_dirty_for_io(page)))
f2fs_bug_on(sbi, 1);
@@ -1328,11 +1327,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
int err;
/* Flush all the NAT/SIT pages */
- while (get_pages(sbi, F2FS_DIRTY_META)) {
- f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
- if (unlikely(f2fs_cp_error(sbi)))
- break;
- }
+ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
+ f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
+ !f2fs_cp_error(sbi));
/*
* modify checkpoint
@@ -1405,14 +1402,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
for (i = 0; i < nm_i->nat_bits_blocks; i++)
f2fs_update_meta_page(sbi, nm_i->nat_bits +
(i << F2FS_BLKSIZE_BITS), blk + i);
-
- /* Flush all the NAT BITS pages */
- while (get_pages(sbi, F2FS_DIRTY_META)) {
- f2fs_sync_meta_pages(sbi, META, LONG_MAX,
- FS_CP_META_IO);
- if (unlikely(f2fs_cp_error(sbi)))
- break;
- }
}
/* write out checkpoint buffer at block 0 */
@@ -1448,6 +1437,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* Here, we have one bio having CP pack except cp pack 2 page */
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
+ f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
+ !f2fs_cp_error(sbi));
/* wait for previous submitted meta pages writeback */
f2fs_wait_on_all_pages_writeback(sbi);
@@ -1465,7 +1456,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
* invalidate intermediate page cache borrowed from meta inode
* which are used for migration of encrypted inode's blocks.
*/
- if (f2fs_sb_has_encrypt(sbi->sb))
+ if (f2fs_sb_has_encrypt(sbi))
invalidate_mapping_pages(META_MAPPING(sbi),
MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b293cb3e27a2..f91d8630c9a2 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -372,29 +372,6 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
return false;
}
-static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
- struct page *page, nid_t ino,
- enum page_type type)
-{
- enum page_type btype = PAGE_TYPE_OF_BIO(type);
- enum temp_type temp;
- struct f2fs_bio_info *io;
- bool ret = false;
-
- for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
- io = sbi->write_io[btype] + temp;
-
- down_read(&io->io_rwsem);
- ret = __has_merged_page(io, inode, page, ino);
- up_read(&io->io_rwsem);
-
- /* TODO: use HOT temp only for meta pages now. */
- if (ret || btype == META)
- break;
- }
- return ret;
-}
-
static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
enum page_type type, enum temp_type temp)
{
@@ -420,13 +397,19 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
nid_t ino, enum page_type type, bool force)
{
enum temp_type temp;
-
- if (!force && !has_merged_page(sbi, inode, page, ino, type))
- return;
+ bool ret = true;
for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
+ if (!force) {
+ enum page_type btype = PAGE_TYPE_OF_BIO(type);
+ struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
- __f2fs_submit_merged_write(sbi, type, temp);
+ down_read(&io->io_rwsem);
+ ret = __has_merged_page(io, inode, page, ino);
+ up_read(&io->io_rwsem);
+ }
+ if (ret)
+ __f2fs_submit_merged_write(sbi, type, temp);
/* TODO: use HOT temp only for meta pages now. */
if (type >= META)
@@ -643,7 +626,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn)
*/
void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
{
- f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
+ f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
__set_data_blkaddr(dn);
if (set_page_dirty(dn->node_page))
dn->node_changed = true;
@@ -673,7 +656,7 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
dn->ofs_in_node, count);
- f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
+ f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
for (; count > 0; dn->ofs_in_node++) {
block_t blkaddr = datablock_addr(dn->inode,
@@ -957,6 +940,9 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
return err;
}
+ if (direct_io && allow_outplace_dio(inode, iocb, from))
+ return 0;
+
if (is_inode_flag_set(inode, FI_NO_PREALLOC))
return 0;
@@ -970,6 +956,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
map.m_next_pgofs = NULL;
map.m_next_extent = NULL;
map.m_seg_type = NO_CHECK_TYPE;
+ map.m_may_create = true;
if (direct_io) {
map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
@@ -1028,7 +1015,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
unsigned int maxblocks = map->m_len;
struct dnode_of_data dn;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- int mode = create ? ALLOC_NODE : LOOKUP_NODE;
+ int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
pgoff_t pgofs, end_offset, end;
int err = 0, ofs = 1;
unsigned int ofs_in_node, last_ofs_in_node;
@@ -1048,6 +1035,10 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
end = pgofs + maxblocks;
if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
+ if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
+ map->m_may_create)
+ goto next_dnode;
+
map->m_pblk = ei.blk + pgofs - ei.fofs;
map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
map->m_flags = F2FS_MAP_MAPPED;
@@ -1062,7 +1053,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
}
next_dnode:
- if (create)
+ if (map->m_may_create)
__do_map_lock(sbi, flag, true);
/* When reading holes, we need its node page */
@@ -1099,11 +1090,13 @@ next_block:
if (is_valid_data_blkaddr(sbi, blkaddr)) {
/* use out-place-update for driect IO under LFS mode */
- if (test_opt(sbi, LFS) && create &&
- flag == F2FS_GET_BLOCK_DIO) {
+ if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
+ map->m_may_create) {
err = __allocate_data_block(&dn, map->m_seg_type);
- if (!err)
+ if (!err) {
+ blkaddr = dn.data_blkaddr;
set_inode_flag(inode, FI_APPEND_WRITE);
+ }
}
} else {
if (create) {
@@ -1209,7 +1202,7 @@ skip:
f2fs_put_dnode(&dn);
- if (create) {
+ if (map->m_may_create) {
__do_map_lock(sbi, flag, false);
f2fs_balance_fs(sbi, dn.node_changed);
}
@@ -1235,7 +1228,7 @@ sync_out:
}
f2fs_put_dnode(&dn);
unlock_out:
- if (create) {
+ if (map->m_may_create) {
__do_map_lock(sbi, flag, false);
f2fs_balance_fs(sbi, dn.node_changed);
}
@@ -1257,6 +1250,7 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
map.m_next_pgofs = NULL;
map.m_next_extent = NULL;
map.m_seg_type = NO_CHECK_TYPE;
+ map.m_may_create = false;
last_lblk = F2FS_BLK_ALIGN(pos + len);
while (map.m_lblk < last_lblk) {
@@ -1271,7 +1265,7 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
static int __get_data_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create, int flag,
- pgoff_t *next_pgofs, int seg_type)
+ pgoff_t *next_pgofs, int seg_type, bool may_write)
{
struct f2fs_map_blocks map;
int err;
@@ -1281,6 +1275,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
map.m_next_pgofs = next_pgofs;
map.m_next_extent = NULL;
map.m_seg_type = seg_type;
+ map.m_may_create = may_write;
err = f2fs_map_blocks(inode, &map, create, flag);
if (!err) {
@@ -1297,16 +1292,25 @@ static int get_data_block(struct inode *inode, sector_t iblock,
{
return __get_data_block(inode, iblock, bh_result, create,
flag, next_pgofs,
- NO_CHECK_TYPE);
+ NO_CHECK_TYPE, create);
+}
+
+static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ return __get_data_block(inode, iblock, bh_result, create,
+ F2FS_GET_BLOCK_DIO, NULL,
+ f2fs_rw_hint_to_seg_type(inode->i_write_hint),
+ true);
}
static int get_data_block_dio(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
return __get_data_block(inode, iblock, bh_result, create,
- F2FS_GET_BLOCK_DIO, NULL,
- f2fs_rw_hint_to_seg_type(
- inode->i_write_hint));
+ F2FS_GET_BLOCK_DIO, NULL,
+ f2fs_rw_hint_to_seg_type(inode->i_write_hint),
+ false);
}
static int get_data_block_bmap(struct inode *inode, sector_t iblock,
@@ -1318,7 +1322,7 @@ static int get_data_block_bmap(struct inode *inode, sector_t iblock,
return __get_data_block(inode, iblock, bh_result, create,
F2FS_GET_BLOCK_BMAP, NULL,
- NO_CHECK_TYPE);
+ NO_CHECK_TYPE, create);
}
static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
@@ -1525,6 +1529,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
map.m_next_pgofs = NULL;
map.m_next_extent = NULL;
map.m_seg_type = NO_CHECK_TYPE;
+ map.m_may_create = false;
for (; nr_pages; nr_pages--) {
if (pages) {
@@ -1855,6 +1860,8 @@ got_it:
if (fio->need_lock == LOCK_REQ)
f2fs_unlock_op(fio->sbi);
err = f2fs_inplace_write_data(fio);
+ if (err && PageWriteback(page))
+ end_page_writeback(page);
trace_f2fs_do_write_data_page(fio->page, IPU);
set_inode_flag(inode, FI_UPDATE_WRITE);
return err;
@@ -2142,12 +2149,11 @@ continue_unlock:
if (PageWriteback(page)) {
if (wbc->sync_mode != WB_SYNC_NONE)
f2fs_wait_on_page_writeback(page,
- DATA, true);
+ DATA, true, true);
else
goto continue_unlock;
}
- BUG_ON(PageWriteback(page));
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
@@ -2324,6 +2330,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
bool locked = false;
struct extent_info ei = {0,0,0};
int err = 0;
+ int flag;
/*
* we already allocated all the blocks, so we don't need to get
@@ -2333,9 +2340,15 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
!is_inode_flag_set(inode, FI_NO_PREALLOC))
return 0;
+ /* f2fs_lock_op avoids race between write CP and convert_inline_page */
+ if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode))
+ flag = F2FS_GET_BLOCK_DEFAULT;
+ else
+ flag = F2FS_GET_BLOCK_PRE_AIO;
+
if (f2fs_has_inline_data(inode) ||
(pos & PAGE_MASK) >= i_size_read(inode)) {
- __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
+ __do_map_lock(sbi, flag, true);
locked = true;
}
restart:
@@ -2373,6 +2386,7 @@ restart:
f2fs_put_dnode(&dn);
__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
true);
+ WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
locked = true;
goto restart;
}
@@ -2386,7 +2400,7 @@ out:
f2fs_put_dnode(&dn);
unlock_out:
if (locked)
- __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
+ __do_map_lock(sbi, flag, false);
return err;
}
@@ -2457,7 +2471,7 @@ repeat:
}
}
- f2fs_wait_on_page_writeback(page, DATA, false);
+ f2fs_wait_on_page_writeback(page, DATA, false, true);
if (len == PAGE_SIZE || PageUptodate(page))
return 0;
@@ -2548,6 +2562,53 @@ static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
return 0;
}
+static void f2fs_dio_end_io(struct bio *bio)
+{
+ struct f2fs_private_dio *dio = bio->bi_private;
+
+ dec_page_count(F2FS_I_SB(dio->inode),
+ dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
+
+ bio->bi_private = dio->orig_private;
+ bio->bi_end_io = dio->orig_end_io;
+
+ kvfree(dio);
+
+ bio_endio(bio);
+}
+
+static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
+ loff_t file_offset)
+{
+ struct f2fs_private_dio *dio;
+ bool write = (bio_op(bio) == REQ_OP_WRITE);
+ int err;
+
+ dio = f2fs_kzalloc(F2FS_I_SB(inode),
+ sizeof(struct f2fs_private_dio), GFP_NOFS);
+ if (!dio) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ dio->inode = inode;
+ dio->orig_end_io = bio->bi_end_io;
+ dio->orig_private = bio->bi_private;
+ dio->write = write;
+
+ bio->bi_end_io = f2fs_dio_end_io;
+ bio->bi_private = dio;
+
+ inc_page_count(F2FS_I_SB(inode),
+ write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
+
+ submit_bio(bio);
+ return;
+out:
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+}
+
static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
struct address_space *mapping = iocb->ki_filp->f_mapping;
@@ -2594,7 +2655,10 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
down_read(&fi->i_gc_rwsem[READ]);
}
- err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio);
+ err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
+ iter, rw == WRITE ? get_data_block_dio_write :
+ get_data_block_dio, NULL, f2fs_dio_submit_bio,
+ DIO_LOCKING | DIO_SKIP_HOLES);
if (do_opu)
up_read(&fi->i_gc_rwsem[READ]);
@@ -2738,7 +2802,7 @@ int f2fs_migrate_page(struct address_space *mapping,
*/
extra_count = (atomic_written ? 1 : 0) - page_has_private(page);
rc = migrate_page_move_mapping(mapping, newpage,
- page, NULL, mode, extra_count);
+ page, mode, extra_count);
if (rc != MIGRATEPAGE_SUCCESS) {
if (atomic_written)
mutex_unlock(&fi->inmem_lock);
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 139b4d5c83d5..ebcc121920ba 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -53,6 +53,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->vw_cnt = atomic_read(&sbi->vw_cnt);
si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt);
+ si->nr_dio_read = get_pages(sbi, F2FS_DIO_READ);
+ si->nr_dio_write = get_pages(sbi, F2FS_DIO_WRITE);
si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
si->nr_rd_data = get_pages(sbi, F2FS_RD_DATA);
@@ -62,7 +64,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->nr_flushed =
atomic_read(&SM_I(sbi)->fcc_info->issued_flush);
si->nr_flushing =
- atomic_read(&SM_I(sbi)->fcc_info->issing_flush);
+ atomic_read(&SM_I(sbi)->fcc_info->queued_flush);
si->flush_list_empty =
llist_empty(&SM_I(sbi)->fcc_info->issue_list);
}
@@ -70,7 +72,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->nr_discarded =
atomic_read(&SM_I(sbi)->dcc_info->issued_discard);
si->nr_discarding =
- atomic_read(&SM_I(sbi)->dcc_info->issing_discard);
+ atomic_read(&SM_I(sbi)->dcc_info->queued_discard);
si->nr_discard_cmd =
atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt);
si->undiscard_blks = SM_I(sbi)->dcc_info->undiscard_blks;
@@ -197,7 +199,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
si->base_mem += SIT_VBLOCK_MAP_SIZE;
- if (sbi->segs_per_sec > 1)
+ if (__is_large_section(sbi))
si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry);
si->base_mem += __bitmap_size(sbi, SIT_BITMAP);
@@ -374,6 +376,8 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
si->ext_tree, si->zombie_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
+ seq_printf(s, " - DIO (R: %4d, W: %4d)\n",
+ si->nr_dio_read, si->nr_dio_write);
seq_printf(s, " - IO_R (Data: %4d, Node: %4d, Meta: %4d\n",
si->nr_rd_data, si->nr_rd_node, si->nr_rd_meta);
seq_printf(s, " - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), "
@@ -444,18 +448,7 @@ static int stat_show(struct seq_file *s, void *v)
return 0;
}
-static int stat_open(struct inode *inode, struct file *file)
-{
- return single_open(file, stat_show, inode->i_private);
-}
-
-static const struct file_operations stat_fops = {
- .owner = THIS_MODULE,
- .open = stat_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(stat);
int f2fs_build_stats(struct f2fs_sb_info *sbi)
{
@@ -510,7 +503,7 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
list_del(&si->stat_list);
mutex_unlock(&f2fs_stat_mutex);
- kfree(si);
+ kvfree(si);
}
int __init f2fs_create_root_stats(void)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index bacc667950b6..50d0d36280fa 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -293,7 +293,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
{
enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA;
lock_page(page);
- f2fs_wait_on_page_writeback(page, type, true);
+ f2fs_wait_on_page_writeback(page, type, true, true);
de->ino = cpu_to_le32(inode->i_ino);
set_de_type(de, inode->i_mode);
set_page_dirty(page);
@@ -307,7 +307,7 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage)
{
struct f2fs_inode *ri;
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
/* copy name info. to this inode page */
ri = F2FS_INODE(ipage);
@@ -550,7 +550,7 @@ start:
++level;
goto start;
add_dentry:
- f2fs_wait_on_page_writeback(dentry_page, DATA, true);
+ f2fs_wait_on_page_writeback(dentry_page, DATA, true, true);
if (inode) {
down_write(&F2FS_I(inode)->i_sem);
@@ -705,7 +705,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
return f2fs_delete_inline_entry(dentry, page, dir, inode);
lock_page(page);
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
dentry_blk = page_address(page);
bit_pos = dentry - dentry_blk->dentry;
@@ -808,6 +808,17 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
de_name.name = d->filename[bit_pos];
de_name.len = le16_to_cpu(de->name_len);
+ /* check memory boundary before moving forward */
+ bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
+ if (unlikely(bit_pos > d->max)) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: corrupted namelen=%d, run fsck to fix.",
+ __func__, le16_to_cpu(de->name_len));
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ err = -EINVAL;
+ goto out;
+ }
+
if (f2fs_encrypted_inode(d->inode)) {
int save_len = fstr->len;
@@ -830,7 +841,6 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
if (readdir_ra)
f2fs_ra_node_page(sbi, le32_to_cpu(de->ino));
- bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
ctx->pos = start_pos + bit_pos;
}
out:
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 1e031971a466..12fabd6735dd 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -67,7 +67,7 @@ struct f2fs_fault_info {
unsigned int inject_type;
};
-extern char *f2fs_fault_name[FAULT_MAX];
+extern const char *f2fs_fault_name[FAULT_MAX];
#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type)))
#endif
@@ -152,12 +152,13 @@ struct f2fs_mount_info {
#define F2FS_FEATURE_VERITY 0x0400 /* reserved */
#define F2FS_FEATURE_SB_CHKSUM 0x0800
-#define F2FS_HAS_FEATURE(sb, mask) \
- ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
-#define F2FS_SET_FEATURE(sb, mask) \
- (F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask))
-#define F2FS_CLEAR_FEATURE(sb, mask) \
- (F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask))
+#define __F2FS_HAS_FEATURE(raw_super, mask) \
+ ((raw_super->feature & cpu_to_le32(mask)) != 0)
+#define F2FS_HAS_FEATURE(sbi, mask) __F2FS_HAS_FEATURE(sbi->raw_super, mask)
+#define F2FS_SET_FEATURE(sbi, mask) \
+ (sbi->raw_super->feature |= cpu_to_le32(mask))
+#define F2FS_CLEAR_FEATURE(sbi, mask) \
+ (sbi->raw_super->feature &= ~cpu_to_le32(mask))
/*
* Default values for user and/or group using reserved blocks
@@ -284,7 +285,7 @@ struct discard_cmd {
struct block_device *bdev; /* bdev */
unsigned short ref; /* reference count */
unsigned char state; /* state */
- unsigned char issuing; /* issuing discard */
+ unsigned char queued; /* queued discard */
int error; /* bio error */
spinlock_t lock; /* for state/bio_ref updating */
unsigned short bio_ref; /* bio reference count */
@@ -326,7 +327,7 @@ struct discard_cmd_control {
unsigned int undiscard_blks; /* # of undiscard blocks */
unsigned int next_pos; /* next discard position */
atomic_t issued_discard; /* # of issued discard */
- atomic_t issing_discard; /* # of issing discard */
+ atomic_t queued_discard; /* # of queued discard */
atomic_t discard_cmd_cnt; /* # of cached cmd count */
struct rb_root_cached root; /* root of discard rb-tree */
bool rbtree_check; /* config for consistence check */
@@ -416,6 +417,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
#define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */
#define F2FS_GOING_DOWN_NOSYNC 0x2 /* going down */
#define F2FS_GOING_DOWN_METAFLUSH 0x3 /* going down with meta flush */
+#define F2FS_GOING_DOWN_NEED_FSCK 0x4 /* going down to trigger fsck */
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
@@ -557,16 +559,8 @@ struct extent_info {
};
struct extent_node {
- struct rb_node rb_node;
- union {
- struct {
- unsigned int fofs;
- unsigned int len;
- u32 blk;
- };
- struct extent_info ei; /* extent info */
-
- };
+ struct rb_node rb_node; /* rb node located in rb-tree */
+ struct extent_info ei; /* extent info */
struct list_head list; /* node in global extent list of sbi */
struct extent_tree *et; /* extent tree pointer */
};
@@ -601,6 +595,7 @@ struct f2fs_map_blocks {
pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */
pgoff_t *m_next_extent; /* point to next possible extent */
int m_seg_type;
+ bool m_may_create; /* indicate it is from write path */
};
/* for flag in get_data_block */
@@ -889,7 +884,7 @@ struct flush_cmd_control {
struct task_struct *f2fs_issue_flush; /* flush thread */
wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */
atomic_t issued_flush; /* # of issued flushes */
- atomic_t issing_flush; /* # of issing flushes */
+ atomic_t queued_flush; /* # of queued flushes */
struct llist_head issue_list; /* list for command issue */
struct llist_node *dispatch_list; /* list for command dispatch */
};
@@ -956,6 +951,8 @@ enum count_type {
F2FS_RD_DATA,
F2FS_RD_NODE,
F2FS_RD_META,
+ F2FS_DIO_WRITE,
+ F2FS_DIO_READ,
NR_COUNT_TYPE,
};
@@ -1170,8 +1167,6 @@ struct f2fs_sb_info {
/* for bio operations */
struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */
- struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE];
- /* bio ordering for NODE/DATA */
/* keep migration IO order for LFS mode */
struct rw_semaphore io_order_lock;
mempool_t *write_io_dummy; /* Dummy pages */
@@ -1263,6 +1258,7 @@ struct f2fs_sb_info {
struct f2fs_gc_kthread *gc_thread; /* GC thread */
unsigned int cur_victim_sec; /* current victim section num */
unsigned int gc_mode; /* current GC state */
+ unsigned int next_victim_seg[2]; /* next segment in victim section */
/* for skip statistic */
unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */
unsigned long long skipped_gc_rwsem; /* FG_GC only */
@@ -1272,6 +1268,8 @@ struct f2fs_sb_info {
/* maximum # of trials to find a victim segment for SSR and GC */
unsigned int max_victim_search;
+ /* migration granularity of garbage collection, unit: segment */
+ unsigned int migration_granularity;
/*
* for stat information.
@@ -1330,6 +1328,13 @@ struct f2fs_sb_info {
__u32 s_chksum_seed;
};
+struct f2fs_private_dio {
+ struct inode *inode;
+ void *orig_private;
+ bio_end_io_t *orig_end_io;
+ bool write;
+};
+
#ifdef CONFIG_F2FS_FAULT_INJECTION
#define f2fs_show_injection_info(type) \
printk_ratelimited("%sF2FS-fs : inject %s in %s of %pF\n", \
@@ -1608,12 +1613,16 @@ static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
{
unsigned long flags;
- set_sbi_flag(sbi, SBI_NEED_FSCK);
+ /*
+ * In order to re-enable nat_bits we need to call fsck.f2fs by
+ * set_sbi_flag(sbi, SBI_NEED_FSCK). But it may give huge cost,
+ * so let's rely on regular fsck or unclean shutdown.
+ */
if (lock)
spin_lock_irqsave(&sbi->cp_lock, flags);
__clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
- kfree(NM_I(sbi)->nat_bits);
+ kvfree(NM_I(sbi)->nat_bits);
NM_I(sbi)->nat_bits = NULL;
if (lock)
spin_unlock_irqrestore(&sbi->cp_lock, flags);
@@ -2146,7 +2155,11 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
{
if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) ||
get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) ||
- get_pages(sbi, F2FS_WB_CP_DATA))
+ get_pages(sbi, F2FS_WB_CP_DATA) ||
+ get_pages(sbi, F2FS_DIO_READ) ||
+ get_pages(sbi, F2FS_DIO_WRITE) ||
+ atomic_read(&SM_I(sbi)->dcc_info->queued_discard) ||
+ atomic_read(&SM_I(sbi)->fcc_info->queued_flush))
return false;
return f2fs_time_over(sbi, type);
}
@@ -2370,6 +2383,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode,
case FI_NEW_INODE:
if (set)
return;
+ /* fall through */
case FI_DATA_EXIST:
case FI_INLINE_DOTS:
case FI_PIN_FILE:
@@ -2672,22 +2686,37 @@ static inline bool is_dot_dotdot(const struct qstr *str)
static inline bool f2fs_may_extent_tree(struct inode *inode)
{
- if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE) ||
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+ if (!test_opt(sbi, EXTENT_CACHE) ||
is_inode_flag_set(inode, FI_NO_EXTENT))
return false;
+ /*
+ * for recovered files during mount do not create extents
+ * if shrinker is not registered.
+ */
+ if (list_empty(&sbi->s_list))
+ return false;
+
return S_ISREG(inode->i_mode);
}
static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
size_t size, gfp_t flags)
{
+ void *ret;
+
if (time_to_inject(sbi, FAULT_KMALLOC)) {
f2fs_show_injection_info(FAULT_KMALLOC);
return NULL;
}
- return kmalloc(size, flags);
+ ret = kmalloc(size, flags);
+ if (ret)
+ return ret;
+
+ return kvmalloc(size, flags);
}
static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi,
@@ -2762,6 +2791,8 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
spin_unlock(&sbi->iostat_lock);
}
+#define __is_large_section(sbi) ((sbi)->segs_per_sec > 1)
+
#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \
(!is_read_io(fio->op) || fio->is_meta))
@@ -3007,7 +3038,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
struct f2fs_summary *sum, int type,
struct f2fs_io_info *fio, bool add_list);
void f2fs_wait_on_page_writeback(struct page *page,
- enum page_type type, bool ordered);
+ enum page_type type, bool ordered, bool locked);
void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr);
void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
block_t len);
@@ -3147,6 +3178,7 @@ struct f2fs_stat_info {
int total_count, utilization;
int bg_gc, nr_wb_cp_data, nr_wb_data;
int nr_rd_data, nr_rd_node, nr_rd_meta;
+ int nr_dio_read, nr_dio_write;
unsigned int io_skip_bggc, other_skip_bggc;
int nr_flushing, nr_flushed, flush_list_empty;
int nr_discarding, nr_discarded;
@@ -3459,9 +3491,9 @@ static inline bool f2fs_post_read_required(struct inode *inode)
}
#define F2FS_FEATURE_FUNCS(name, flagname) \
-static inline int f2fs_sb_has_##name(struct super_block *sb) \
+static inline int f2fs_sb_has_##name(struct f2fs_sb_info *sbi) \
{ \
- return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_##flagname); \
+ return F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_##flagname); \
}
F2FS_FEATURE_FUNCS(encrypt, ENCRYPT);
@@ -3491,7 +3523,7 @@ static inline int get_blkz_type(struct f2fs_sb_info *sbi,
static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi)
{
- return f2fs_sb_has_blkzoned(sbi->sb);
+ return f2fs_sb_has_blkzoned(sbi);
}
static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi)
@@ -3566,7 +3598,7 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
* for blkzoned device, fallback direct IO to buffered IO, so
* all IOs can be serialized by log-structured write.
*/
- if (f2fs_sb_has_blkzoned(sbi->sb))
+ if (f2fs_sb_has_blkzoned(sbi))
return true;
if (test_opt(sbi, LFS) && (rw == WRITE) &&
block_unaligned_IO(inode, iocb, iter))
@@ -3589,7 +3621,7 @@ extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
{
#ifdef CONFIG_QUOTA
- if (f2fs_sb_has_quota_ino(sbi->sb))
+ if (f2fs_sb_has_quota_ino(sbi))
return true;
if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 88b124677189..bba56b39dcc5 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -82,7 +82,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
}
/* fill the page */
- f2fs_wait_on_page_writeback(page, DATA, false);
+ f2fs_wait_on_page_writeback(page, DATA, false, true);
/* wait for GCed page writeback via META_MAPPING */
f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
@@ -216,6 +216,9 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
trace_f2fs_sync_file_enter(inode);
+ if (S_ISDIR(inode->i_mode))
+ goto go_write;
+
/* if fdatasync is triggered, let's do in-place-update */
if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
set_inode_flag(inode, FI_NEED_IPU);
@@ -575,7 +578,7 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
if (IS_ERR(page))
return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page);
truncate_out:
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
zero_user(page, offset, PAGE_SIZE - offset);
/* An encrypted inode should have a key and truncate the last page. */
@@ -696,7 +699,7 @@ int f2fs_getattr(const struct path *path, struct kstat *stat,
unsigned int flags;
if (f2fs_has_extra_attr(inode) &&
- f2fs_sb_has_inode_crtime(inode->i_sb) &&
+ f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) &&
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
stat->result_mask |= STATX_BTIME;
stat->btime.tv_sec = fi->i_crtime.tv_sec;
@@ -892,7 +895,7 @@ static int fill_zero(struct inode *inode, pgoff_t index,
if (IS_ERR(page))
return PTR_ERR(page);
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
zero_user(page, start, len);
set_page_dirty(page);
f2fs_put_page(page, 1);
@@ -1496,7 +1499,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_map_blocks map = { .m_next_pgofs = NULL,
- .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE };
+ .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE,
+ .m_may_create = true };
pgoff_t pg_end;
loff_t new_size = i_size_read(inode);
loff_t off_end;
@@ -1681,7 +1685,7 @@ static int __f2fs_ioc_setflags(struct inode *inode, unsigned int flags)
inode->i_ctime = current_time(inode);
f2fs_set_inode_flags(inode);
- f2fs_mark_inode_dirty_sync(inode, false);
+ f2fs_mark_inode_dirty_sync(inode, true);
return 0;
}
@@ -1962,6 +1966,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
f2fs_stop_checkpoint(sbi, false);
set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
+ case F2FS_GOING_DOWN_NEED_FSCK:
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ /* do checkpoint only */
+ ret = f2fs_sync_fs(sb, 1);
+ if (ret)
+ goto out;
+ break;
default:
ret = -EINVAL;
goto out;
@@ -2030,7 +2041,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
- if (!f2fs_sb_has_encrypt(inode->i_sb))
+ if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode)))
return -EOPNOTSUPP;
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
@@ -2040,7 +2051,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
{
- if (!f2fs_sb_has_encrypt(file_inode(filp)->i_sb))
+ if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
return -EOPNOTSUPP;
return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
}
@@ -2051,7 +2062,7 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int err;
- if (!f2fs_sb_has_encrypt(inode->i_sb))
+ if (!f2fs_sb_has_encrypt(sbi))
return -EOPNOTSUPP;
err = mnt_want_write_file(filp);
@@ -2155,7 +2166,7 @@ do_more:
}
ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start));
- range.start += sbi->blocks_per_seg;
+ range.start += BLKS_PER_SEC(sbi);
if (range.start <= end)
goto do_more;
out:
@@ -2197,7 +2208,8 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
{
struct inode *inode = file_inode(filp);
struct f2fs_map_blocks map = { .m_next_extent = NULL,
- .m_seg_type = NO_CHECK_TYPE };
+ .m_seg_type = NO_CHECK_TYPE ,
+ .m_may_create = false };
struct extent_info ei = {0, 0, 0};
pgoff_t pg_start, pg_end, next_pgofs;
unsigned int blk_per_seg = sbi->blocks_per_seg;
@@ -2560,7 +2572,7 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
return -EFAULT;
if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num ||
- sbi->segs_per_sec != 1) {
+ __is_large_section(sbi)) {
f2fs_msg(sbi->sb, KERN_WARNING,
"Can't flush %u in %d for segs_per_sec %u != 1\n",
range.dev_num, sbi->s_ndevs,
@@ -2635,12 +2647,11 @@ static int f2fs_ioc_setproject(struct file *filp, __u32 projid)
struct inode *inode = file_inode(filp);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct super_block *sb = sbi->sb;
struct page *ipage;
kprojid_t kprojid;
int err;
- if (!f2fs_sb_has_project_quota(sb)) {
+ if (!f2fs_sb_has_project_quota(sbi)) {
if (projid != F2FS_DEF_PROJID)
return -EOPNOTSUPP;
else
@@ -2757,7 +2768,7 @@ static int f2fs_ioc_fsgetxattr(struct file *filp, unsigned long arg)
fa.fsx_xflags = f2fs_iflags_to_xflags(fi->i_flags &
F2FS_FL_USER_VISIBLE);
- if (f2fs_sb_has_project_quota(inode->i_sb))
+ if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)))
fa.fsx_projid = (__u32)from_kprojid(&init_user_ns,
fi->i_projid);
@@ -2932,6 +2943,7 @@ int f2fs_precache_extents(struct inode *inode)
map.m_next_pgofs = NULL;
map.m_next_extent = &m_next_extent;
map.m_seg_type = NO_CHECK_TYPE;
+ map.m_may_create = false;
end = F2FS_I_SB(inode)->max_file_blocks;
while (map.m_lblk < end) {
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index a07241fb8537..195cf0f9d9ef 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -142,7 +142,7 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(gc_th->f2fs_gc_task)) {
err = PTR_ERR(gc_th->f2fs_gc_task);
- kfree(gc_th);
+ kvfree(gc_th);
sbi->gc_thread = NULL;
}
out:
@@ -155,7 +155,7 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
if (!gc_th)
return;
kthread_stop(gc_th->f2fs_gc_task);
- kfree(gc_th);
+ kvfree(gc_th);
sbi->gc_thread = NULL;
}
@@ -323,8 +323,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
p.min_cost = get_max_cost(sbi, &p);
if (*result != NULL_SEGNO) {
- if (IS_DATASEG(get_seg_entry(sbi, *result)->type) &&
- get_valid_blocks(sbi, *result, false) &&
+ if (get_valid_blocks(sbi, *result, false) &&
!sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
p.min_segno = *result;
goto out;
@@ -333,6 +332,22 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
if (p.max_search == 0)
goto out;
+ if (__is_large_section(sbi) && p.alloc_mode == LFS) {
+ if (sbi->next_victim_seg[BG_GC] != NULL_SEGNO) {
+ p.min_segno = sbi->next_victim_seg[BG_GC];
+ *result = p.min_segno;
+ sbi->next_victim_seg[BG_GC] = NULL_SEGNO;
+ goto got_result;
+ }
+ if (gc_type == FG_GC &&
+ sbi->next_victim_seg[FG_GC] != NULL_SEGNO) {
+ p.min_segno = sbi->next_victim_seg[FG_GC];
+ *result = p.min_segno;
+ sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
+ goto got_result;
+ }
+ }
+
last_victim = sm->last_victim[p.gc_mode];
if (p.alloc_mode == LFS && gc_type == FG_GC) {
p.min_segno = check_bg_victims(sbi);
@@ -395,6 +410,8 @@ next:
}
if (p.min_segno != NULL_SEGNO) {
got_it:
+ *result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
+got_result:
if (p.alloc_mode == LFS) {
secno = GET_SEC_FROM_SEG(sbi, p.min_segno);
if (gc_type == FG_GC)
@@ -402,13 +419,13 @@ got_it:
else
set_bit(secno, dirty_i->victim_secmap);
}
- *result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
+ }
+out:
+ if (p.min_segno != NULL_SEGNO)
trace_f2fs_get_victim(sbi->sb, type, gc_type, &p,
sbi->cur_victim_sec,
prefree_segments(sbi), free_segments(sbi));
- }
-out:
mutex_unlock(&dirty_i->seglist_lock);
return (p.min_segno == NULL_SEGNO) ? 0 : 1;
@@ -658,6 +675,14 @@ got_it:
fio.page = page;
fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
+ /*
+ * don't cache encrypted data into meta inode until previous dirty
+ * data were writebacked to avoid racing between GC and flush.
+ */
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
+
+ f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
+
fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi),
dn.data_blkaddr,
FGP_LOCK | FGP_CREAT, GFP_NOFS);
@@ -743,7 +768,9 @@ static int move_data_block(struct inode *inode, block_t bidx,
* don't cache encrypted data into meta inode until previous dirty
* data were writebacked to avoid racing between GC and flush.
*/
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
+
+ f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
if (err)
@@ -802,8 +829,8 @@ static int move_data_block(struct inode *inode, block_t bidx,
}
write_page:
+ f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true, true);
set_page_dirty(fio.encrypted_page);
- f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true);
if (clear_page_dirty_for_io(fio.encrypted_page))
dec_page_count(fio.sbi, F2FS_DIRTY_META);
@@ -811,7 +838,7 @@ write_page:
ClearPageError(page);
/* allocate block address */
- f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
+ f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true);
fio.op = REQ_OP_WRITE;
fio.op_flags = REQ_SYNC;
@@ -897,8 +924,9 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type,
bool is_dirty = PageDirty(page);
retry:
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
+
set_page_dirty(page);
- f2fs_wait_on_page_writeback(page, DATA, true);
if (clear_page_dirty_for_io(page)) {
inode_dec_dirty_pages(inode);
f2fs_remove_dirty_inode(inode);
@@ -1093,15 +1121,18 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
struct blk_plug plug;
unsigned int segno = start_segno;
unsigned int end_segno = start_segno + sbi->segs_per_sec;
- int seg_freed = 0;
+ int seg_freed = 0, migrated = 0;
unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
SUM_TYPE_DATA : SUM_TYPE_NODE;
int submitted = 0;
+ if (__is_large_section(sbi))
+ end_segno = rounddown(end_segno, sbi->segs_per_sec);
+
/* readahead multi ssa blocks those have contiguous address */
- if (sbi->segs_per_sec > 1)
+ if (__is_large_section(sbi))
f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
- sbi->segs_per_sec, META_SSA, true);
+ end_segno - segno, META_SSA, true);
/* reference all summary page */
while (segno < end_segno) {
@@ -1130,10 +1161,13 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
GET_SUM_BLOCK(sbi, segno));
f2fs_put_page(sum_page, 0);
- if (get_valid_blocks(sbi, segno, false) == 0 ||
- !PageUptodate(sum_page) ||
- unlikely(f2fs_cp_error(sbi)))
- goto next;
+ if (get_valid_blocks(sbi, segno, false) == 0)
+ goto freed;
+ if (__is_large_section(sbi) &&
+ migrated >= sbi->migration_granularity)
+ goto skip;
+ if (!PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi)))
+ goto skip;
sum = page_address(sum_page);
if (type != GET_SUM_TYPE((&sum->footer))) {
@@ -1141,7 +1175,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
"type [%d, %d] in SSA and SIT",
segno, type, GET_SUM_TYPE((&sum->footer)));
set_sbi_flag(sbi, SBI_NEED_FSCK);
- goto next;
+ goto skip;
}
/*
@@ -1160,10 +1194,15 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
stat_inc_seg_count(sbi, type, gc_type);
+freed:
if (gc_type == FG_GC &&
get_valid_blocks(sbi, segno, false) == 0)
seg_freed++;
-next:
+ migrated++;
+
+ if (__is_large_section(sbi) && segno + 1 < end_segno)
+ sbi->next_victim_seg[gc_type] = segno + 1;
+skip:
f2fs_put_page(sum_page, 0);
}
@@ -1307,7 +1346,7 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES;
/* give warm/cold data area from slower device */
- if (sbi->s_ndevs && sbi->segs_per_sec == 1)
+ if (sbi->s_ndevs && !__is_large_section(sbi))
SIT_I(sbi)->last_victim[ALLOC_NEXT] =
GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 7b0cff7e6051..d636cbcf68f2 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -72,7 +72,7 @@ void f2fs_truncate_inline_inode(struct inode *inode,
addr = inline_data_addr(inode, ipage);
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
memset(addr + from, 0, MAX_INLINE_DATA(inode) - from);
set_page_dirty(ipage);
@@ -161,7 +161,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
fio.old_blkaddr = dn->data_blkaddr;
set_inode_flag(dn->inode, FI_HOT_DATA);
f2fs_outplace_write_data(dn, &fio);
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
if (dirty) {
inode_dec_dirty_pages(dn->inode);
f2fs_remove_dirty_inode(dn->inode);
@@ -236,7 +236,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
f2fs_bug_on(F2FS_I_SB(inode), page->index);
- f2fs_wait_on_page_writeback(dn.inode_page, NODE, true);
+ f2fs_wait_on_page_writeback(dn.inode_page, NODE, true, true);
src_addr = kmap_atomic(page);
dst_addr = inline_data_addr(inode, dn.inode_page);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode));
@@ -277,7 +277,7 @@ process_inline:
ipage = f2fs_get_node_page(sbi, inode->i_ino);
f2fs_bug_on(sbi, IS_ERR(ipage));
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
src_addr = inline_data_addr(inode, npage);
dst_addr = inline_data_addr(inode, ipage);
@@ -391,7 +391,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
goto out;
}
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
dentry_blk = page_address(page);
@@ -501,18 +501,18 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
stat_dec_inline_dir(dir);
clear_inode_flag(dir, FI_INLINE_DENTRY);
- kfree(backup_dentry);
+ kvfree(backup_dentry);
return 0;
recover:
lock_page(ipage);
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir));
f2fs_i_depth_write(dir, 0);
f2fs_i_size_write(dir, MAX_INLINE_DATA(dir));
set_page_dirty(ipage);
f2fs_put_page(ipage, 1);
- kfree(backup_dentry);
+ kvfree(backup_dentry);
return err;
}
@@ -565,7 +565,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
}
}
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
name_hash = f2fs_dentry_hash(new_name, NULL);
f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos);
@@ -597,7 +597,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
int i;
lock_page(page);
- f2fs_wait_on_page_writeback(page, NODE, true);
+ f2fs_wait_on_page_writeback(page, NODE, true, true);
inline_dentry = inline_data_addr(dir, page);
make_dentry_ptr_inline(dir, &d, inline_dentry);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 91ceee0ed4c4..bec52961630b 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -103,7 +103,7 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage)
while (start < end) {
if (*start++) {
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
set_inode_flag(inode, FI_DATA_EXIST);
set_raw_inline(inode, F2FS_INODE(ipage));
@@ -118,7 +118,7 @@ static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page
{
struct f2fs_inode *ri = &F2FS_NODE(page)->i;
- if (!f2fs_sb_has_inode_chksum(sbi->sb))
+ if (!f2fs_sb_has_inode_chksum(sbi))
return false;
if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR))
@@ -218,7 +218,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
return false;
}
- if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)
+ if (f2fs_sb_has_flexible_inline_xattr(sbi)
&& !f2fs_has_extra_attr(inode)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
@@ -228,7 +228,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
}
if (f2fs_has_extra_attr(inode) &&
- !f2fs_sb_has_extra_attr(sbi->sb)) {
+ !f2fs_sb_has_extra_attr(sbi)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: inode (ino=%lx) is with extra_attr, "
@@ -340,7 +340,7 @@ static int do_read_inode(struct inode *inode)
fi->i_extra_isize = f2fs_has_extra_attr(inode) ?
le16_to_cpu(ri->i_extra_isize) : 0;
- if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
+ if (f2fs_sb_has_flexible_inline_xattr(sbi)) {
fi->i_inline_xattr_size = le16_to_cpu(ri->i_inline_xattr_size);
} else if (f2fs_has_inline_xattr(inode) ||
f2fs_has_inline_dentry(inode)) {
@@ -390,14 +390,14 @@ static int do_read_inode(struct inode *inode)
if (fi->i_flags & F2FS_PROJINHERIT_FL)
set_inode_flag(inode, FI_PROJ_INHERIT);
- if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi->sb) &&
+ if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi) &&
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid))
i_projid = (projid_t)le32_to_cpu(ri->i_projid);
else
i_projid = F2FS_DEF_PROJID;
fi->i_projid = make_kprojid(&init_user_ns, i_projid);
- if (f2fs_has_extra_attr(inode) && f2fs_sb_has_inode_crtime(sbi->sb) &&
+ if (f2fs_has_extra_attr(inode) && f2fs_sb_has_inode_crtime(sbi) &&
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
fi->i_crtime.tv_sec = le64_to_cpu(ri->i_crtime);
fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec);
@@ -497,7 +497,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
struct f2fs_inode *ri;
struct extent_tree *et = F2FS_I(inode)->extent_tree;
- f2fs_wait_on_page_writeback(node_page, NODE, true);
+ f2fs_wait_on_page_writeback(node_page, NODE, true, true);
set_page_dirty(node_page);
f2fs_inode_synced(inode);
@@ -542,11 +542,11 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
if (f2fs_has_extra_attr(inode)) {
ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize);
- if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)->sb))
+ if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)))
ri->i_inline_xattr_size =
cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size);
- if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) &&
+ if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) &&
F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
i_projid)) {
projid_t i_projid;
@@ -556,7 +556,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
ri->i_projid = cpu_to_le32(i_projid);
}
- if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)->sb) &&
+ if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) &&
F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
i_crtime)) {
ri->i_crtime =
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 99299ede7429..62d9829f3a6a 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -61,7 +61,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
goto fail;
}
- if (f2fs_sb_has_project_quota(sbi->sb) &&
+ if (f2fs_sb_has_project_quota(sbi) &&
(F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL))
F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid;
else
@@ -79,7 +79,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
f2fs_may_encrypt(inode))
f2fs_set_encrypted_inode(inode);
- if (f2fs_sb_has_extra_attr(sbi->sb)) {
+ if (f2fs_sb_has_extra_attr(sbi)) {
set_inode_flag(inode, FI_EXTRA_ATTR);
F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE;
}
@@ -92,7 +92,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
if (f2fs_may_inline_dentry(inode))
set_inode_flag(inode, FI_INLINE_DENTRY);
- if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
+ if (f2fs_sb_has_flexible_inline_xattr(sbi)) {
f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode));
if (f2fs_has_inline_xattr(inode))
xattr_size = F2FS_OPTION(sbi).inline_xattr_size;
@@ -635,7 +635,7 @@ out_f2fs_handle_failed_inode:
f2fs_handle_failed_inode(inode);
out_free_encrypted_link:
if (disk_link.name != (unsigned char *)symname)
- kfree(disk_link.name);
+ kvfree(disk_link.name);
return err;
}
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index d338740d0fda..4f450e573312 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -826,6 +826,7 @@ static int truncate_node(struct dnode_of_data *dn)
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct node_info ni;
int err;
+ pgoff_t index;
err = f2fs_get_node_info(sbi, dn->nid, &ni);
if (err)
@@ -845,10 +846,11 @@ static int truncate_node(struct dnode_of_data *dn)
clear_node_page_dirty(dn->node_page);
set_sbi_flag(sbi, SBI_IS_DIRTY);
+ index = dn->node_page->index;
f2fs_put_page(dn->node_page, 1);
invalidate_mapping_pages(NODE_MAPPING(sbi),
- dn->node_page->index, dn->node_page->index);
+ index, index);
dn->node_page = NULL;
trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
@@ -1104,7 +1106,7 @@ skip_partial:
ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
lock_page(page);
BUG_ON(page->mapping != NODE_MAPPING(sbi));
- f2fs_wait_on_page_writeback(page, NODE, true);
+ f2fs_wait_on_page_writeback(page, NODE, true, true);
ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
set_page_dirty(page);
unlock_page(page);
@@ -1232,7 +1234,7 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs)
new_ni.version = 0;
set_node_addr(sbi, &new_ni, NEW_ADDR, false);
- f2fs_wait_on_page_writeback(page, NODE, true);
+ f2fs_wait_on_page_writeback(page, NODE, true, true);
fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
set_cold_node(page, S_ISDIR(dn->inode->i_mode));
if (!PageUptodate(page))
@@ -1596,10 +1598,10 @@ int f2fs_move_node_page(struct page *node_page, int gc_type)
.for_reclaim = 0,
};
+ f2fs_wait_on_page_writeback(node_page, NODE, true, true);
+
set_page_dirty(node_page);
- f2fs_wait_on_page_writeback(node_page, NODE, true);
- f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page));
if (!clear_page_dirty_for_io(node_page)) {
err = -EAGAIN;
goto out_page;
@@ -1687,8 +1689,7 @@ continue_unlock:
goto continue_unlock;
}
- f2fs_wait_on_page_writeback(page, NODE, true);
- BUG_ON(PageWriteback(page));
+ f2fs_wait_on_page_writeback(page, NODE, true, true);
set_fsync_mark(page, 0);
set_dentry_mark(page, 0);
@@ -1739,7 +1740,7 @@ continue_unlock:
"Retry to write fsync mark: ino=%u, idx=%lx",
ino, last_page->index);
lock_page(last_page);
- f2fs_wait_on_page_writeback(last_page, NODE, true);
+ f2fs_wait_on_page_writeback(last_page, NODE, true, true);
set_page_dirty(last_page);
unlock_page(last_page);
goto retry;
@@ -1820,9 +1821,8 @@ continue_unlock:
goto lock_node;
}
- f2fs_wait_on_page_writeback(page, NODE, true);
+ f2fs_wait_on_page_writeback(page, NODE, true, true);
- BUG_ON(PageWriteback(page));
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
@@ -1889,7 +1889,7 @@ int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
get_page(page);
spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
- f2fs_wait_on_page_writeback(page, NODE, true);
+ f2fs_wait_on_page_writeback(page, NODE, true, false);
if (TestClearPageError(page))
ret = -EIO;
@@ -2467,7 +2467,7 @@ void f2fs_recover_inline_xattr(struct inode *inode, struct page *page)
src_addr = inline_xattr_addr(inode, page);
inline_size = inline_xattr_size(inode);
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
memcpy(dst_addr, src_addr, inline_size);
update_inode:
f2fs_update_inode(inode, ipage);
@@ -2561,17 +2561,17 @@ retry:
if (dst->i_inline & F2FS_EXTRA_ATTR) {
dst->i_extra_isize = src->i_extra_isize;
- if (f2fs_sb_has_flexible_inline_xattr(sbi->sb) &&
+ if (f2fs_sb_has_flexible_inline_xattr(sbi) &&
F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
i_inline_xattr_size))
dst->i_inline_xattr_size = src->i_inline_xattr_size;
- if (f2fs_sb_has_project_quota(sbi->sb) &&
+ if (f2fs_sb_has_project_quota(sbi) &&
F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
i_projid))
dst->i_projid = src->i_projid;
- if (f2fs_sb_has_inode_crtime(sbi->sb) &&
+ if (f2fs_sb_has_inode_crtime(sbi) &&
F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
i_crtime_nsec)) {
dst->i_crtime = src->i_crtime;
@@ -3113,17 +3113,17 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
for (i = 0; i < nm_i->nat_blocks; i++)
kvfree(nm_i->free_nid_bitmap[i]);
- kfree(nm_i->free_nid_bitmap);
+ kvfree(nm_i->free_nid_bitmap);
}
kvfree(nm_i->free_nid_count);
- kfree(nm_i->nat_bitmap);
- kfree(nm_i->nat_bits);
+ kvfree(nm_i->nat_bitmap);
+ kvfree(nm_i->nat_bits);
#ifdef CONFIG_F2FS_CHECK_FS
- kfree(nm_i->nat_bitmap_mir);
+ kvfree(nm_i->nat_bitmap_mir);
#endif
sbi->nm_info = NULL;
- kfree(nm_i);
+ kvfree(nm_i);
}
int __init f2fs_create_node_manager_caches(void)
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 1c73d879a9bc..e05af5df5648 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -361,7 +361,7 @@ static inline int set_nid(struct page *p, int off, nid_t nid, bool i)
{
struct f2fs_node *rn = F2FS_NODE(p);
- f2fs_wait_on_page_writeback(p, NODE, true);
+ f2fs_wait_on_page_writeback(p, NODE, true, true);
if (i)
rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 1dfb17f9f9ff..e3883db868d8 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -250,7 +250,7 @@ static int recover_inode(struct inode *inode, struct page *page)
i_gid_write(inode, le32_to_cpu(raw->i_gid));
if (raw->i_inline & F2FS_EXTRA_ATTR) {
- if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) &&
+ if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) &&
F2FS_FITS_IN_INODE(raw, le16_to_cpu(raw->i_extra_isize),
i_projid)) {
projid_t i_projid;
@@ -539,7 +539,7 @@ retry_dn:
goto out;
}
- f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
+ f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true);
err = f2fs_get_node_info(sbi, dn.nid, &ni);
if (err)
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 6edcf8391dd3..9b79056d705d 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -229,7 +229,7 @@ static int __revoke_inmem_pages(struct inode *inode,
lock_page(page);
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
if (recover) {
struct dnode_of_data dn;
@@ -387,8 +387,9 @@ static int __f2fs_commit_inmem_pages(struct inode *inode)
if (page->mapping == inode->i_mapping) {
trace_f2fs_commit_inmem_page(page, INMEM);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
+
set_page_dirty(page);
- f2fs_wait_on_page_writeback(page, DATA, true);
if (clear_page_dirty_for_io(page)) {
inode_dec_dirty_pages(inode);
f2fs_remove_dirty_inode(inode);
@@ -620,14 +621,16 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
return 0;
if (!test_opt(sbi, FLUSH_MERGE)) {
+ atomic_inc(&fcc->queued_flush);
ret = submit_flush_wait(sbi, ino);
+ atomic_dec(&fcc->queued_flush);
atomic_inc(&fcc->issued_flush);
return ret;
}
- if (atomic_inc_return(&fcc->issing_flush) == 1 || sbi->s_ndevs > 1) {
+ if (atomic_inc_return(&fcc->queued_flush) == 1 || sbi->s_ndevs > 1) {
ret = submit_flush_wait(sbi, ino);
- atomic_dec(&fcc->issing_flush);
+ atomic_dec(&fcc->queued_flush);
atomic_inc(&fcc->issued_flush);
return ret;
@@ -646,14 +649,14 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
if (fcc->f2fs_issue_flush) {
wait_for_completion(&cmd.wait);
- atomic_dec(&fcc->issing_flush);
+ atomic_dec(&fcc->queued_flush);
} else {
struct llist_node *list;
list = llist_del_all(&fcc->issue_list);
if (!list) {
wait_for_completion(&cmd.wait);
- atomic_dec(&fcc->issing_flush);
+ atomic_dec(&fcc->queued_flush);
} else {
struct flush_cmd *tmp, *next;
@@ -662,7 +665,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
llist_for_each_entry_safe(tmp, next, list, llnode) {
if (tmp == &cmd) {
cmd.ret = ret;
- atomic_dec(&fcc->issing_flush);
+ atomic_dec(&fcc->queued_flush);
continue;
}
tmp->ret = ret;
@@ -691,7 +694,7 @@ int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
if (!fcc)
return -ENOMEM;
atomic_set(&fcc->issued_flush, 0);
- atomic_set(&fcc->issing_flush, 0);
+ atomic_set(&fcc->queued_flush, 0);
init_waitqueue_head(&fcc->flush_wait_queue);
init_llist_head(&fcc->issue_list);
SM_I(sbi)->fcc_info = fcc;
@@ -703,7 +706,7 @@ init_thread:
"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(fcc->f2fs_issue_flush)) {
err = PTR_ERR(fcc->f2fs_issue_flush);
- kfree(fcc);
+ kvfree(fcc);
SM_I(sbi)->fcc_info = NULL;
return err;
}
@@ -722,7 +725,7 @@ void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
kthread_stop(flush_thread);
}
if (free) {
- kfree(fcc);
+ kvfree(fcc);
SM_I(sbi)->fcc_info = NULL;
}
}
@@ -907,7 +910,7 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
dc->len = len;
dc->ref = 0;
dc->state = D_PREP;
- dc->issuing = 0;
+ dc->queued = 0;
dc->error = 0;
init_completion(&dc->wait);
list_add_tail(&dc->list, pend_list);
@@ -940,7 +943,7 @@ static void __detach_discard_cmd(struct discard_cmd_control *dcc,
struct discard_cmd *dc)
{
if (dc->state == D_DONE)
- atomic_sub(dc->issuing, &dcc->issing_discard);
+ atomic_sub(dc->queued, &dcc->queued_discard);
list_del(&dc->list);
rb_erase_cached(&dc->rb_node, &dcc->root);
@@ -1143,12 +1146,12 @@ submit:
dc->bio_ref++;
spin_unlock_irqrestore(&dc->lock, flags);
- atomic_inc(&dcc->issing_discard);
- dc->issuing++;
+ atomic_inc(&dcc->queued_discard);
+ dc->queued++;
list_move_tail(&dc->list, wait_list);
/* sanity check on discard range */
- __check_sit_bitmap(sbi, start, start + len);
+ __check_sit_bitmap(sbi, lstart, lstart + len);
bio->bi_private = dc;
bio->bi_end_io = f2fs_submit_discard_endio;
@@ -1649,6 +1652,10 @@ static int issue_discard_thread(void *data)
if (dcc->discard_wake)
dcc->discard_wake = 0;
+ /* clean up pending candidates before going to sleep */
+ if (atomic_read(&dcc->queued_discard))
+ __wait_all_discard_cmd(sbi, NULL);
+
if (try_to_freeze())
continue;
if (f2fs_readonly(sbi->sb))
@@ -1734,7 +1741,7 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t blkstart, block_t blklen)
{
#ifdef CONFIG_BLK_DEV_ZONED
- if (f2fs_sb_has_blkzoned(sbi->sb) &&
+ if (f2fs_sb_has_blkzoned(sbi) &&
bdev_zoned_model(bdev) != BLK_ZONED_NONE)
return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
#endif
@@ -1882,7 +1889,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
unsigned int start = 0, end = -1;
unsigned int secno, start_segno;
bool force = (cpc->reason & CP_DISCARD);
- bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
+ bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
mutex_lock(&dirty_i->seglist_lock);
@@ -1914,7 +1921,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
(end - 1) <= cpc->trim_end)
continue;
- if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) {
+ if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) {
f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
(end - start) << sbi->log_blocks_per_seg);
continue;
@@ -1946,7 +1953,7 @@ find_next:
sbi->blocks_per_seg, cur_pos);
len = next_pos - cur_pos;
- if (f2fs_sb_has_blkzoned(sbi->sb) ||
+ if (f2fs_sb_has_blkzoned(sbi) ||
(force && len < cpc->trim_minlen))
goto skip;
@@ -1994,7 +2001,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&dcc->fstrim_list);
mutex_init(&dcc->cmd_lock);
atomic_set(&dcc->issued_discard, 0);
- atomic_set(&dcc->issing_discard, 0);
+ atomic_set(&dcc->queued_discard, 0);
atomic_set(&dcc->discard_cmd_cnt, 0);
dcc->nr_discards = 0;
dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
@@ -2010,7 +2017,7 @@ init_thread:
"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(dcc->f2fs_issue_discard)) {
err = PTR_ERR(dcc->f2fs_issue_discard);
- kfree(dcc);
+ kvfree(dcc);
SM_I(sbi)->dcc_info = NULL;
return err;
}
@@ -2027,7 +2034,7 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
f2fs_stop_discard_thread(sbi);
- kfree(dcc);
+ kvfree(dcc);
SM_I(sbi)->dcc_info = NULL;
}
@@ -2146,7 +2153,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
/* update total number of valid blocks to be written in ckpt area */
SIT_I(sbi)->written_valid_blocks += del;
- if (sbi->segs_per_sec > 1)
+ if (__is_large_section(sbi))
get_sec_entry(sbi, segno)->valid_blocks += del;
}
@@ -2412,7 +2419,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
{
/* if segs_per_sec is large than 1, we need to keep original policy. */
- if (sbi->segs_per_sec != 1)
+ if (__is_large_section(sbi))
return CURSEG_I(sbi, type)->segno;
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
@@ -2722,7 +2729,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
struct discard_policy dpolicy;
unsigned long long trimmed = 0;
int err = 0;
- bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
+ bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
return -EINVAL;
@@ -3272,16 +3279,18 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
}
void f2fs_wait_on_page_writeback(struct page *page,
- enum page_type type, bool ordered)
+ enum page_type type, bool ordered, bool locked)
{
if (PageWriteback(page)) {
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
- if (ordered)
+ if (ordered) {
wait_on_page_writeback(page);
- else
+ f2fs_bug_on(sbi, locked && PageWriteback(page));
+ } else {
wait_for_stable_page(page);
+ }
}
}
@@ -3298,7 +3307,7 @@ void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
if (cpage) {
- f2fs_wait_on_page_writeback(cpage, DATA, true);
+ f2fs_wait_on_page_writeback(cpage, DATA, true, true);
f2fs_put_page(cpage, 1);
}
}
@@ -3880,7 +3889,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
if (!sit_i->tmp_map)
return -ENOMEM;
- if (sbi->segs_per_sec > 1) {
+ if (__is_large_section(sbi)) {
sit_i->sec_entries =
f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
MAIN_SECS(sbi)),
@@ -4035,7 +4044,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
se->valid_blocks;
}
- if (sbi->segs_per_sec > 1)
+ if (__is_large_section(sbi))
get_sec_entry(sbi, start)->valid_blocks +=
se->valid_blocks;
}
@@ -4079,7 +4088,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
sbi->discard_blks -= se->valid_blocks;
}
- if (sbi->segs_per_sec > 1) {
+ if (__is_large_section(sbi)) {
get_sec_entry(sbi, start)->valid_blocks +=
se->valid_blocks;
get_sec_entry(sbi, start)->valid_blocks -=
@@ -4314,7 +4323,7 @@ static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
destroy_victim_secmap(sbi);
SM_I(sbi)->dirty_info = NULL;
- kfree(dirty_i);
+ kvfree(dirty_i);
}
static void destroy_curseg(struct f2fs_sb_info *sbi)
@@ -4326,10 +4335,10 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
return;
SM_I(sbi)->curseg_array = NULL;
for (i = 0; i < NR_CURSEG_TYPE; i++) {
- kfree(array[i].sum_blk);
- kfree(array[i].journal);
+ kvfree(array[i].sum_blk);
+ kvfree(array[i].journal);
}
- kfree(array);
+ kvfree(array);
}
static void destroy_free_segmap(struct f2fs_sb_info *sbi)
@@ -4340,7 +4349,7 @@ static void destroy_free_segmap(struct f2fs_sb_info *sbi)
SM_I(sbi)->free_info = NULL;
kvfree(free_i->free_segmap);
kvfree(free_i->free_secmap);
- kfree(free_i);
+ kvfree(free_i);
}
static void destroy_sit_info(struct f2fs_sb_info *sbi)
@@ -4353,26 +4362,26 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
if (sit_i->sentries) {
for (start = 0; start < MAIN_SEGS(sbi); start++) {
- kfree(sit_i->sentries[start].cur_valid_map);
+ kvfree(sit_i->sentries[start].cur_valid_map);
#ifdef CONFIG_F2FS_CHECK_FS
- kfree(sit_i->sentries[start].cur_valid_map_mir);
+ kvfree(sit_i->sentries[start].cur_valid_map_mir);
#endif
- kfree(sit_i->sentries[start].ckpt_valid_map);
- kfree(sit_i->sentries[start].discard_map);
+ kvfree(sit_i->sentries[start].ckpt_valid_map);
+ kvfree(sit_i->sentries[start].discard_map);
}
}
- kfree(sit_i->tmp_map);
+ kvfree(sit_i->tmp_map);
kvfree(sit_i->sentries);
kvfree(sit_i->sec_entries);
kvfree(sit_i->dirty_sentries_bitmap);
SM_I(sbi)->sit_info = NULL;
- kfree(sit_i->sit_bitmap);
+ kvfree(sit_i->sit_bitmap);
#ifdef CONFIG_F2FS_CHECK_FS
- kfree(sit_i->sit_bitmap_mir);
+ kvfree(sit_i->sit_bitmap_mir);
#endif
- kfree(sit_i);
+ kvfree(sit_i);
}
void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
@@ -4388,7 +4397,7 @@ void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
destroy_free_segmap(sbi);
destroy_sit_info(sbi);
sbi->sm_info = NULL;
- kfree(sm_info);
+ kvfree(sm_info);
}
int __init f2fs_create_segment_manager_caches(void)
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index ab3465faddf1..a77f76f528b6 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -333,7 +333,7 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi,
* In order to get # of valid blocks in a section instantly from many
* segments, f2fs manages two counting structures separately.
*/
- if (use_section && sbi->segs_per_sec > 1)
+ if (use_section && __is_large_section(sbi))
return get_sec_entry(sbi, segno)->valid_blocks;
else
return get_seg_entry(sbi, segno)->valid_blocks;
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 9e13db994fdf..a467aca29cfe 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -135,6 +135,6 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi)
f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi));
spin_lock(&f2fs_list_lock);
- list_del(&sbi->s_list);
+ list_del_init(&sbi->s_list);
spin_unlock(&f2fs_list_lock);
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index af58b2cc21b8..c46a1d4318d4 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -38,7 +38,7 @@ static struct kmem_cache *f2fs_inode_cachep;
#ifdef CONFIG_F2FS_FAULT_INJECTION
-char *f2fs_fault_name[FAULT_MAX] = {
+const char *f2fs_fault_name[FAULT_MAX] = {
[FAULT_KMALLOC] = "kmalloc",
[FAULT_KVMALLOC] = "kvmalloc",
[FAULT_PAGE_ALLOC] = "page alloc",
@@ -259,7 +259,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype,
"quota options when quota turned on");
return -EINVAL;
}
- if (f2fs_sb_has_quota_ino(sb)) {
+ if (f2fs_sb_has_quota_ino(sbi)) {
f2fs_msg(sb, KERN_INFO,
"QUOTA feature is enabled, so ignore qf_name");
return 0;
@@ -289,7 +289,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype,
set_opt(sbi, QUOTA);
return 0;
errout:
- kfree(qname);
+ kvfree(qname);
return ret;
}
@@ -302,7 +302,7 @@ static int f2fs_clear_qf_name(struct super_block *sb, int qtype)
" when quota turned on");
return -EINVAL;
}
- kfree(F2FS_OPTION(sbi).s_qf_names[qtype]);
+ kvfree(F2FS_OPTION(sbi).s_qf_names[qtype]);
F2FS_OPTION(sbi).s_qf_names[qtype] = NULL;
return 0;
}
@@ -314,7 +314,7 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
* 'grpquota' mount options are allowed even without quota feature
* to support legacy quotas in quota files.
*/
- if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi->sb)) {
+ if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi)) {
f2fs_msg(sbi->sb, KERN_ERR, "Project quota feature not enabled. "
"Cannot enable project quota enforcement.");
return -1;
@@ -348,7 +348,7 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
}
}
- if (f2fs_sb_has_quota_ino(sbi->sb) && F2FS_OPTION(sbi).s_jquota_fmt) {
+ if (f2fs_sb_has_quota_ino(sbi) && F2FS_OPTION(sbi).s_jquota_fmt) {
f2fs_msg(sbi->sb, KERN_INFO,
"QUOTA feature is enabled, so ignore jquota_fmt");
F2FS_OPTION(sbi).s_jquota_fmt = 0;
@@ -399,10 +399,10 @@ static int parse_options(struct super_block *sb, char *options)
set_opt(sbi, BG_GC);
set_opt(sbi, FORCE_FG_GC);
} else {
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
- kfree(name);
+ kvfree(name);
break;
case Opt_disable_roll_forward:
set_opt(sbi, DISABLE_ROLL_FORWARD);
@@ -417,7 +417,7 @@ static int parse_options(struct super_block *sb, char *options)
set_opt(sbi, DISCARD);
break;
case Opt_nodiscard:
- if (f2fs_sb_has_blkzoned(sb)) {
+ if (f2fs_sb_has_blkzoned(sbi)) {
f2fs_msg(sb, KERN_WARNING,
"discard is required for zoned block devices");
return -EINVAL;
@@ -566,11 +566,11 @@ static int parse_options(struct super_block *sb, char *options)
return -ENOMEM;
if (strlen(name) == 8 &&
!strncmp(name, "adaptive", 8)) {
- if (f2fs_sb_has_blkzoned(sb)) {
+ if (f2fs_sb_has_blkzoned(sbi)) {
f2fs_msg(sb, KERN_WARNING,
"adaptive mode is not allowed with "
"zoned block device feature");
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
@@ -578,10 +578,10 @@ static int parse_options(struct super_block *sb, char *options)
!strncmp(name, "lfs", 3)) {
set_opt_mode(sbi, F2FS_MOUNT_LFS);
} else {
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
- kfree(name);
+ kvfree(name);
break;
case Opt_io_size_bits:
if (args->from && match_int(args, &arg))
@@ -714,10 +714,10 @@ static int parse_options(struct super_block *sb, char *options)
!strncmp(name, "fs-based", 8)) {
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS;
} else {
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
- kfree(name);
+ kvfree(name);
break;
case Opt_alloc:
name = match_strdup(&args[0]);
@@ -731,10 +731,10 @@ static int parse_options(struct super_block *sb, char *options)
!strncmp(name, "reuse", 5)) {
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
} else {
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
- kfree(name);
+ kvfree(name);
break;
case Opt_fsync:
name = match_strdup(&args[0]);
@@ -751,14 +751,14 @@ static int parse_options(struct super_block *sb, char *options)
F2FS_OPTION(sbi).fsync_mode =
FSYNC_MODE_NOBARRIER;
} else {
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
- kfree(name);
+ kvfree(name);
break;
case Opt_test_dummy_encryption:
#ifdef CONFIG_F2FS_FS_ENCRYPTION
- if (!f2fs_sb_has_encrypt(sb)) {
+ if (!f2fs_sb_has_encrypt(sbi)) {
f2fs_msg(sb, KERN_ERR, "Encrypt feature is off");
return -EINVAL;
}
@@ -783,10 +783,10 @@ static int parse_options(struct super_block *sb, char *options)
!strncmp(name, "disable", 7)) {
set_opt(sbi, DISABLE_CHECKPOINT);
} else {
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
- kfree(name);
+ kvfree(name);
break;
default:
f2fs_msg(sb, KERN_ERR,
@@ -799,13 +799,13 @@ static int parse_options(struct super_block *sb, char *options)
if (f2fs_check_quota_options(sbi))
return -EINVAL;
#else
- if (f2fs_sb_has_quota_ino(sbi->sb) && !f2fs_readonly(sbi->sb)) {
+ if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) {
f2fs_msg(sbi->sb, KERN_INFO,
"Filesystem with quota feature cannot be mounted RDWR "
"without CONFIG_QUOTA");
return -EINVAL;
}
- if (f2fs_sb_has_project_quota(sbi->sb) && !f2fs_readonly(sbi->sb)) {
+ if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) {
f2fs_msg(sb, KERN_ERR,
"Filesystem with project quota feature cannot be "
"mounted RDWR without CONFIG_QUOTA");
@@ -821,8 +821,8 @@ static int parse_options(struct super_block *sb, char *options)
}
if (test_opt(sbi, INLINE_XATTR_SIZE)) {
- if (!f2fs_sb_has_extra_attr(sb) ||
- !f2fs_sb_has_flexible_inline_xattr(sb)) {
+ if (!f2fs_sb_has_extra_attr(sbi) ||
+ !f2fs_sb_has_flexible_inline_xattr(sbi)) {
f2fs_msg(sb, KERN_ERR,
"extra_attr or flexible_inline_xattr "
"feature is off");
@@ -1017,10 +1017,10 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
for (i = 0; i < sbi->s_ndevs; i++) {
blkdev_put(FDEV(i).bdev, FMODE_EXCL);
#ifdef CONFIG_BLK_DEV_ZONED
- kfree(FDEV(i).blkz_type);
+ kvfree(FDEV(i).blkz_type);
#endif
}
- kfree(sbi->devs);
+ kvfree(sbi->devs);
}
static void f2fs_put_super(struct super_block *sb)
@@ -1058,9 +1058,6 @@ static void f2fs_put_super(struct super_block *sb)
f2fs_write_checkpoint(sbi, &cpc);
}
- /* f2fs_write_checkpoint can update stat informaion */
- f2fs_destroy_stats(sbi);
-
/*
* normally superblock is clean, so we need to release this.
* In addition, EIO will skip do checkpoint, we need this as well.
@@ -1080,29 +1077,35 @@ static void f2fs_put_super(struct super_block *sb)
iput(sbi->node_inode);
iput(sbi->meta_inode);
+ /*
+ * iput() can update stat information, if f2fs_write_checkpoint()
+ * above failed with error.
+ */
+ f2fs_destroy_stats(sbi);
+
/* destroy f2fs internal modules */
f2fs_destroy_node_manager(sbi);
f2fs_destroy_segment_manager(sbi);
- kfree(sbi->ckpt);
+ kvfree(sbi->ckpt);
f2fs_unregister_sysfs(sbi);
sb->s_fs_info = NULL;
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
- kfree(sbi->raw_super);
+ kvfree(sbi->raw_super);
destroy_device_list(sbi);
mempool_destroy(sbi->write_io_dummy);
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++)
- kfree(F2FS_OPTION(sbi).s_qf_names[i]);
+ kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
#endif
destroy_percpu_info(sbi);
for (i = 0; i < NR_PAGE_TYPE; i++)
- kfree(sbi->write_io[i]);
- kfree(sbi);
+ kvfree(sbi->write_io[i]);
+ kvfree(sbi);
}
int f2fs_sync_fs(struct super_block *sb, int sync)
@@ -1431,7 +1434,7 @@ static void default_options(struct f2fs_sb_info *sbi)
sbi->sb->s_flags |= SB_LAZYTIME;
set_opt(sbi, FLUSH_MERGE);
set_opt(sbi, DISCARD);
- if (f2fs_sb_has_blkzoned(sbi->sb))
+ if (f2fs_sb_has_blkzoned(sbi))
set_opt_mode(sbi, F2FS_MOUNT_LFS);
else
set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
@@ -1457,19 +1460,16 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
sbi->sb->s_flags |= SB_ACTIVE;
- mutex_lock(&sbi->gc_mutex);
f2fs_update_time(sbi, DISABLE_TIME);
while (!f2fs_time_over(sbi, DISABLE_TIME)) {
+ mutex_lock(&sbi->gc_mutex);
err = f2fs_gc(sbi, true, false, NULL_SEGNO);
if (err == -ENODATA)
break;
- if (err && err != -EAGAIN) {
- mutex_unlock(&sbi->gc_mutex);
+ if (err && err != -EAGAIN)
return err;
- }
}
- mutex_unlock(&sbi->gc_mutex);
err = sync_filesystem(sbi->sb);
if (err)
@@ -1531,7 +1531,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
GFP_KERNEL);
if (!org_mount_opt.s_qf_names[i]) {
for (j = 0; j < i; j++)
- kfree(org_mount_opt.s_qf_names[j]);
+ kvfree(org_mount_opt.s_qf_names[j]);
return -ENOMEM;
}
} else {
@@ -1575,7 +1575,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
sb->s_flags &= ~SB_RDONLY;
if (sb_any_quota_suspended(sb)) {
dquot_resume(sb, -1);
- } else if (f2fs_sb_has_quota_ino(sb)) {
+ } else if (f2fs_sb_has_quota_ino(sbi)) {
err = f2fs_enable_quotas(sb);
if (err)
goto restore_opts;
@@ -1651,7 +1651,7 @@ skip:
#ifdef CONFIG_QUOTA
/* Release old quota file names */
for (i = 0; i < MAXQUOTAS; i++)
- kfree(org_mount_opt.s_qf_names[i]);
+ kvfree(org_mount_opt.s_qf_names[i]);
#endif
/* Update the POSIXACL Flag */
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
@@ -1672,7 +1672,7 @@ restore_opts:
#ifdef CONFIG_QUOTA
F2FS_OPTION(sbi).s_jquota_fmt = org_mount_opt.s_jquota_fmt;
for (i = 0; i < MAXQUOTAS; i++) {
- kfree(F2FS_OPTION(sbi).s_qf_names[i]);
+ kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
F2FS_OPTION(sbi).s_qf_names[i] = org_mount_opt.s_qf_names[i];
}
#endif
@@ -1817,7 +1817,7 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly)
int enabled = 0;
int i, err;
- if (f2fs_sb_has_quota_ino(sbi->sb) && rdonly) {
+ if (f2fs_sb_has_quota_ino(sbi) && rdonly) {
err = f2fs_enable_quotas(sbi->sb);
if (err) {
f2fs_msg(sbi->sb, KERN_ERR,
@@ -1848,7 +1848,7 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id,
unsigned long qf_inum;
int err;
- BUG_ON(!f2fs_sb_has_quota_ino(sb));
+ BUG_ON(!f2fs_sb_has_quota_ino(F2FS_SB(sb)));
qf_inum = f2fs_qf_ino(sb, type);
if (!qf_inum)
@@ -1993,7 +1993,7 @@ static int f2fs_quota_off(struct super_block *sb, int type)
goto out_put;
err = dquot_quota_off(sb, type);
- if (err || f2fs_sb_has_quota_ino(sb))
+ if (err || f2fs_sb_has_quota_ino(F2FS_SB(sb)))
goto out_put;
inode_lock(inode);
@@ -2173,7 +2173,7 @@ static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len,
* if LOST_FOUND feature is enabled.
*
*/
- if (f2fs_sb_has_lost_found(sbi->sb) &&
+ if (f2fs_sb_has_lost_found(sbi) &&
inode->i_ino == F2FS_ROOT_INO(sbi))
return -EPERM;
@@ -2396,7 +2396,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
__u32 crc = 0;
/* Check checksum_offset and crc in superblock */
- if (le32_to_cpu(raw_super->feature) & F2FS_FEATURE_SB_CHKSUM) {
+ if (__F2FS_HAS_FEATURE(raw_super, F2FS_FEATURE_SB_CHKSUM)) {
crc_offset = le32_to_cpu(raw_super->checksum_offset);
if (crc_offset !=
offsetof(struct f2fs_super_block, crc)) {
@@ -2496,10 +2496,10 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
return 1;
}
- if (segment_count > (le32_to_cpu(raw_super->block_count) >> 9)) {
+ if (segment_count > (le64_to_cpu(raw_super->block_count) >> 9)) {
f2fs_msg(sb, KERN_INFO,
- "Wrong segment_count / block_count (%u > %u)",
- segment_count, le32_to_cpu(raw_super->block_count));
+ "Wrong segment_count / block_count (%u > %llu)",
+ segment_count, le64_to_cpu(raw_super->block_count));
return 1;
}
@@ -2674,7 +2674,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
static void init_sb_info(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = sbi->raw_super;
- int i, j;
+ int i;
sbi->log_sectors_per_block =
le32_to_cpu(raw_super->log_sectors_per_block);
@@ -2692,7 +2692,10 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->node_ino_num = le32_to_cpu(raw_super->node_ino);
sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino);
sbi->cur_victim_sec = NULL_SECNO;
+ sbi->next_victim_seg[BG_GC] = NULL_SEGNO;
+ sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
+ sbi->migration_granularity = sbi->segs_per_sec;
sbi->dir_level = DEF_DIR_LEVEL;
sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
@@ -2710,9 +2713,6 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&sbi->s_list);
mutex_init(&sbi->umount_mutex);
- for (i = 0; i < NR_PAGE_TYPE - 1; i++)
- for (j = HOT; j < NR_TEMP_TYPE; j++)
- mutex_init(&sbi->wio_mutex[i][j]);
init_rwsem(&sbi->io_order_lock);
spin_lock_init(&sbi->cp_lock);
@@ -2749,7 +2749,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
unsigned int n = 0;
int err = -EIO;
- if (!f2fs_sb_has_blkzoned(sbi->sb))
+ if (!f2fs_sb_has_blkzoned(sbi))
return 0;
if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
@@ -2800,7 +2800,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
}
}
- kfree(zones);
+ kvfree(zones);
return err;
}
@@ -2860,7 +2860,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
/* No valid superblock */
if (!*raw_super)
- kfree(super);
+ kvfree(super);
else
err = 0;
@@ -2880,7 +2880,7 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
}
/* we should update superblock crc here */
- if (!recover && f2fs_sb_has_sb_chksum(sbi->sb)) {
+ if (!recover && f2fs_sb_has_sb_chksum(sbi)) {
crc = f2fs_crc32(sbi, F2FS_RAW_SUPER(sbi),
offsetof(struct f2fs_super_block, crc));
F2FS_RAW_SUPER(sbi)->crc = cpu_to_le32(crc);
@@ -2968,7 +2968,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
#ifdef CONFIG_BLK_DEV_ZONED
if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
- !f2fs_sb_has_blkzoned(sbi->sb)) {
+ !f2fs_sb_has_blkzoned(sbi)) {
f2fs_msg(sbi->sb, KERN_ERR,
"Zoned block device feature not enabled\n");
return -EINVAL;
@@ -3064,7 +3064,7 @@ try_onemore:
sbi->raw_super = raw_super;
/* precompute checksum seed for metadata */
- if (f2fs_sb_has_inode_chksum(sb))
+ if (f2fs_sb_has_inode_chksum(sbi))
sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid,
sizeof(raw_super->uuid));
@@ -3074,7 +3074,7 @@ try_onemore:
* devices, but mandatory for host-managed zoned block devices.
*/
#ifndef CONFIG_BLK_DEV_ZONED
- if (f2fs_sb_has_blkzoned(sb)) {
+ if (f2fs_sb_has_blkzoned(sbi)) {
f2fs_msg(sb, KERN_ERR,
"Zoned block device support is not enabled\n");
err = -EOPNOTSUPP;
@@ -3101,13 +3101,13 @@ try_onemore:
#ifdef CONFIG_QUOTA
sb->dq_op = &f2fs_quota_operations;
- if (f2fs_sb_has_quota_ino(sb))
+ if (f2fs_sb_has_quota_ino(sbi))
sb->s_qcop = &dquot_quotactl_sysfile_ops;
else
sb->s_qcop = &f2fs_quotactl_ops;
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
- if (f2fs_sb_has_quota_ino(sbi->sb)) {
+ if (f2fs_sb_has_quota_ino(sbi)) {
for (i = 0; i < MAXQUOTAS; i++) {
if (f2fs_qf_ino(sbi->sb, i))
sbi->nquota_files++;
@@ -3259,30 +3259,30 @@ try_onemore:
f2fs_build_gc_manager(sbi);
+ err = f2fs_build_stats(sbi);
+ if (err)
+ goto free_nm;
+
/* get an inode for node space */
sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi));
if (IS_ERR(sbi->node_inode)) {
f2fs_msg(sb, KERN_ERR, "Failed to read node inode");
err = PTR_ERR(sbi->node_inode);
- goto free_nm;
+ goto free_stats;
}
- err = f2fs_build_stats(sbi);
- if (err)
- goto free_node_inode;
-
/* read root inode and dentry */
root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
if (IS_ERR(root)) {
f2fs_msg(sb, KERN_ERR, "Failed to read root inode");
err = PTR_ERR(root);
- goto free_stats;
+ goto free_node_inode;
}
if (!S_ISDIR(root->i_mode) || !root->i_blocks ||
!root->i_size || !root->i_nlink) {
iput(root);
err = -EINVAL;
- goto free_stats;
+ goto free_node_inode;
}
sb->s_root = d_make_root(root); /* allocate root dentry */
@@ -3297,7 +3297,7 @@ try_onemore:
#ifdef CONFIG_QUOTA
/* Enable quota usage during mount */
- if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) {
+ if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) {
err = f2fs_enable_quotas(sb);
if (err)
f2fs_msg(sb, KERN_ERR,
@@ -3369,7 +3369,7 @@ skip_recovery:
if (err)
goto free_meta;
}
- kfree(options);
+ kvfree(options);
/* recover broken superblock */
if (recovery) {
@@ -3392,7 +3392,7 @@ skip_recovery:
free_meta:
#ifdef CONFIG_QUOTA
f2fs_truncate_quota_inode_pages(sb);
- if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb))
+ if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb))
f2fs_quota_off_umount(sbi->sb);
#endif
/*
@@ -3406,19 +3406,19 @@ free_meta:
free_root_inode:
dput(sb->s_root);
sb->s_root = NULL;
-free_stats:
- f2fs_destroy_stats(sbi);
free_node_inode:
f2fs_release_ino_entry(sbi, true);
truncate_inode_pages_final(NODE_MAPPING(sbi));
iput(sbi->node_inode);
+free_stats:
+ f2fs_destroy_stats(sbi);
free_nm:
f2fs_destroy_node_manager(sbi);
free_sm:
f2fs_destroy_segment_manager(sbi);
free_devices:
destroy_device_list(sbi);
- kfree(sbi->ckpt);
+ kvfree(sbi->ckpt);
free_meta_inode:
make_bad_inode(sbi->meta_inode);
iput(sbi->meta_inode);
@@ -3428,19 +3428,19 @@ free_percpu:
destroy_percpu_info(sbi);
free_bio_info:
for (i = 0; i < NR_PAGE_TYPE; i++)
- kfree(sbi->write_io[i]);
+ kvfree(sbi->write_io[i]);
free_options:
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++)
- kfree(F2FS_OPTION(sbi).s_qf_names[i]);
+ kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
#endif
- kfree(options);
+ kvfree(options);
free_sb_buf:
- kfree(raw_super);
+ kvfree(raw_super);
free_sbi:
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
- kfree(sbi);
+ kvfree(sbi);
/* give only one another chance */
if (retry) {
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index b777cbdd796b..0575edbe3ed6 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -90,34 +90,34 @@ static ssize_t features_show(struct f2fs_attr *a,
if (!sb->s_bdev->bd_part)
return snprintf(buf, PAGE_SIZE, "0\n");
- if (f2fs_sb_has_encrypt(sb))
+ if (f2fs_sb_has_encrypt(sbi))
len += snprintf(buf, PAGE_SIZE - len, "%s",
"encryption");
- if (f2fs_sb_has_blkzoned(sb))
+ if (f2fs_sb_has_blkzoned(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "blkzoned");
- if (f2fs_sb_has_extra_attr(sb))
+ if (f2fs_sb_has_extra_attr(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "extra_attr");
- if (f2fs_sb_has_project_quota(sb))
+ if (f2fs_sb_has_project_quota(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "projquota");
- if (f2fs_sb_has_inode_chksum(sb))
+ if (f2fs_sb_has_inode_chksum(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "inode_checksum");
- if (f2fs_sb_has_flexible_inline_xattr(sb))
+ if (f2fs_sb_has_flexible_inline_xattr(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "flexible_inline_xattr");
- if (f2fs_sb_has_quota_ino(sb))
+ if (f2fs_sb_has_quota_ino(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "quota_ino");
- if (f2fs_sb_has_inode_crtime(sb))
+ if (f2fs_sb_has_inode_crtime(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "inode_crtime");
- if (f2fs_sb_has_lost_found(sb))
+ if (f2fs_sb_has_lost_found(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "lost_found");
- if (f2fs_sb_has_sb_chksum(sb))
+ if (f2fs_sb_has_sb_chksum(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "sb_checksum");
len += snprintf(buf + len, PAGE_SIZE - len, "\n");
@@ -246,6 +246,11 @@ out:
return count;
}
+ if (!strcmp(a->attr.name, "migration_granularity")) {
+ if (t == 0 || t > sbi->segs_per_sec)
+ return -EINVAL;
+ }
+
if (!strcmp(a->attr.name, "trim_sections"))
return -EINVAL;
@@ -406,6 +411,7 @@ F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, migration_granularity, migration_granularity);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
@@ -460,6 +466,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(min_hot_blocks),
ATTR_LIST(min_ssr_sections),
ATTR_LIST(max_victim_search),
+ ATTR_LIST(migration_granularity),
ATTR_LIST(dir_level),
ATTR_LIST(ram_thresh),
ATTR_LIST(ra_nid_pages),
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 7261245c208d..18d5ffbc5e8c 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -288,7 +288,7 @@ static int read_xattr_block(struct inode *inode, void *txattr_addr)
static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
unsigned int index, unsigned int len,
const char *name, struct f2fs_xattr_entry **xe,
- void **base_addr)
+ void **base_addr, int *base_size)
{
void *cur_addr, *txattr_addr, *last_addr = NULL;
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
@@ -299,8 +299,8 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
if (!size && !inline_size)
return -ENODATA;
- txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode),
- inline_size + size + XATTR_PADDING_SIZE, GFP_NOFS);
+ *base_size = inline_size + size + XATTR_PADDING_SIZE;
+ txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), *base_size, GFP_NOFS);
if (!txattr_addr)
return -ENOMEM;
@@ -312,8 +312,10 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
*xe = __find_inline_xattr(inode, txattr_addr, &last_addr,
index, len, name);
- if (*xe)
+ if (*xe) {
+ *base_size = inline_size;
goto check;
+ }
}
/* read from xattr node block */
@@ -415,7 +417,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
}
f2fs_wait_on_page_writeback(ipage ? ipage : in_page,
- NODE, true);
+ NODE, true, true);
/* no need to use xattr node block */
if (hsize <= inline_size) {
err = f2fs_truncate_xattr_node(inode);
@@ -439,7 +441,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
goto in_page_out;
}
f2fs_bug_on(sbi, new_nid);
- f2fs_wait_on_page_writeback(xpage, NODE, true);
+ f2fs_wait_on_page_writeback(xpage, NODE, true, true);
} else {
struct dnode_of_data dn;
set_new_dnode(&dn, inode, NULL, NULL, new_nid);
@@ -474,6 +476,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
int error = 0;
unsigned int size, len;
void *base_addr = NULL;
+ int base_size;
if (name == NULL)
return -EINVAL;
@@ -484,7 +487,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
down_read(&F2FS_I(inode)->i_xattr_sem);
error = lookup_all_xattrs(inode, ipage, index, len, name,
- &entry, &base_addr);
+ &entry, &base_addr, &base_size);
up_read(&F2FS_I(inode)->i_xattr_sem);
if (error)
return error;
@@ -498,6 +501,11 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
if (buffer) {
char *pval = entry->e_name + entry->e_name_len;
+
+ if (base_size - (pval - (char *)base_addr) < size) {
+ error = -ERANGE;
+ goto out;
+ }
memcpy(buffer, pval, size);
}
error = size;
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 78d501c1fb65..738e427e2d21 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -363,7 +363,7 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
*phys = 0;
*mapped_blocks = 0;
- if ((sbi->fat_bits != 32) && (inode->i_ino == MSDOS_ROOT_INO)) {
+ if (!is_fat32(sbi) && (inode->i_ino == MSDOS_ROOT_INO)) {
if (sector < (sbi->dir_entries >> sbi->dir_per_block_bits)) {
*phys = sector + sbi->dir_start;
*mapped_blocks = 1;
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index c8366cb8eccd..9d01db37183f 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -57,7 +57,7 @@ static inline void fat_dir_readahead(struct inode *dir, sector_t iblock,
if ((iblock & (sbi->sec_per_clus - 1)) || sbi->sec_per_clus == 1)
return;
/* root dir of FAT12/FAT16 */
- if ((sbi->fat_bits != 32) && (dir->i_ino == MSDOS_ROOT_INO))
+ if (!is_fat32(sbi) && (dir->i_ino == MSDOS_ROOT_INO))
return;
bh = sb_find_get_block(sb, phys);
@@ -805,7 +805,7 @@ static long fat_dir_ioctl(struct file *filp, unsigned int cmd,
return fat_generic_ioctl(filp, cmd, arg);
}
- if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2])))
+ if (!access_ok(d1, sizeof(struct __fat_dirent[2])))
return -EFAULT;
/*
* Yes, we don't need this put_user() absolutely. However old
@@ -845,7 +845,7 @@ static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd,
return fat_generic_ioctl(filp, cmd, (unsigned long)arg);
}
- if (!access_ok(VERIFY_WRITE, d1, sizeof(struct compat_dirent[2])))
+ if (!access_ok(d1, sizeof(struct compat_dirent[2])))
return -EFAULT;
/*
* Yes, we don't need this put_user() absolutely. However old
@@ -1313,7 +1313,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots,
}
}
if (dir->i_ino == MSDOS_ROOT_INO) {
- if (sbi->fat_bits != 32)
+ if (!is_fat32(sbi))
goto error;
} else if (MSDOS_I(dir)->i_start == 0) {
fat_msg(sb, KERN_ERR, "Corrupted directory (i_pos %lld)",
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 4e1b2f6df5e6..922a0c6ba46c 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -142,6 +142,34 @@ static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb)
return sb->s_fs_info;
}
+/*
+ * Functions that determine the variant of the FAT file system (i.e.,
+ * whether this is FAT12, FAT16 or FAT32.
+ */
+static inline bool is_fat12(const struct msdos_sb_info *sbi)
+{
+ return sbi->fat_bits == 12;
+}
+
+static inline bool is_fat16(const struct msdos_sb_info *sbi)
+{
+ return sbi->fat_bits == 16;
+}
+
+static inline bool is_fat32(const struct msdos_sb_info *sbi)
+{
+ return sbi->fat_bits == 32;
+}
+
+/* Maximum number of clusters */
+static inline u32 max_fat(struct super_block *sb)
+{
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+
+ return is_fat32(sbi) ? MAX_FAT32 :
+ is_fat16(sbi) ? MAX_FAT16 : MAX_FAT12;
+}
+
static inline struct msdos_inode_info *MSDOS_I(struct inode *inode)
{
return container_of(inode, struct msdos_inode_info, vfs_inode);
@@ -257,7 +285,7 @@ static inline int fat_get_start(const struct msdos_sb_info *sbi,
const struct msdos_dir_entry *de)
{
int cluster = le16_to_cpu(de->start);
- if (sbi->fat_bits == 32)
+ if (is_fat32(sbi))
cluster |= (le16_to_cpu(de->starthi) << 16);
return cluster;
}
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index f58c0cacc531..495edeafd60a 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -290,19 +290,17 @@ void fat_ent_access_init(struct super_block *sb)
mutex_init(&sbi->fat_lock);
- switch (sbi->fat_bits) {
- case 32:
+ if (is_fat32(sbi)) {
sbi->fatent_shift = 2;
sbi->fatent_ops = &fat32_ops;
- break;
- case 16:
+ } else if (is_fat16(sbi)) {
sbi->fatent_shift = 1;
sbi->fatent_ops = &fat16_ops;
- break;
- case 12:
+ } else if (is_fat12(sbi)) {
sbi->fatent_shift = -1;
sbi->fatent_ops = &fat12_ops;
- break;
+ } else {
+ fat_fs_error(sb, "invalid FAT variant, %u bits", sbi->fat_bits);
}
}
@@ -310,7 +308,7 @@ static void mark_fsinfo_dirty(struct super_block *sb)
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
- if (sb_rdonly(sb) || sbi->fat_bits != 32)
+ if (sb_rdonly(sb) || !is_fat32(sbi))
return;
__mark_inode_dirty(sbi->fsinfo_inode, I_DIRTY_SYNC);
@@ -327,7 +325,7 @@ static inline int fat_ent_update_ptr(struct super_block *sb,
/* Is this fatent's blocks including this entry? */
if (!fatent->nr_bhs || bhs[0]->b_blocknr != blocknr)
return 0;
- if (sbi->fat_bits == 12) {
+ if (is_fat12(sbi)) {
if ((offset + 1) < sb->s_blocksize) {
/* This entry is on bhs[0]. */
if (fatent->nr_bhs == 2) {
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index c0b5b5c3373b..79bb0e73a65f 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -686,7 +686,7 @@ static void fat_set_state(struct super_block *sb,
b = (struct fat_boot_sector *) bh->b_data;
- if (sbi->fat_bits == 32) {
+ if (is_fat32(sbi)) {
if (set)
b->fat32.state |= FAT_STATE_DIRTY;
else
@@ -1396,7 +1396,7 @@ static int fat_read_root(struct inode *inode)
inode->i_mode = fat_make_mode(sbi, ATTR_DIR, S_IRWXUGO);
inode->i_op = sbi->dir_ops;
inode->i_fop = &fat_dir_operations;
- if (sbi->fat_bits == 32) {
+ if (is_fat32(sbi)) {
MSDOS_I(inode)->i_start = sbi->root_cluster;
error = fat_calc_dir_size(inode);
if (error < 0)
@@ -1423,7 +1423,7 @@ static unsigned long calc_fat_clusters(struct super_block *sb)
struct msdos_sb_info *sbi = MSDOS_SB(sb);
/* Divide first to avoid overflow */
- if (sbi->fat_bits != 12) {
+ if (!is_fat12(sbi)) {
unsigned long ent_per_sec = sb->s_blocksize * 8 / sbi->fat_bits;
return ent_per_sec * sbi->fat_length;
}
@@ -1743,7 +1743,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
}
/* interpret volume ID as a little endian 32 bit integer */
- if (sbi->fat_bits == 32)
+ if (is_fat32(sbi))
sbi->vol_id = bpb.fat32_vol_id;
else /* fat 16 or 12 */
sbi->vol_id = bpb.fat16_vol_id;
@@ -1769,11 +1769,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
total_clusters = (total_sectors - sbi->data_start) / sbi->sec_per_clus;
- if (sbi->fat_bits != 32)
+ if (!is_fat32(sbi))
sbi->fat_bits = (total_clusters > MAX_FAT12) ? 16 : 12;
/* some OSes set FAT_STATE_DIRTY and clean it on unmount. */
- if (sbi->fat_bits == 32)
+ if (is_fat32(sbi))
sbi->dirty = bpb.fat32_state & FAT_STATE_DIRTY;
else /* fat 16 or 12 */
sbi->dirty = bpb.fat16_state & FAT_STATE_DIRTY;
@@ -1781,7 +1781,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
/* check that FAT table does not overflow */
fat_clusters = calc_fat_clusters(sb);
total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT);
- if (total_clusters > MAX_FAT(sb)) {
+ if (total_clusters > max_fat(sb)) {
if (!silent)
fat_msg(sb, KERN_ERR, "count of clusters too big (%u)",
total_clusters);
@@ -1803,11 +1803,15 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
fat_ent_access_init(sb);
/*
- * The low byte of FAT's first entry must have same value with
- * media-field. But in real world, too many devices is
- * writing wrong value. So, removed that validity check.
+ * The low byte of the first FAT entry must have the same value as
+ * the media field of the boot sector. But in real world, too many
+ * devices are writing wrong values. So, removed that validity check.
*
- * if (FAT_FIRST_ENT(sb, media) != first)
+ * The removed check compared the first FAT entry to a value dependent
+ * on the media field like this:
+ * == (0x0F00 | media), for FAT12
+ * == (0XFF00 | media), for FAT16
+ * == (0x0FFFFF | media), for FAT32
*/
error = -EINVAL;
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index fce0a76f3f1e..4fc950bb6433 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -64,7 +64,7 @@ int fat_clusters_flush(struct super_block *sb)
struct buffer_head *bh;
struct fat_boot_fsinfo *fsinfo;
- if (sbi->fat_bits != 32)
+ if (!is_fat32(sbi))
return 0;
bh = sb_bread(sb, sbi->fsinfo_sector);
diff --git a/fs/file.c b/fs/file.c
index 7ffd6e9d103d..3209ee271c41 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -158,7 +158,7 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr)
* or have finished their rcu_read_lock_sched() section.
*/
if (atomic_read(&files->count) > 1)
- synchronize_sched();
+ synchronize_rcu();
spin_lock(&files->file_lock);
if (!new_fdt)
@@ -640,6 +640,35 @@ out_unlock:
}
EXPORT_SYMBOL(__close_fd); /* for ksys_close() */
+/*
+ * variant of __close_fd that gets a ref on the file for later fput
+ */
+int __close_fd_get_file(unsigned int fd, struct file **res)
+{
+ struct files_struct *files = current->files;
+ struct file *file;
+ struct fdtable *fdt;
+
+ spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
+ if (fd >= fdt->max_fds)
+ goto out_unlock;
+ file = fdt->fd[fd];
+ if (!file)
+ goto out_unlock;
+ rcu_assign_pointer(fdt->fd[fd], NULL);
+ __put_unused_fd(files, fd);
+ spin_unlock(&files->file_lock);
+ get_file(file);
+ *res = file;
+ return filp_close(file, files);
+
+out_unlock:
+ spin_unlock(&files->file_lock);
+ *res = NULL;
+ return -ENOENT;
+}
+
void do_close_on_exec(struct files_struct *files)
{
unsigned i;
diff --git a/fs/file_table.c b/fs/file_table.c
index e49af4caf15d..5679e7fcb6b0 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -380,10 +380,11 @@ void __init files_init(void)
void __init files_maxfiles_init(void)
{
unsigned long n;
- unsigned long memreserve = (totalram_pages - nr_free_pages()) * 3/2;
+ unsigned long nr_pages = totalram_pages();
+ unsigned long memreserve = (nr_pages - nr_free_pages()) * 3/2;
- memreserve = min(memreserve, totalram_pages - 1);
- n = ((totalram_pages - memreserve) * (PAGE_SIZE / 1024)) / 10;
+ memreserve = min(memreserve, nr_pages - 1);
+ n = ((nr_pages - memreserve) * (PAGE_SIZE / 1024)) / 10;
files_stat.max_files = max_t(unsigned long, n, NR_FILE);
}
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 568abed20eb2..76baaa6be393 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -824,7 +824,7 @@ static const struct super_operations fuse_super_operations = {
static void sanitize_global_limit(unsigned *limit)
{
if (*limit == 0)
- *limit = ((totalram_pages << PAGE_SHIFT) >> 13) /
+ *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) /
sizeof(struct fuse_req);
if (*limit >= 1 << 16)
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 8afbb35559b9..05dd78f4b2b3 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -820,10 +820,10 @@ out:
* @page: the page that's being released
* @gfp_mask: passed from Linux VFS, ignored by us
*
- * Call try_to_free_buffers() if the buffers in this page can be
- * released.
+ * Calls try_to_free_buffers() to free the buffers and put the page if the
+ * buffers can be released.
*
- * Returns: 0
+ * Returns: 1 if the page was put or else 0
*/
int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
@@ -930,14 +930,14 @@ static const struct address_space_operations gfs2_jdata_aops = {
void gfs2_set_aops(struct inode *inode)
{
struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
- if (gfs2_is_writeback(ip))
+ if (gfs2_is_jdata(ip))
+ inode->i_mapping->a_ops = &gfs2_jdata_aops;
+ else if (gfs2_is_writeback(sdp))
inode->i_mapping->a_ops = &gfs2_writeback_aops;
- else if (gfs2_is_ordered(ip))
+ else if (gfs2_is_ordered(sdp))
inode->i_mapping->a_ops = &gfs2_ordered_aops;
- else if (gfs2_is_jdata(ip))
- inode->i_mapping->a_ops = &gfs2_jdata_aops;
else
BUG();
}
-
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 9a4a15d646eb..02b2646d84b3 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -14,6 +14,7 @@
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/iomap.h>
+#include <linux/ktime.h>
#include "gfs2.h"
#include "incore.h"
@@ -2083,6 +2084,8 @@ static int do_grow(struct inode *inode, u64 size)
}
error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
+ (unstuff &&
+ gfs2_is_jdata(ip) ? RES_JDATA : 0) +
(sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
0 : RES_QUOTA), 0);
if (error)
@@ -2248,7 +2251,9 @@ int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
unsigned int shift = sdp->sd_sb.sb_bsize_shift;
u64 size;
int rc;
+ ktime_t start, end;
+ start = ktime_get();
lblock_stop = i_size_read(jd->jd_inode) >> shift;
size = (lblock_stop - lblock) << shift;
jd->nr_extents = 0;
@@ -2268,8 +2273,9 @@ int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
lblock += (bh.b_size >> ip->i_inode.i_blkbits);
} while(size > 0);
- fs_info(sdp, "journal %d mapped with %u extents\n", jd->jd_jid,
- jd->nr_extents);
+ end = ktime_get();
+ fs_info(sdp, "journal %d mapped with %u extents in %lldms\n", jd->jd_jid,
+ jd->nr_extents, ktime_ms_delta(end, start));
return 0;
fail:
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 45a17b770d97..a2dea5bc0427 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1199,13 +1199,13 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
mutex_lock(&fp->f_fl_mutex);
if (gfs2_holder_initialized(fl_gh)) {
+ struct file_lock request;
if (fl_gh->gh_state == state)
goto out;
- locks_lock_file_wait(file,
- &(struct file_lock) {
- .fl_type = F_UNLCK,
- .fl_flags = FL_FLOCK
- });
+ locks_init_lock(&request);
+ request.fl_type = F_UNLCK;
+ request.fl_flags = FL_FLOCK;
+ locks_lock_file_wait(file, &request);
gfs2_glock_dq(fl_gh);
gfs2_holder_reinit(state, flags, fl_gh);
} else {
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 05431324b262..b92740edc416 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1777,7 +1777,7 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
*
*/
-void gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
+void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
{
const struct gfs2_glock_operations *glops = gl->gl_ops;
unsigned long long dtime;
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 5e12220cc0c2..8949bf28b249 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -202,7 +202,7 @@ extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
struct gfs2_holder *gh);
extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
-extern void gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl);
+extern void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl);
#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { gfs2_dump_glock(NULL, gl); BUG(); } } while(0)
extern __printf(2, 3)
void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index c63bee9adb6a..f15b4c57c4bd 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -28,6 +28,7 @@
#include "util.h"
#include "trans.h"
#include "dir.h"
+#include "lops.h"
struct workqueue_struct *gfs2_freeze_wq;
@@ -466,17 +467,25 @@ static int inode_go_lock(struct gfs2_holder *gh)
*
*/
-static void inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
+static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl)
{
- const struct gfs2_inode *ip = gl->gl_object;
+ struct gfs2_inode *ip = gl->gl_object;
+ struct inode *inode = &ip->i_inode;
+ unsigned long nrpages;
+
if (ip == NULL)
return;
- gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu\n",
+
+ xa_lock_irq(&inode->i_data.i_pages);
+ nrpages = inode->i_data.nrpages;
+ xa_unlock_irq(&inode->i_data.i_pages);
+
+ gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu p:%lu\n",
(unsigned long long)ip->i_no_formal_ino,
(unsigned long long)ip->i_no_addr,
IF2DT(ip->i_inode.i_mode), ip->i_flags,
(unsigned int)ip->i_diskflags,
- (unsigned long long)i_size_read(&ip->i_inode));
+ (unsigned long long)i_size_read(inode), nrpages);
}
/**
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 888b62cfd6d1..e10e0b0a7cd5 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -165,7 +165,6 @@ struct gfs2_bufdata {
u64 bd_blkno;
struct list_head bd_list;
- const struct gfs2_log_operations *bd_ops;
struct gfs2_trans *bd_tr;
struct list_head bd_ail_st_list;
@@ -244,7 +243,7 @@ struct gfs2_glock_operations {
int (*go_demote_ok) (const struct gfs2_glock *gl);
int (*go_lock) (struct gfs2_holder *gh);
void (*go_unlock) (struct gfs2_holder *gh);
- void (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl);
+ void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl);
void (*go_callback)(struct gfs2_glock *gl, bool remote);
const int go_type;
const unsigned long go_flags;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 648f0ca1ad57..998051c4aea7 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -744,17 +744,19 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
the gfs2 structures. */
if (default_acl) {
error = __gfs2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
+ if (error)
+ goto fail_gunlock3;
posix_acl_release(default_acl);
+ default_acl = NULL;
}
if (acl) {
- if (!error)
- error = __gfs2_set_acl(inode, acl, ACL_TYPE_ACCESS);
+ error = __gfs2_set_acl(inode, acl, ACL_TYPE_ACCESS);
+ if (error)
+ goto fail_gunlock3;
posix_acl_release(acl);
+ acl = NULL;
}
- if (error)
- goto fail_gunlock3;
-
error = security_inode_init_security(&ip->i_inode, &dip->i_inode, name,
&gfs2_initxattrs, NULL);
if (error)
@@ -789,10 +791,8 @@ fail_free_inode:
}
gfs2_rsqa_delete(ip, NULL);
fail_free_acls:
- if (default_acl)
- posix_acl_release(default_acl);
- if (acl)
- posix_acl_release(acl);
+ posix_acl_release(default_acl);
+ posix_acl_release(acl);
fail_gunlock:
gfs2_dir_no_add(&da);
gfs2_glock_dq_uninit(ghs);
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index b5b6341a4f5c..793808263c6d 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -30,16 +30,14 @@ static inline int gfs2_is_jdata(const struct gfs2_inode *ip)
return ip->i_diskflags & GFS2_DIF_JDATA;
}
-static inline int gfs2_is_writeback(const struct gfs2_inode *ip)
+static inline bool gfs2_is_ordered(const struct gfs2_sbd *sdp)
{
- const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- return (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK) && !gfs2_is_jdata(ip);
+ return sdp->sd_args.ar_data == GFS2_DATA_ORDERED;
}
-static inline int gfs2_is_ordered(const struct gfs2_inode *ip)
+static inline bool gfs2_is_writeback(const struct gfs2_sbd *sdp)
{
- const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- return (sdp->sd_args.ar_data == GFS2_DATA_ORDERED) && !gfs2_is_jdata(ip);
+ return sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK;
}
static inline int gfs2_is_dir(const struct gfs2_inode *ip)
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 99dd58694ba1..5bfaf381921a 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -605,7 +605,6 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
bd->bd_blkno = bh->b_blocknr;
gfs2_remove_from_ail(bd); /* drops ref on bh */
bd->bd_bh = NULL;
- bd->bd_ops = &gfs2_revoke_lops;
sdp->sd_log_num_revoke++;
atomic_inc(&gl->gl_revokes);
set_bit(GLF_LFLUSH, &gl->gl_flags);
@@ -734,7 +733,7 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
lh->lh_crc = cpu_to_be32(crc);
gfs2_log_write(sdp, page, sb->s_blocksize, 0, addr);
- gfs2_log_flush_bio(sdp, REQ_OP_WRITE, op_flags);
+ gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE | op_flags);
log_flush_wait(sdp);
}
@@ -811,7 +810,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
gfs2_ordered_write(sdp);
lops_before_commit(sdp, tr);
- gfs2_log_flush_bio(sdp, REQ_OP_WRITE, 0);
+ gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE);
if (sdp->sd_log_head != sdp->sd_log_flush_head) {
log_flush_wait(sdp);
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 20241436126d..1bc9bd444b28 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -51,12 +51,11 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
{
- struct gfs2_sbd *sdp;
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- if (!gfs2_is_ordered(ip))
+ if (gfs2_is_jdata(ip) || !gfs2_is_ordered(sdp))
return;
- sdp = GFS2_SB(&ip->i_inode);
if (!test_bit(GIF_ORDERED, &ip->i_flags)) {
spin_lock(&sdp->sd_ordered_lock);
if (!test_and_set_bit(GIF_ORDERED, &ip->i_flags))
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 4c7069b8f3c1..94dcab655bc0 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -17,7 +17,9 @@
#include <linux/bio.h>
#include <linux/fs.h>
#include <linux/list_sort.h>
+#include <linux/blkdev.h>
+#include "bmap.h"
#include "dir.h"
#include "gfs2.h"
#include "incore.h"
@@ -193,7 +195,6 @@ static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
/**
* gfs2_end_log_write - end of i/o to the log
* @bio: The bio
- * @error: Status of i/o request
*
* Each bio_vec contains either data from the pagecache or data
* relating to the log itself. Here we iterate over the bio_vec
@@ -228,83 +229,86 @@ static void gfs2_end_log_write(struct bio *bio)
}
/**
- * gfs2_log_flush_bio - Submit any pending log bio
- * @sdp: The superblock
- * @op: REQ_OP
- * @op_flags: req_flag_bits
+ * gfs2_log_submit_bio - Submit any pending log bio
+ * @biop: Address of the bio pointer
+ * @opf: REQ_OP | op_flags
*
* Submit any pending part-built or full bio to the block device. If
* there is no pending bio, then this is a no-op.
*/
-void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int op, int op_flags)
+void gfs2_log_submit_bio(struct bio **biop, int opf)
{
- if (sdp->sd_log_bio) {
+ struct bio *bio = *biop;
+ if (bio) {
+ struct gfs2_sbd *sdp = bio->bi_private;
atomic_inc(&sdp->sd_log_in_flight);
- bio_set_op_attrs(sdp->sd_log_bio, op, op_flags);
- submit_bio(sdp->sd_log_bio);
- sdp->sd_log_bio = NULL;
+ bio->bi_opf = opf;
+ submit_bio(bio);
+ *biop = NULL;
}
}
/**
- * gfs2_log_alloc_bio - Allocate a new bio for log writing
- * @sdp: The superblock
- * @blkno: The next device block number we want to write to
+ * gfs2_log_alloc_bio - Allocate a bio
+ * @sdp: The super block
+ * @blkno: The device block number we want to write to
+ * @end_io: The bi_end_io callback
*
- * This should never be called when there is a cached bio in the
- * super block. When it returns, there will be a cached bio in the
- * super block which will have as many bio_vecs as the device is
- * happy to handle.
+ * Allocate a new bio, initialize it with the given parameters and return it.
*
- * Returns: Newly allocated bio
+ * Returns: The newly allocated bio
*/
-static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno)
+static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno,
+ bio_end_io_t *end_io)
{
struct super_block *sb = sdp->sd_vfs;
- struct bio *bio;
-
- BUG_ON(sdp->sd_log_bio);
+ struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
- bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9);
bio_set_dev(bio, sb->s_bdev);
- bio->bi_end_io = gfs2_end_log_write;
+ bio->bi_end_io = end_io;
bio->bi_private = sdp;
- sdp->sd_log_bio = bio;
-
return bio;
}
/**
* gfs2_log_get_bio - Get cached log bio, or allocate a new one
- * @sdp: The superblock
+ * @sdp: The super block
* @blkno: The device block number we want to write to
+ * @bio: The bio to get or allocate
+ * @op: REQ_OP
+ * @end_io: The bi_end_io callback
+ * @flush: Always flush the current bio and allocate a new one?
*
* If there is a cached bio, then if the next block number is sequential
* with the previous one, return it, otherwise flush the bio to the
- * device. If there is not a cached bio, or we just flushed it, then
+ * device. If there is no cached bio, or we just flushed it, then
* allocate a new one.
*
* Returns: The bio to use for log writes
*/
-static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno)
+static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno,
+ struct bio **biop, int op,
+ bio_end_io_t *end_io, bool flush)
{
- struct bio *bio = sdp->sd_log_bio;
- u64 nblk;
+ struct bio *bio = *biop;
if (bio) {
+ u64 nblk;
+
nblk = bio_end_sector(bio);
nblk >>= sdp->sd_fsb2bb_shift;
- if (blkno == nblk)
+ if (blkno == nblk && !flush)
return bio;
- gfs2_log_flush_bio(sdp, REQ_OP_WRITE, 0);
+ gfs2_log_submit_bio(biop, op);
}
- return gfs2_log_alloc_bio(sdp, blkno);
+ *biop = gfs2_log_alloc_bio(sdp, blkno, end_io);
+ return *biop;
}
/**
@@ -326,11 +330,12 @@ void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
struct bio *bio;
int ret;
- bio = gfs2_log_get_bio(sdp, blkno);
+ bio = gfs2_log_get_bio(sdp, blkno, &sdp->sd_log_bio, REQ_OP_WRITE,
+ gfs2_end_log_write, false);
ret = bio_add_page(bio, page, size, offset);
if (ret == 0) {
- gfs2_log_flush_bio(sdp, REQ_OP_WRITE, 0);
- bio = gfs2_log_alloc_bio(sdp, blkno);
+ bio = gfs2_log_get_bio(sdp, blkno, &sdp->sd_log_bio,
+ REQ_OP_WRITE, gfs2_end_log_write, true);
ret = bio_add_page(bio, page, size, offset);
WARN_ON(ret == 0);
}
@@ -370,6 +375,184 @@ void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
gfs2_log_bmap(sdp));
}
+/**
+ * gfs2_end_log_read - end I/O callback for reads from the log
+ * @bio: The bio
+ *
+ * Simply unlock the pages in the bio. The main thread will wait on them and
+ * process them in order as necessary.
+ */
+
+static void gfs2_end_log_read(struct bio *bio)
+{
+ struct page *page;
+ struct bio_vec *bvec;
+ int i;
+
+ bio_for_each_segment_all(bvec, bio, i) {
+ page = bvec->bv_page;
+ if (bio->bi_status) {
+ int err = blk_status_to_errno(bio->bi_status);
+
+ SetPageError(page);
+ mapping_set_error(page->mapping, err);
+ }
+ unlock_page(page);
+ }
+
+ bio_put(bio);
+}
+
+/**
+ * gfs2_jhead_pg_srch - Look for the journal head in a given page.
+ * @jd: The journal descriptor
+ * @page: The page to look in
+ *
+ * Returns: 1 if found, 0 otherwise.
+ */
+
+static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
+ struct gfs2_log_header_host *head,
+ struct page *page)
+{
+ struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+ struct gfs2_log_header_host uninitialized_var(lh);
+ void *kaddr = kmap_atomic(page);
+ unsigned int offset;
+ bool ret = false;
+
+ for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) {
+ if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) {
+ if (lh.lh_sequence > head->lh_sequence)
+ *head = lh;
+ else {
+ ret = true;
+ break;
+ }
+ }
+ }
+ kunmap_atomic(kaddr);
+ return ret;
+}
+
+/**
+ * gfs2_jhead_process_page - Search/cleanup a page
+ * @jd: The journal descriptor
+ * @index: Index of the page to look into
+ * @done: If set, perform only cleanup, else search and set if found.
+ *
+ * Find the page with 'index' in the journal's mapping. Search the page for
+ * the journal head if requested (cleanup == false). Release refs on the
+ * page so the page cache can reclaim it (put_page() twice). We grabbed a
+ * reference on this page two times, first when we did a find_or_create_page()
+ * to obtain the page to add it to the bio and second when we do a
+ * find_get_page() here to get the page to wait on while I/O on it is being
+ * completed.
+ * This function is also used to free up a page we might've grabbed but not
+ * used. Maybe we added it to a bio, but not submitted it for I/O. Or we
+ * submitted the I/O, but we already found the jhead so we only need to drop
+ * our references to the page.
+ */
+
+static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index,
+ struct gfs2_log_header_host *head,
+ bool *done)
+{
+ struct page *page;
+
+ page = find_get_page(jd->jd_inode->i_mapping, index);
+ wait_on_page_locked(page);
+
+ if (PageError(page))
+ *done = true;
+
+ if (!*done)
+ *done = gfs2_jhead_pg_srch(jd, head, page);
+
+ put_page(page); /* Once for find_get_page */
+ put_page(page); /* Once more for find_or_create_page */
+}
+
+/**
+ * gfs2_find_jhead - find the head of a log
+ * @jd: The journal descriptor
+ * @head: The log descriptor for the head of the log is returned here
+ *
+ * Do a search of a journal by reading it in large chunks using bios and find
+ * the valid log entry with the highest sequence number. (i.e. the log head)
+ *
+ * Returns: 0 on success, errno otherwise
+ */
+
+int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
+{
+ struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+ struct address_space *mapping = jd->jd_inode->i_mapping;
+ struct gfs2_journal_extent *je;
+ u32 block, read_idx = 0, submit_idx = 0, index = 0;
+ int shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
+ int blocks_per_page = 1 << shift, sz, ret = 0;
+ struct bio *bio = NULL;
+ struct page *page;
+ bool done = false;
+ errseq_t since;
+
+ memset(head, 0, sizeof(*head));
+ if (list_empty(&jd->extent_list))
+ gfs2_map_journal_extents(sdp, jd);
+
+ since = filemap_sample_wb_err(mapping);
+ list_for_each_entry(je, &jd->extent_list, list) {
+ for (block = 0; block < je->blocks; block += blocks_per_page) {
+ index = (je->lblock + block) >> shift;
+
+ page = find_or_create_page(mapping, index, GFP_NOFS);
+ if (!page) {
+ ret = -ENOMEM;
+ done = true;
+ goto out;
+ }
+
+ if (bio) {
+ sz = bio_add_page(bio, page, PAGE_SIZE, 0);
+ if (sz == PAGE_SIZE)
+ goto page_added;
+ submit_idx = index;
+ submit_bio(bio);
+ bio = NULL;
+ }
+
+ bio = gfs2_log_alloc_bio(sdp,
+ je->dblock + (index << shift),
+ gfs2_end_log_read);
+ bio->bi_opf = REQ_OP_READ;
+ sz = bio_add_page(bio, page, PAGE_SIZE, 0);
+ gfs2_assert_warn(sdp, sz == PAGE_SIZE);
+
+page_added:
+ if (submit_idx <= read_idx + BIO_MAX_PAGES) {
+ /* Keep at least one bio in flight */
+ continue;
+ }
+
+ gfs2_jhead_process_page(jd, read_idx++, head, &done);
+ if (done)
+ goto out; /* found */
+ }
+ }
+
+out:
+ if (bio)
+ submit_bio(bio);
+ while (read_idx <= index)
+ gfs2_jhead_process_page(jd, read_idx++, head, &done);
+
+ if (!ret)
+ ret = filemap_check_wb_err(mapping, since);
+
+ return ret;
+}
+
static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
u32 ld_length, u32 ld_data1)
{
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index e4949394f054..331160fc568b 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -30,8 +30,10 @@ extern u64 gfs2_log_bmap(struct gfs2_sbd *sdp);
extern void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
unsigned size, unsigned offset, u64 blkno);
extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
-extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int op, int op_flags);
+extern void gfs2_log_submit_bio(struct bio **biop, int opf);
extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
+extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
+ struct gfs2_log_header_host *head);
static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
{
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index b041cb8ae383..1179763f6370 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -41,6 +41,7 @@
#include "dir.h"
#include "meta_io.h"
#include "trace_gfs2.h"
+#include "lops.h"
#define DO 0
#define UNDO 1
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 0f501f938d1c..7389e445a7a7 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -120,6 +120,35 @@ void gfs2_revoke_clean(struct gfs2_jdesc *jd)
}
}
+int __get_log_header(struct gfs2_sbd *sdp, const struct gfs2_log_header *lh,
+ unsigned int blkno, struct gfs2_log_header_host *head)
+{
+ u32 hash, crc;
+
+ if (lh->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
+ lh->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH) ||
+ (blkno && be32_to_cpu(lh->lh_blkno) != blkno))
+ return 1;
+
+ hash = crc32(~0, lh, LH_V1_SIZE - 4);
+ hash = ~crc32_le_shift(hash, 4); /* assume lh_hash is zero */
+
+ if (be32_to_cpu(lh->lh_hash) != hash)
+ return 1;
+
+ crc = crc32c(~0, (void *)lh + LH_V1_SIZE + 4,
+ sdp->sd_sb.sb_bsize - LH_V1_SIZE - 4);
+
+ if ((lh->lh_crc != 0 && be32_to_cpu(lh->lh_crc) != crc))
+ return 1;
+
+ head->lh_sequence = be64_to_cpu(lh->lh_sequence);
+ head->lh_flags = be32_to_cpu(lh->lh_flags);
+ head->lh_tail = be32_to_cpu(lh->lh_tail);
+ head->lh_blkno = be32_to_cpu(lh->lh_blkno);
+
+ return 0;
+}
/**
* get_log_header - read the log header for a given segment
* @jd: the journal
@@ -137,159 +166,18 @@ void gfs2_revoke_clean(struct gfs2_jdesc *jd)
static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
struct gfs2_log_header_host *head)
{
- struct gfs2_log_header *lh;
+ struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
struct buffer_head *bh;
- u32 hash, crc;
int error;
error = gfs2_replay_read_block(jd, blk, &bh);
if (error)
return error;
- lh = (void *)bh->b_data;
-
- hash = crc32(~0, lh, LH_V1_SIZE - 4);
- hash = ~crc32_le_shift(hash, 4); /* assume lh_hash is zero */
-
- crc = crc32c(~0, (void *)lh + LH_V1_SIZE + 4,
- bh->b_size - LH_V1_SIZE - 4);
-
- error = lh->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
- lh->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH) ||
- be32_to_cpu(lh->lh_blkno) != blk ||
- be32_to_cpu(lh->lh_hash) != hash ||
- (lh->lh_crc != 0 && be32_to_cpu(lh->lh_crc) != crc);
+ error = __get_log_header(sdp, (const struct gfs2_log_header *)bh->b_data,
+ blk, head);
brelse(bh);
- if (!error) {
- head->lh_sequence = be64_to_cpu(lh->lh_sequence);
- head->lh_flags = be32_to_cpu(lh->lh_flags);
- head->lh_tail = be32_to_cpu(lh->lh_tail);
- head->lh_blkno = be32_to_cpu(lh->lh_blkno);
- }
- return error;
-}
-
-/**
- * find_good_lh - find a good log header
- * @jd: the journal
- * @blk: the segment to start searching from
- * @lh: the log header to fill in
- * @forward: if true search forward in the log, else search backward
- *
- * Call get_log_header() to get a log header for a segment, but if the
- * segment is bad, either scan forward or backward until we find a good one.
- *
- * Returns: errno
- */
-
-static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
- struct gfs2_log_header_host *head)
-{
- unsigned int orig_blk = *blk;
- int error;
-
- for (;;) {
- error = get_log_header(jd, *blk, head);
- if (error <= 0)
- return error;
-
- if (++*blk == jd->jd_blocks)
- *blk = 0;
-
- if (*blk == orig_blk) {
- gfs2_consist_inode(GFS2_I(jd->jd_inode));
- return -EIO;
- }
- }
-}
-
-/**
- * jhead_scan - make sure we've found the head of the log
- * @jd: the journal
- * @head: this is filled in with the log descriptor of the head
- *
- * At this point, seg and lh should be either the head of the log or just
- * before. Scan forward until we find the head.
- *
- * Returns: errno
- */
-
-static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
-{
- unsigned int blk = head->lh_blkno;
- struct gfs2_log_header_host lh;
- int error;
-
- for (;;) {
- if (++blk == jd->jd_blocks)
- blk = 0;
-
- error = get_log_header(jd, blk, &lh);
- if (error < 0)
- return error;
- if (error == 1)
- continue;
-
- if (lh.lh_sequence == head->lh_sequence) {
- gfs2_consist_inode(GFS2_I(jd->jd_inode));
- return -EIO;
- }
- if (lh.lh_sequence < head->lh_sequence)
- break;
-
- *head = lh;
- }
-
- return 0;
-}
-
-/**
- * gfs2_find_jhead - find the head of a log
- * @jd: the journal
- * @head: the log descriptor for the head of the log is returned here
- *
- * Do a binary search of a journal and find the valid log entry with the
- * highest sequence number. (i.e. the log head)
- *
- * Returns: errno
- */
-
-int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
-{
- struct gfs2_log_header_host lh_1, lh_m;
- u32 blk_1, blk_2, blk_m;
- int error;
-
- blk_1 = 0;
- blk_2 = jd->jd_blocks - 1;
-
- for (;;) {
- blk_m = (blk_1 + blk_2) / 2;
-
- error = find_good_lh(jd, &blk_1, &lh_1);
- if (error)
- return error;
-
- error = find_good_lh(jd, &blk_m, &lh_m);
- if (error)
- return error;
-
- if (blk_1 == blk_m || blk_m == blk_2)
- break;
-
- if (lh_1.lh_sequence <= lh_m.lh_sequence)
- blk_1 = blk_m;
- else
- blk_2 = blk_m;
- }
-
- error = jhead_scan(jd, &lh_1);
- if (error)
- return error;
-
- *head = lh_1;
-
return error;
}
@@ -460,6 +348,8 @@ void gfs2_recover_func(struct work_struct *work)
if (error)
goto fail_gunlock_ji;
t_jhd = ktime_get();
+ fs_info(sdp, "jid=%u: Journal head lookup took %lldms\n", jd->jd_jid,
+ ktime_ms_delta(t_jhd, t_jlck));
if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
index 11fdfab4bf99..99575ab81202 100644
--- a/fs/gfs2/recovery.h
+++ b/fs/gfs2/recovery.h
@@ -27,10 +27,11 @@ extern int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where)
extern int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where);
extern void gfs2_revoke_clean(struct gfs2_jdesc *jd);
-extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
- struct gfs2_log_header_host *head);
extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait);
extern void gfs2_recover_func(struct work_struct *work);
+extern int __get_log_header(struct gfs2_sbd *sdp,
+ const struct gfs2_log_header *lh, unsigned int blkno,
+ struct gfs2_log_header_host *head);
#endif /* __RECOVERY_DOT_H__ */
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index b08a530433ad..831d7cb5a49c 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1780,9 +1780,9 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
goto next_iter;
}
if (ret == -E2BIG) {
+ n += rbm->bii - initial_bii;
rbm->bii = 0;
rbm->offset = 0;
- n += (rbm->bii - initial_bii);
goto res_covered_end_of_rgrp;
}
return ret;
@@ -2256,7 +2256,7 @@ static void rgblk_free(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd,
*
*/
-void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
+void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl)
{
struct gfs2_rgrpd *rgd = gl->gl_object;
struct gfs2_blkreserv *trs;
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index b596c3d17988..499079a9dbbe 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -72,7 +72,7 @@ extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist);
extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
extern u64 gfs2_ri_total(struct gfs2_sbd *sdp);
-extern void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl);
+extern void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl);
extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
struct buffer_head *bh,
const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index ca71163ff7cf..d4b11c903971 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -45,6 +45,7 @@
#include "util.h"
#include "sys.h"
#include "xattr.h"
+#include "lops.h"
#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x)
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 423bc2d03dd8..cd9a94a6b5bb 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -124,15 +124,13 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
}
static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
- struct buffer_head *bh,
- const struct gfs2_log_operations *lops)
+ struct buffer_head *bh)
{
struct gfs2_bufdata *bd;
bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL);
bd->bd_bh = bh;
bd->bd_gl = gl;
- bd->bd_ops = lops;
INIT_LIST_HEAD(&bd->bd_list);
bh->b_private = bd;
return bd;
@@ -169,7 +167,7 @@ void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh)
gfs2_log_unlock(sdp);
unlock_buffer(bh);
if (bh->b_private == NULL)
- bd = gfs2_alloc_bufdata(gl, bh, &gfs2_databuf_lops);
+ bd = gfs2_alloc_bufdata(gl, bh);
else
bd = bh->b_private;
lock_buffer(bh);
@@ -210,7 +208,7 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
unlock_buffer(bh);
lock_page(bh->b_page);
if (bh->b_private == NULL)
- bd = gfs2_alloc_bufdata(gl, bh, &gfs2_buf_lops);
+ bd = gfs2_alloc_bufdata(gl, bh);
else
bd = bh->b_private;
unlock_page(bh->b_page);
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index f37662675c3a..29a9dcfbe81f 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -565,6 +565,7 @@ const struct inode_operations hfsplus_dir_inode_operations = {
.symlink = hfsplus_symlink,
.mknod = hfsplus_mknod,
.rename = hfsplus_rename,
+ .getattr = hfsplus_getattr,
.listxattr = hfsplus_listxattr,
};
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index dd7ad9f13e3a..b8471bf05def 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -488,6 +488,8 @@ void hfsplus_inode_write_fork(struct inode *inode,
struct hfsplus_fork_raw *fork);
int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd);
int hfsplus_cat_write_inode(struct inode *inode);
+int hfsplus_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags);
int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
int datasync);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index d7ab9d8c4b67..d131c8ea7eb6 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -270,6 +270,26 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
return 0;
}
+int hfsplus_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
+{
+ struct inode *inode = d_inode(path->dentry);
+ struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
+
+ if (inode->i_flags & S_APPEND)
+ stat->attributes |= STATX_ATTR_APPEND;
+ if (inode->i_flags & S_IMMUTABLE)
+ stat->attributes |= STATX_ATTR_IMMUTABLE;
+ if (hip->userflags & HFSPLUS_FLG_NODUMP)
+ stat->attributes |= STATX_ATTR_NODUMP;
+
+ stat->attributes_mask |= STATX_ATTR_APPEND | STATX_ATTR_IMMUTABLE |
+ STATX_ATTR_NODUMP;
+
+ generic_fillattr(inode, stat);
+ return 0;
+}
+
int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
int datasync)
{
@@ -329,6 +349,7 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
static const struct inode_operations hfsplus_file_inode_operations = {
.setattr = hfsplus_setattr,
+ .getattr = hfsplus_getattr,
.listxattr = hfsplus_listxattr,
};
diff --git a/fs/inode.c b/fs/inode.c
index 35d2108d567c..0cd47fe0dbe5 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2149,7 +2149,9 @@ EXPORT_SYMBOL(timespec64_trunc);
*/
struct timespec64 current_time(struct inode *inode)
{
- struct timespec64 now = current_kernel_time64();
+ struct timespec64 now;
+
+ ktime_get_coarse_real_ts64(&now);
if (unlikely(!inode->i_sb)) {
WARN(1, "current_time() called with uninitialized super_block in the inode");
diff --git a/fs/ioctl.c b/fs/ioctl.c
index d64f622cac8b..fef3a6bf7c78 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -203,7 +203,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
fieinfo.fi_extents_start = ufiemap->fm_extents;
if (fiemap.fm_extent_count != 0 &&
- !access_ok(VERIFY_WRITE, fieinfo.fi_extents_start,
+ !access_ok(fieinfo.fi_extents_start,
fieinfo.fi_extents_max * sizeof(struct fiemap_extent)))
return -EFAULT;
diff --git a/fs/iomap.c b/fs/iomap.c
index 5bc172f3dfe8..a3088fae567b 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -116,12 +116,6 @@ iomap_page_create(struct inode *inode, struct page *page)
atomic_set(&iop->read_count, 0);
atomic_set(&iop->write_count, 0);
bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE);
-
- /*
- * migrate_page_move_mapping() assumes that pages with private data have
- * their count elevated by 1.
- */
- get_page(page);
set_page_private(page, (unsigned long)iop);
SetPagePrivate(page);
return iop;
@@ -138,7 +132,6 @@ iomap_page_release(struct page *page)
WARN_ON_ONCE(atomic_read(&iop->write_count));
ClearPagePrivate(page);
set_page_private(page, 0);
- put_page(page);
kfree(iop);
}
@@ -499,16 +492,29 @@ done:
}
EXPORT_SYMBOL_GPL(iomap_readpages);
+/*
+ * iomap_is_partially_uptodate checks whether blocks within a page are
+ * uptodate or not.
+ *
+ * Returns true if all blocks which correspond to a file portion
+ * we want to read within the page are uptodate.
+ */
int
iomap_is_partially_uptodate(struct page *page, unsigned long from,
unsigned long count)
{
struct iomap_page *iop = to_iomap_page(page);
struct inode *inode = page->mapping->host;
- unsigned first = from >> inode->i_blkbits;
- unsigned last = (from + count - 1) >> inode->i_blkbits;
+ unsigned len, first, last;
unsigned i;
+ /* Limit range to one page */
+ len = min_t(unsigned, PAGE_SIZE - from, count);
+
+ /* First and last blocks in range within page */
+ first = from >> inode->i_blkbits;
+ last = (from + len - 1) >> inode->i_blkbits;
+
if (iop) {
for (i = first; i <= last; i++)
if (!test_bit(i, iop->uptodate))
@@ -557,7 +563,7 @@ iomap_migrate_page(struct address_space *mapping, struct page *newpage,
{
int ret;
- ret = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
+ ret = migrate_page_move_mapping(mapping, newpage, page, mode, 0);
if (ret != MIGRATEPAGE_SUCCESS)
return ret;
@@ -1537,7 +1543,7 @@ static void iomap_dio_bio_end_io(struct bio *bio)
if (dio->wait_for_completion) {
struct task_struct *waiter = dio->submit.waiter;
WRITE_ONCE(dio->submit.waiter, NULL);
- wake_up_process(waiter);
+ blk_wake_io_task(waiter);
} else if (dio->flags & IOMAP_DIO_WRITE) {
struct inode *inode = file_inode(dio->iocb->ki_filp);
@@ -1565,6 +1571,7 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
unsigned len)
{
struct page *page = ZERO_PAGE(0);
+ int flags = REQ_SYNC | REQ_IDLE;
struct bio *bio;
bio = bio_alloc(GFP_KERNEL, 1);
@@ -1573,9 +1580,12 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;
+ if (dio->iocb->ki_flags & IOCB_HIPRI)
+ flags |= REQ_HIPRI;
+
get_page(page);
__bio_add_page(bio, page, len, 0);
- bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE);
+ bio_set_op_attrs(bio, REQ_OP_WRITE, flags);
atomic_inc(&dio->ref);
return submit_bio(bio);
@@ -1681,6 +1691,9 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
bio_set_pages_dirty(bio);
}
+ if (dio->iocb->ki_flags & IOCB_HIPRI)
+ bio->bi_opf |= REQ_HIPRI;
+
iov_iter_advance(dio->submit.iter, n);
dio->size += n;
@@ -1915,7 +1928,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
if (!(iocb->ki_flags & IOCB_HIPRI) ||
!dio->submit.last_queue ||
!blk_poll(dio->submit.last_queue,
- dio->submit.cookie))
+ dio->submit.cookie, true))
io_schedule();
}
__set_current_state(TASK_RUNNING);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 150cc030b4d7..2eb55c3361a8 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -439,6 +439,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
finish_wait(&journal->j_wait_updates, &wait);
}
spin_unlock(&commit_transaction->t_handle_lock);
+ commit_transaction->t_state = T_SWITCH;
+ write_unlock(&journal->j_state_lock);
J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
journal->j_max_transaction_buffers);
@@ -505,6 +507,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
atomic_sub(atomic_read(&journal->j_reserved_credits),
&commit_transaction->t_outstanding_credits);
+ write_lock(&journal->j_state_lock);
trace_jbd2_commit_flushing(journal, commit_transaction);
stats.run.rs_flushing = jiffies;
stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked,
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index c0b66a7a795b..cc35537232f2 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -138,9 +138,9 @@ static inline void update_t_max_wait(transaction_t *transaction,
}
/*
- * Wait until running transaction passes T_LOCKED state. Also starts the commit
- * if needed. The function expects running transaction to exist and releases
- * j_state_lock.
+ * Wait until running transaction passes to T_FLUSH state and new transaction
+ * can thus be started. Also starts the commit if needed. The function expects
+ * running transaction to exist and releases j_state_lock.
*/
static void wait_transaction_locked(journal_t *journal)
__releases(journal->j_state_lock)
@@ -160,6 +160,32 @@ static void wait_transaction_locked(journal_t *journal)
finish_wait(&journal->j_wait_transaction_locked, &wait);
}
+/*
+ * Wait until running transaction transitions from T_SWITCH to T_FLUSH
+ * state and new transaction can thus be started. The function releases
+ * j_state_lock.
+ */
+static void wait_transaction_switching(journal_t *journal)
+ __releases(journal->j_state_lock)
+{
+ DEFINE_WAIT(wait);
+
+ if (WARN_ON(!journal->j_running_transaction ||
+ journal->j_running_transaction->t_state != T_SWITCH))
+ return;
+ prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
+ TASK_UNINTERRUPTIBLE);
+ read_unlock(&journal->j_state_lock);
+ /*
+ * We don't call jbd2_might_wait_for_commit() here as there's no
+ * waiting for outstanding handles happening anymore in T_SWITCH state
+ * and handling of reserved handles actually relies on that for
+ * correctness.
+ */
+ schedule();
+ finish_wait(&journal->j_wait_transaction_locked, &wait);
+}
+
static void sub_reserved_credits(journal_t *journal, int blocks)
{
atomic_sub(blocks, &journal->j_reserved_credits);
@@ -183,7 +209,8 @@ static int add_transaction_credits(journal_t *journal, int blocks,
* If the current transaction is locked down for commit, wait
* for the lock to be released.
*/
- if (t->t_state == T_LOCKED) {
+ if (t->t_state != T_RUNNING) {
+ WARN_ON_ONCE(t->t_state >= T_FLUSH);
wait_transaction_locked(journal);
return 1;
}
@@ -360,8 +387,14 @@ repeat:
/*
* We have handle reserved so we are allowed to join T_LOCKED
* transaction and we don't have to check for transaction size
- * and journal space.
+ * and journal space. But we still have to wait while running
+ * transaction is being switched to a committing one as it
+ * won't wait for any handles anymore.
*/
+ if (transaction->t_state == T_SWITCH) {
+ wait_transaction_switching(journal);
+ goto repeat;
+ }
sub_reserved_credits(journal, blocks);
handle->h_reserved = 0;
}
@@ -910,7 +943,7 @@ repeat:
* this is the first time this transaction is touching this buffer,
* reset the modified flag
*/
- jh->b_modified = 0;
+ jh->b_modified = 0;
/*
* If the buffer is not journaled right now, we need to make sure it
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 902a7dd10e5c..bb6ae387469f 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -101,7 +101,8 @@ static int jffs2_sync_fs(struct super_block *sb, int wait)
struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
- cancel_delayed_work_sync(&c->wbuf_dwork);
+ if (jffs2_is_writebuffered(c))
+ cancel_delayed_work_sync(&c->wbuf_dwork);
#endif
mutex_lock(&c->alloc_sem);
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index dbf5bc250bfd..f8d5021a652e 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -857,7 +857,6 @@ static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
static void kernfs_notify_workfn(struct work_struct *work)
{
struct kernfs_node *kn;
- struct kernfs_open_node *on;
struct kernfs_super_info *info;
repeat:
/* pop one off the notify_list */
@@ -871,17 +870,6 @@ repeat:
kn->attr.notify_next = NULL;
spin_unlock_irq(&kernfs_notify_lock);
- /* kick poll */
- spin_lock_irq(&kernfs_open_node_lock);
-
- on = kn->attr.open;
- if (on) {
- atomic_inc(&on->event);
- wake_up_interruptible(&on->poll);
- }
-
- spin_unlock_irq(&kernfs_open_node_lock);
-
/* kick fsnotify */
mutex_lock(&kernfs_mutex);
@@ -934,10 +922,21 @@ void kernfs_notify(struct kernfs_node *kn)
{
static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn);
unsigned long flags;
+ struct kernfs_open_node *on;
if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
return;
+ /* kick poll immediately */
+ spin_lock_irqsave(&kernfs_open_node_lock, flags);
+ on = kn->attr.open;
+ if (on) {
+ atomic_inc(&on->event);
+ wake_up_interruptible(&on->poll);
+ }
+ spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
+
+ /* schedule work to kick fsnotify */
spin_lock_irqsave(&kernfs_notify_lock, flags);
if (!kn->attr.notify_next) {
kernfs_get(kn);
diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 00d5ef5f99f7..214a2fa1f1e3 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -128,24 +128,14 @@ static void encode_netobj(struct xdr_stream *xdr,
static int decode_netobj(struct xdr_stream *xdr,
struct xdr_netobj *obj)
{
- u32 length;
- __be32 *p;
+ ssize_t ret;
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- goto out_overflow;
- length = be32_to_cpup(p++);
- if (unlikely(length > XDR_MAX_NETOBJ))
- goto out_size;
- obj->len = length;
- obj->data = (u8 *)p;
+ ret = xdr_stream_decode_opaque_inline(xdr, (void *)&obj->data,
+ XDR_MAX_NETOBJ);
+ if (unlikely(ret < 0))
+ return -EIO;
+ obj->len = ret;
return 0;
-out_size:
- dprintk("NFS: returned netobj was too long: %u\n", length);
- return -EIO;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}
/*
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index d20b92f271c2..e8a004097d18 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -256,7 +256,7 @@ static int nlm_wait_on_grace(wait_queue_head_t *queue)
* Generic NLM call
*/
static int
-nlmclnt_call(struct rpc_cred *cred, struct nlm_rqst *req, u32 proc)
+nlmclnt_call(const struct cred *cred, struct nlm_rqst *req, u32 proc)
{
struct nlm_host *host = req->a_host;
struct rpc_clnt *clnt;
@@ -401,7 +401,7 @@ int nlm_async_reply(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *t
* completion in order to be able to correctly track the lock
* state.
*/
-static int nlmclnt_async_call(struct rpc_cred *cred, struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+static int nlmclnt_async_call(const struct cred *cred, struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
{
struct rpc_message msg = {
.rpc_argp = &req->a_args,
@@ -442,7 +442,7 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
fl->fl_start = req->a_res.lock.fl.fl_start;
fl->fl_end = req->a_res.lock.fl.fl_end;
fl->fl_type = req->a_res.lock.fl.fl_type;
- fl->fl_pid = 0;
+ fl->fl_pid = -req->a_res.lock.fl.fl_pid;
break;
default:
status = nlm_stat_to_errno(req->a_res.status);
@@ -510,7 +510,7 @@ static int do_vfs_lock(struct file_lock *fl)
static int
nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
{
- struct rpc_cred *cred = nfs_file_cred(fl->fl_file);
+ const struct cred *cred = nfs_file_cred(fl->fl_file);
struct nlm_host *host = req->a_host;
struct nlm_res *resp = &req->a_res;
struct nlm_wait *block = NULL;
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 2c6176387143..747b9c8c940a 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -125,24 +125,14 @@ static void encode_netobj(struct xdr_stream *xdr,
static int decode_netobj(struct xdr_stream *xdr,
struct xdr_netobj *obj)
{
- u32 length;
- __be32 *p;
+ ssize_t ret;
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- goto out_overflow;
- length = be32_to_cpup(p++);
- if (unlikely(length > XDR_MAX_NETOBJ))
- goto out_size;
- obj->len = length;
- obj->data = (u8 *)p;
+ ret = xdr_stream_decode_opaque_inline(xdr, (void *)&obj->data,
+ XDR_MAX_NETOBJ);
+ if (unlikely(ret < 0))
+ return -EIO;
+ obj->len = ret;
return 0;
-out_size:
- dprintk("NFS: returned netobj was too long: %u\n", length);
- return -EIO;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}
/*
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 74330daeab71..ea719cdd6a36 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -276,7 +276,7 @@ static int nlmsvc_unlink_block(struct nlm_block *block)
dprintk("lockd: unlinking block %p...\n", block);
/* Remove block from list */
- status = posix_unblock_lock(&block->b_call->a_args.lock.fl);
+ status = locks_delete_block(&block->b_call->a_args.lock.fl);
nlmsvc_remove_block(block);
return status;
}
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 7147e4aebecc..9846f7e95282 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -127,7 +127,7 @@ nlm_decode_lock(__be32 *p, struct nlm_lock *lock)
locks_init_lock(fl);
fl->fl_owner = current->files;
- fl->fl_pid = (pid_t)lock->svid;
+ fl->fl_pid = current->tgid;
fl->fl_flags = FL_POSIX;
fl->fl_type = F_RDLCK; /* as good as anything else */
start = ntohl(*p++);
@@ -269,7 +269,7 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
memset(lock, 0, sizeof(*lock));
locks_init_lock(&lock->fl);
lock->svid = ~(u32) 0;
- lock->fl.fl_pid = (pid_t)lock->svid;
+ lock->fl.fl_pid = current->tgid;
if (!(p = nlm_decode_cookie(p, &argp->cookie))
|| !(p = xdr_decode_string_inplace(p, &lock->caller,
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 7ed9edf9aed4..70154f376695 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -119,7 +119,7 @@ nlm4_decode_lock(__be32 *p, struct nlm_lock *lock)
locks_init_lock(fl);
fl->fl_owner = current->files;
- fl->fl_pid = (pid_t)lock->svid;
+ fl->fl_pid = current->tgid;
fl->fl_flags = FL_POSIX;
fl->fl_type = F_RDLCK; /* as good as anything else */
p = xdr_decode_hyper(p, &start);
@@ -266,7 +266,7 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
memset(lock, 0, sizeof(*lock));
locks_init_lock(&lock->fl);
lock->svid = ~(u32) 0;
- lock->fl.fl_pid = (pid_t)lock->svid;
+ lock->fl.fl_pid = current->tgid;
if (!(p = nlm4_decode_cookie(p, &argp->cookie))
|| !(p = xdr_decode_string_inplace(p, &lock->caller,
diff --git a/fs/locks.c b/fs/locks.c
index 2ecb4db8c840..ff6af2c32601 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -11,11 +11,11 @@
*
* Miscellaneous edits, and a total rewrite of posix_lock_file() code.
* Kai Petzke (wpp@marie.physik.tu-berlin.de), 1994
- *
+ *
* Converted file_lock_table to a linked list from an array, which eliminates
* the limits on how many active file locks are open.
* Chad Page (pageone@netcom.com), November 27, 1994
- *
+ *
* Removed dependency on file descriptors. dup()'ed file descriptors now
* get the same locks as the original file descriptors, and a close() on
* any file descriptor removes ALL the locks on the file for the current
@@ -41,7 +41,7 @@
* with a file pointer (filp). As a result they can be shared by a parent
* process and its children after a fork(). They are removed when the last
* file descriptor referring to the file pointer is closed (unless explicitly
- * unlocked).
+ * unlocked).
*
* FL_FLOCK locks never deadlock, an existing lock is always removed before
* upgrading from shared to exclusive (or vice versa). When this happens
@@ -50,7 +50,7 @@
* Andy Walker (andy@lysaker.kvaerner.no), June 09, 1995
*
* Removed some race conditions in flock_lock_file(), marked other possible
- * races. Just grep for FIXME to see them.
+ * races. Just grep for FIXME to see them.
* Dmitry Gorodchanin (pgmdsg@ibi.com), February 09, 1996.
*
* Addressed Dmitry's concerns. Deadlock checking no longer recursive.
@@ -112,6 +112,46 @@
* Leases and LOCK_MAND
* Matthew Wilcox <willy@debian.org>, June, 2000.
* Stephen Rothwell <sfr@canb.auug.org.au>, June, 2000.
+ *
+ * Locking conflicts and dependencies:
+ * If multiple threads attempt to lock the same byte (or flock the same file)
+ * only one can be granted the lock, and other must wait their turn.
+ * The first lock has been "applied" or "granted", the others are "waiting"
+ * and are "blocked" by the "applied" lock..
+ *
+ * Waiting and applied locks are all kept in trees whose properties are:
+ *
+ * - the root of a tree may be an applied or waiting lock.
+ * - every other node in the tree is a waiting lock that
+ * conflicts with every ancestor of that node.
+ *
+ * Every such tree begins life as a waiting singleton which obviously
+ * satisfies the above properties.
+ *
+ * The only ways we modify trees preserve these properties:
+ *
+ * 1. We may add a new leaf node, but only after first verifying that it
+ * conflicts with all of its ancestors.
+ * 2. We may remove the root of a tree, creating a new singleton
+ * tree from the root and N new trees rooted in the immediate
+ * children.
+ * 3. If the root of a tree is not currently an applied lock, we may
+ * apply it (if possible).
+ * 4. We may upgrade the root of the tree (either extend its range,
+ * or upgrade its entire range from read to write).
+ *
+ * When an applied lock is modified in a way that reduces or downgrades any
+ * part of its range, we remove all its children (2 above). This particularly
+ * happens when a lock is unlocked.
+ *
+ * For each of those child trees we "wake up" the thread which is
+ * waiting for the lock so it can continue handling as follows: if the
+ * root of the tree applies, we do so (3). If it doesn't, it must
+ * conflict with some applied lock. We remove (wake up) all of its children
+ * (2), and add it is a new leaf to the tree rooted in the applied
+ * lock (1). We then repeat the process recursively with those
+ * children.
+ *
*/
#include <linux/capability.h>
@@ -189,9 +229,9 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);
* This lock protects the blocked_hash. Generally, if you're accessing it, you
* want to be holding this lock.
*
- * In addition, it also protects the fl->fl_block list, and the fl->fl_next
- * pointer for file_lock structures that are acting as lock requests (in
- * contrast to those that are acting as records of acquired locks).
+ * In addition, it also protects the fl->fl_blocked_requests list, and the
+ * fl->fl_blocker pointer for file_lock structures that are acting as lock
+ * requests (in contrast to those that are acting as records of acquired locks).
*
* Note that when we acquire this lock in order to change the above fields,
* we often hold the flc_lock as well. In certain cases, when reading the fields
@@ -293,7 +333,8 @@ static void locks_init_lock_heads(struct file_lock *fl)
{
INIT_HLIST_NODE(&fl->fl_link);
INIT_LIST_HEAD(&fl->fl_list);
- INIT_LIST_HEAD(&fl->fl_block);
+ INIT_LIST_HEAD(&fl->fl_blocked_requests);
+ INIT_LIST_HEAD(&fl->fl_blocked_member);
init_waitqueue_head(&fl->fl_wait);
}
@@ -332,7 +373,8 @@ void locks_free_lock(struct file_lock *fl)
{
BUG_ON(waitqueue_active(&fl->fl_wait));
BUG_ON(!list_empty(&fl->fl_list));
- BUG_ON(!list_empty(&fl->fl_block));
+ BUG_ON(!list_empty(&fl->fl_blocked_requests));
+ BUG_ON(!list_empty(&fl->fl_blocked_member));
BUG_ON(!hlist_unhashed(&fl->fl_link));
locks_release_private(fl);
@@ -357,7 +399,6 @@ void locks_init_lock(struct file_lock *fl)
memset(fl, 0, sizeof(struct file_lock));
locks_init_lock_heads(fl);
}
-
EXPORT_SYMBOL(locks_init_lock);
/*
@@ -397,9 +438,26 @@ void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
fl->fl_ops->fl_copy_lock(new, fl);
}
}
-
EXPORT_SYMBOL(locks_copy_lock);
+static void locks_move_blocks(struct file_lock *new, struct file_lock *fl)
+{
+ struct file_lock *f;
+
+ /*
+ * As ctx->flc_lock is held, new requests cannot be added to
+ * ->fl_blocked_requests, so we don't need a lock to check if it
+ * is empty.
+ */
+ if (list_empty(&fl->fl_blocked_requests))
+ return;
+ spin_lock(&blocked_lock_lock);
+ list_splice_init(&fl->fl_blocked_requests, &new->fl_blocked_requests);
+ list_for_each_entry(f, &new->fl_blocked_requests, fl_blocked_member)
+ f->fl_blocker = new;
+ spin_unlock(&blocked_lock_lock);
+}
+
static inline int flock_translate_cmd(int cmd) {
if (cmd & LOCK_MAND)
return cmd & (LOCK_MAND | LOCK_RW);
@@ -416,17 +474,20 @@ static inline int flock_translate_cmd(int cmd) {
/* Fill in a file_lock structure with an appropriate FLOCK lock. */
static struct file_lock *
-flock_make_lock(struct file *filp, unsigned int cmd)
+flock_make_lock(struct file *filp, unsigned int cmd, struct file_lock *fl)
{
- struct file_lock *fl;
int type = flock_translate_cmd(cmd);
if (type < 0)
return ERR_PTR(type);
-
- fl = locks_alloc_lock();
- if (fl == NULL)
- return ERR_PTR(-ENOMEM);
+
+ if (fl == NULL) {
+ fl = locks_alloc_lock();
+ if (fl == NULL)
+ return ERR_PTR(-ENOMEM);
+ } else {
+ locks_init_lock(fl);
+ }
fl->fl_file = filp;
fl->fl_owner = filp;
@@ -434,7 +495,7 @@ flock_make_lock(struct file *filp, unsigned int cmd)
fl->fl_flags = FL_FLOCK;
fl->fl_type = type;
fl->fl_end = OFFSET_MAX;
-
+
return fl;
}
@@ -666,16 +727,58 @@ static void locks_delete_global_blocked(struct file_lock *waiter)
static void __locks_delete_block(struct file_lock *waiter)
{
locks_delete_global_blocked(waiter);
- list_del_init(&waiter->fl_block);
- waiter->fl_next = NULL;
+ list_del_init(&waiter->fl_blocked_member);
+ waiter->fl_blocker = NULL;
}
-static void locks_delete_block(struct file_lock *waiter)
+static void __locks_wake_up_blocks(struct file_lock *blocker)
{
+ while (!list_empty(&blocker->fl_blocked_requests)) {
+ struct file_lock *waiter;
+
+ waiter = list_first_entry(&blocker->fl_blocked_requests,
+ struct file_lock, fl_blocked_member);
+ __locks_delete_block(waiter);
+ if (waiter->fl_lmops && waiter->fl_lmops->lm_notify)
+ waiter->fl_lmops->lm_notify(waiter);
+ else
+ wake_up(&waiter->fl_wait);
+ }
+}
+
+/**
+ * locks_delete_lock - stop waiting for a file lock
+ * @waiter: the lock which was waiting
+ *
+ * lockd/nfsd need to disconnect the lock while working on it.
+ */
+int locks_delete_block(struct file_lock *waiter)
+{
+ int status = -ENOENT;
+
+ /*
+ * If fl_blocker is NULL, it won't be set again as this thread
+ * "owns" the lock and is the only one that might try to claim
+ * the lock. So it is safe to test fl_blocker locklessly.
+ * Also if fl_blocker is NULL, this waiter is not listed on
+ * fl_blocked_requests for some lock, so no other request can
+ * be added to the list of fl_blocked_requests for this
+ * request. So if fl_blocker is NULL, it is safe to
+ * locklessly check if fl_blocked_requests is empty. If both
+ * of these checks succeed, there is no need to take the lock.
+ */
+ if (waiter->fl_blocker == NULL &&
+ list_empty(&waiter->fl_blocked_requests))
+ return status;
spin_lock(&blocked_lock_lock);
+ if (waiter->fl_blocker)
+ status = 0;
+ __locks_wake_up_blocks(waiter);
__locks_delete_block(waiter);
spin_unlock(&blocked_lock_lock);
+ return status;
}
+EXPORT_SYMBOL(locks_delete_block);
/* Insert waiter into blocker's block list.
* We use a circular list so that processes can be easily woken up in
@@ -683,26 +786,49 @@ static void locks_delete_block(struct file_lock *waiter)
* it seems like the reasonable thing to do.
*
* Must be called with both the flc_lock and blocked_lock_lock held. The
- * fl_block list itself is protected by the blocked_lock_lock, but by ensuring
- * that the flc_lock is also held on insertions we can avoid taking the
- * blocked_lock_lock in some cases when we see that the fl_block list is empty.
+ * fl_blocked_requests list itself is protected by the blocked_lock_lock,
+ * but by ensuring that the flc_lock is also held on insertions we can avoid
+ * taking the blocked_lock_lock in some cases when we see that the
+ * fl_blocked_requests list is empty.
+ *
+ * Rather than just adding to the list, we check for conflicts with any existing
+ * waiters, and add beneath any waiter that blocks the new waiter.
+ * Thus wakeups don't happen until needed.
*/
static void __locks_insert_block(struct file_lock *blocker,
- struct file_lock *waiter)
+ struct file_lock *waiter,
+ bool conflict(struct file_lock *,
+ struct file_lock *))
{
- BUG_ON(!list_empty(&waiter->fl_block));
- waiter->fl_next = blocker;
- list_add_tail(&waiter->fl_block, &blocker->fl_block);
+ struct file_lock *fl;
+ BUG_ON(!list_empty(&waiter->fl_blocked_member));
+
+new_blocker:
+ list_for_each_entry(fl, &blocker->fl_blocked_requests, fl_blocked_member)
+ if (conflict(fl, waiter)) {
+ blocker = fl;
+ goto new_blocker;
+ }
+ waiter->fl_blocker = blocker;
+ list_add_tail(&waiter->fl_blocked_member, &blocker->fl_blocked_requests);
if (IS_POSIX(blocker) && !IS_OFDLCK(blocker))
locks_insert_global_blocked(waiter);
+
+ /* The requests in waiter->fl_blocked are known to conflict with
+ * waiter, but might not conflict with blocker, or the requests
+ * and lock which block it. So they all need to be woken.
+ */
+ __locks_wake_up_blocks(waiter);
}
/* Must be called with flc_lock held. */
static void locks_insert_block(struct file_lock *blocker,
- struct file_lock *waiter)
+ struct file_lock *waiter,
+ bool conflict(struct file_lock *,
+ struct file_lock *))
{
spin_lock(&blocked_lock_lock);
- __locks_insert_block(blocker, waiter);
+ __locks_insert_block(blocker, waiter, conflict);
spin_unlock(&blocked_lock_lock);
}
@@ -716,25 +842,15 @@ static void locks_wake_up_blocks(struct file_lock *blocker)
/*
* Avoid taking global lock if list is empty. This is safe since new
* blocked requests are only added to the list under the flc_lock, and
- * the flc_lock is always held here. Note that removal from the fl_block
- * list does not require the flc_lock, so we must recheck list_empty()
- * after acquiring the blocked_lock_lock.
+ * the flc_lock is always held here. Note that removal from the
+ * fl_blocked_requests list does not require the flc_lock, so we must
+ * recheck list_empty() after acquiring the blocked_lock_lock.
*/
- if (list_empty(&blocker->fl_block))
+ if (list_empty(&blocker->fl_blocked_requests))
return;
spin_lock(&blocked_lock_lock);
- while (!list_empty(&blocker->fl_block)) {
- struct file_lock *waiter;
-
- waiter = list_first_entry(&blocker->fl_block,
- struct file_lock, fl_block);
- __locks_delete_block(waiter);
- if (waiter->fl_lmops && waiter->fl_lmops->lm_notify)
- waiter->fl_lmops->lm_notify(waiter);
- else
- wake_up(&waiter->fl_wait);
- }
+ __locks_wake_up_blocks(blocker);
spin_unlock(&blocked_lock_lock);
}
@@ -766,47 +882,50 @@ locks_delete_lock_ctx(struct file_lock *fl, struct list_head *dispose)
/* Determine if lock sys_fl blocks lock caller_fl. Common functionality
* checks for shared/exclusive status of overlapping locks.
*/
-static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
+static bool locks_conflict(struct file_lock *caller_fl,
+ struct file_lock *sys_fl)
{
if (sys_fl->fl_type == F_WRLCK)
- return 1;
+ return true;
if (caller_fl->fl_type == F_WRLCK)
- return 1;
- return 0;
+ return true;
+ return false;
}
/* Determine if lock sys_fl blocks lock caller_fl. POSIX specific
* checking before calling the locks_conflict().
*/
-static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
+static bool posix_locks_conflict(struct file_lock *caller_fl,
+ struct file_lock *sys_fl)
{
/* POSIX locks owned by the same process do not conflict with
* each other.
*/
if (posix_same_owner(caller_fl, sys_fl))
- return (0);
+ return false;
/* Check whether they overlap */
if (!locks_overlap(caller_fl, sys_fl))
- return 0;
+ return false;
- return (locks_conflict(caller_fl, sys_fl));
+ return locks_conflict(caller_fl, sys_fl);
}
/* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
* checking before calling the locks_conflict().
*/
-static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
+static bool flock_locks_conflict(struct file_lock *caller_fl,
+ struct file_lock *sys_fl)
{
/* FLOCK locks referring to the same filp do not conflict with
* each other.
*/
if (caller_fl->fl_file == sys_fl->fl_file)
- return (0);
+ return false;
if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND))
- return 0;
+ return false;
- return (locks_conflict(caller_fl, sys_fl));
+ return locks_conflict(caller_fl, sys_fl);
}
void
@@ -877,8 +996,11 @@ static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl)
struct file_lock *fl;
hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) {
- if (posix_same_owner(fl, block_fl))
- return fl->fl_next;
+ if (posix_same_owner(fl, block_fl)) {
+ while (fl->fl_blocker)
+ fl = fl->fl_blocker;
+ return fl;
+ }
}
return NULL;
}
@@ -965,12 +1087,13 @@ find_conflict:
if (!(request->fl_flags & FL_SLEEP))
goto out;
error = FILE_LOCK_DEFERRED;
- locks_insert_block(fl, request);
+ locks_insert_block(fl, request, flock_locks_conflict);
goto out;
}
if (request->fl_flags & FL_ACCESS)
goto out;
locks_copy_lock(new_fl, request);
+ locks_move_blocks(new_fl, request);
locks_insert_lock_ctx(new_fl, &ctx->flc_flock);
new_fl = NULL;
error = 0;
@@ -1039,12 +1162,13 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
spin_lock(&blocked_lock_lock);
if (likely(!posix_locks_deadlock(request, fl))) {
error = FILE_LOCK_DEFERRED;
- __locks_insert_block(fl, request);
+ __locks_insert_block(fl, request,
+ posix_locks_conflict);
}
spin_unlock(&blocked_lock_lock);
goto out;
- }
- }
+ }
+ }
/* If we're just looking for a conflict, we're done. */
error = 0;
@@ -1164,6 +1288,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
goto out;
}
locks_copy_lock(new_fl, request);
+ locks_move_blocks(new_fl, request);
locks_insert_lock_ctx(new_fl, &fl->fl_list);
fl = new_fl;
new_fl = NULL;
@@ -1237,13 +1362,11 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
error = posix_lock_inode(inode, fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
- error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
- if (!error)
- continue;
-
- locks_delete_block(fl);
- break;
+ error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker);
+ if (error)
+ break;
}
+ locks_delete_block(fl);
return error;
}
@@ -1324,7 +1447,7 @@ int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start,
error = posix_lock_inode(inode, &fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
- error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
+ error = wait_event_interruptible(fl.fl_wait, !fl.fl_blocker);
if (!error) {
/*
* If we've been sleeping someone might have
@@ -1334,13 +1457,12 @@ int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start,
continue;
}
- locks_delete_block(&fl);
break;
}
+ locks_delete_block(&fl);
return error;
}
-
EXPORT_SYMBOL(locks_mandatory_area);
#endif /* CONFIG_MANDATORY_FILE_LOCKING */
@@ -1511,14 +1633,14 @@ restart:
break_time -= jiffies;
if (break_time == 0)
break_time++;
- locks_insert_block(fl, new_fl);
+ locks_insert_block(fl, new_fl, leases_conflict);
trace_break_lease_block(inode, new_fl);
spin_unlock(&ctx->flc_lock);
percpu_up_read_preempt_enable(&file_rwsem);
locks_dispose_list(&dispose);
error = wait_event_interruptible_timeout(new_fl->fl_wait,
- !new_fl->fl_next, break_time);
+ !new_fl->fl_blocker, break_time);
percpu_down_read_preempt_disable(&file_rwsem);
spin_lock(&ctx->flc_lock);
@@ -1542,7 +1664,6 @@ out:
locks_free_lock(new_fl);
return error;
}
-
EXPORT_SYMBOL(__break_lease);
/**
@@ -1573,7 +1694,6 @@ void lease_get_mtime(struct inode *inode, struct timespec64 *time)
if (has_lease)
*time = current_time(inode);
}
-
EXPORT_SYMBOL(lease_get_mtime);
/**
@@ -1628,8 +1748,8 @@ int fcntl_getlease(struct file *filp)
/**
* check_conflicting_open - see if the given dentry points to a file that has
- * an existing open that would conflict with the
- * desired lease.
+ * an existing open that would conflict with the
+ * desired lease.
* @dentry: dentry to check
* @arg: type of lease that we're trying to acquire
* @flags: current lock flags
@@ -1646,7 +1766,7 @@ check_conflicting_open(const struct dentry *dentry, const long arg, int flags)
if (flags & FL_LAYOUT)
return 0;
- if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
+ if ((arg == F_RDLCK) && inode_is_open_for_write(inode))
return -EAGAIN;
if ((arg == F_WRLCK) && ((d_count(dentry) > 1) ||
@@ -1853,7 +1973,7 @@ EXPORT_SYMBOL(generic_setlease);
* @arg: type of lease to obtain
* @lease: file_lock to use when adding a lease
* @priv: private info for lm_setup when adding a lease (may be
- * NULL if lm_setup doesn't require it)
+ * NULL if lm_setup doesn't require it)
*
* Call this to establish a lease on the file. The "lease" argument is not
* used for F_UNLCK requests and may be NULL. For commands that set or alter
@@ -1931,13 +2051,11 @@ static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl)
error = flock_lock_inode(inode, fl);
if (error != FILE_LOCK_DEFERRED)
break;
- error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
- if (!error)
- continue;
-
- locks_delete_block(fl);
- break;
+ error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker);
+ if (error)
+ break;
}
+ locks_delete_block(fl);
return error;
}
@@ -2001,7 +2119,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
!(f.file->f_mode & (FMODE_READ|FMODE_WRITE)))
goto out_putf;
- lock = flock_make_lock(f.file, cmd);
+ lock = flock_make_lock(f.file, cmd, NULL);
if (IS_ERR(lock)) {
error = PTR_ERR(lock);
goto out_putf;
@@ -2143,7 +2261,7 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock *flock)
error = vfs_test_lock(filp, fl);
if (error)
goto out;
-
+
flock->l_type = fl->fl_type;
if (fl->fl_type != F_UNLCK) {
error = posix_lock_to_flock(flock, fl);
@@ -2210,13 +2328,11 @@ static int do_lock_file_wait(struct file *filp, unsigned int cmd,
error = vfs_lock_file(filp, cmd, fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
- error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
- if (!error)
- continue;
-
- locks_delete_block(fl);
- break;
+ error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker);
+ if (error)
+ break;
}
+ locks_delete_block(fl);
return error;
}
@@ -2476,6 +2592,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
if (!ctx || list_empty(&ctx->flc_posix))
return;
+ locks_init_lock(&lock);
lock.fl_type = F_UNLCK;
lock.fl_flags = FL_POSIX | FL_CLOSE;
lock.fl_start = 0;
@@ -2492,26 +2609,21 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
lock.fl_ops->fl_release_private(&lock);
trace_locks_remove_posix(inode, &lock, error);
}
-
EXPORT_SYMBOL(locks_remove_posix);
/* The i_flctx must be valid when calling into here */
static void
locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
{
- struct file_lock fl = {
- .fl_owner = filp,
- .fl_pid = current->tgid,
- .fl_file = filp,
- .fl_flags = FL_FLOCK | FL_CLOSE,
- .fl_type = F_UNLCK,
- .fl_end = OFFSET_MAX,
- };
+ struct file_lock fl;
struct inode *inode = locks_inode(filp);
if (list_empty(&flctx->flc_flock))
return;
+ flock_make_lock(filp, LOCK_UN, &fl);
+ fl.fl_flags |= FL_CLOSE;
+
if (filp->f_op->flock)
filp->f_op->flock(filp, F_SETLKW, &fl);
else
@@ -2570,27 +2682,6 @@ void locks_remove_file(struct file *filp)
}
/**
- * posix_unblock_lock - stop waiting for a file lock
- * @waiter: the lock which was waiting
- *
- * lockd needs to block waiting for locks.
- */
-int
-posix_unblock_lock(struct file_lock *waiter)
-{
- int status = 0;
-
- spin_lock(&blocked_lock_lock);
- if (waiter->fl_next)
- __locks_delete_block(waiter);
- else
- status = -ENOENT;
- spin_unlock(&blocked_lock_lock);
- return status;
-}
-EXPORT_SYMBOL(posix_unblock_lock);
-
-/**
* vfs_cancel_lock - file byte range unblock lock
* @filp: The file to apply the unblock to
* @fl: The lock to be unblocked
@@ -2603,7 +2694,6 @@ int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
return filp->f_op->lock(filp, F_CANCELLK, fl);
return 0;
}
-
EXPORT_SYMBOL_GPL(vfs_cancel_lock);
#ifdef CONFIG_PROC_FS
@@ -2707,7 +2797,7 @@ static int locks_show(struct seq_file *f, void *v)
lock_get_status(f, fl, iter->li_pos, "");
- list_for_each_entry(bfl, &fl->fl_block, fl_block)
+ list_for_each_entry(bfl, &fl->fl_blocked_requests, fl_blocked_member)
lock_get_status(f, bfl, iter->li_pos, " ->");
return 0;
@@ -2803,7 +2893,6 @@ static int __init filelock_init(void)
filelock_cache = kmem_cache_create("file_lock_cache",
sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
-
for_each_possible_cpu(i) {
struct file_lock_list_struct *fll = per_cpu_ptr(&file_lock_list, i);
@@ -2813,5 +2902,4 @@ static int __init filelock_init(void)
return 0;
}
-
core_initcall(filelock_init);
diff --git a/fs/namei.c b/fs/namei.c
index 0cab6494978c..914178cdbe94 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3701,8 +3701,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
if (error)
return error;
- if ((S_ISCHR(mode) || S_ISBLK(mode)) &&
- !ns_capable(dentry->d_sb->s_user_ns, CAP_MKNOD))
+ if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
return -EPERM;
if (!dir->i_op->mknod)
diff --git a/fs/namespace.c b/fs/namespace.c
index a7f91265ea67..a677b59efd74 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -26,6 +26,7 @@
#include <linux/memblock.h>
#include <linux/task_work.h>
#include <linux/sched/task.h>
+#include <uapi/linux/mount.h>
#include "pnode.h"
#include "internal.h"
@@ -245,13 +246,9 @@ out_free_cache:
* mnt_want/drop_write() will _keep_ the filesystem
* r/w.
*/
-int __mnt_is_readonly(struct vfsmount *mnt)
+bool __mnt_is_readonly(struct vfsmount *mnt)
{
- if (mnt->mnt_flags & MNT_READONLY)
- return 1;
- if (sb_rdonly(mnt->mnt_sb))
- return 1;
- return 0;
+ return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb);
}
EXPORT_SYMBOL_GPL(__mnt_is_readonly);
@@ -507,11 +504,12 @@ static int mnt_make_readonly(struct mount *mnt)
return ret;
}
-static void __mnt_unmake_readonly(struct mount *mnt)
+static int __mnt_unmake_readonly(struct mount *mnt)
{
lock_mount_hash();
mnt->mnt.mnt_flags &= ~MNT_READONLY;
unlock_mount_hash();
+ return 0;
}
int sb_prepare_remount_readonly(struct super_block *sb)
@@ -1360,7 +1358,7 @@ static void namespace_unlock(void)
if (likely(hlist_empty(&head)))
return;
- synchronize_rcu();
+ synchronize_rcu_expedited();
group_pin_kill(&head);
}
@@ -2215,21 +2213,91 @@ out:
return err;
}
-static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
+/*
+ * Don't allow locked mount flags to be cleared.
+ *
+ * No locks need to be held here while testing the various MNT_LOCK
+ * flags because those flags can never be cleared once they are set.
+ */
+static bool can_change_locked_flags(struct mount *mnt, unsigned int mnt_flags)
{
- int error = 0;
- int readonly_request = 0;
+ unsigned int fl = mnt->mnt.mnt_flags;
+
+ if ((fl & MNT_LOCK_READONLY) &&
+ !(mnt_flags & MNT_READONLY))
+ return false;
- if (ms_flags & MS_RDONLY)
- readonly_request = 1;
- if (readonly_request == __mnt_is_readonly(mnt))
+ if ((fl & MNT_LOCK_NODEV) &&
+ !(mnt_flags & MNT_NODEV))
+ return false;
+
+ if ((fl & MNT_LOCK_NOSUID) &&
+ !(mnt_flags & MNT_NOSUID))
+ return false;
+
+ if ((fl & MNT_LOCK_NOEXEC) &&
+ !(mnt_flags & MNT_NOEXEC))
+ return false;
+
+ if ((fl & MNT_LOCK_ATIME) &&
+ ((fl & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK)))
+ return false;
+
+ return true;
+}
+
+static int change_mount_ro_state(struct mount *mnt, unsigned int mnt_flags)
+{
+ bool readonly_request = (mnt_flags & MNT_READONLY);
+
+ if (readonly_request == __mnt_is_readonly(&mnt->mnt))
return 0;
if (readonly_request)
- error = mnt_make_readonly(real_mount(mnt));
- else
- __mnt_unmake_readonly(real_mount(mnt));
- return error;
+ return mnt_make_readonly(mnt);
+
+ return __mnt_unmake_readonly(mnt);
+}
+
+/*
+ * Update the user-settable attributes on a mount. The caller must hold
+ * sb->s_umount for writing.
+ */
+static void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags)
+{
+ lock_mount_hash();
+ mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
+ mnt->mnt.mnt_flags = mnt_flags;
+ touch_mnt_namespace(mnt->mnt_ns);
+ unlock_mount_hash();
+}
+
+/*
+ * Handle reconfiguration of the mountpoint only without alteration of the
+ * superblock it refers to. This is triggered by specifying MS_REMOUNT|MS_BIND
+ * to mount(2).
+ */
+static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
+{
+ struct super_block *sb = path->mnt->mnt_sb;
+ struct mount *mnt = real_mount(path->mnt);
+ int ret;
+
+ if (!check_mnt(mnt))
+ return -EINVAL;
+
+ if (path->dentry != mnt->mnt.mnt_root)
+ return -EINVAL;
+
+ if (!can_change_locked_flags(mnt, mnt_flags))
+ return -EPERM;
+
+ down_write(&sb->s_umount);
+ ret = change_mount_ro_state(mnt, mnt_flags);
+ if (ret == 0)
+ set_mount_attributes(mnt, mnt_flags);
+ up_write(&sb->s_umount);
+ return ret;
}
/*
@@ -2243,6 +2311,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
int err;
struct super_block *sb = path->mnt->mnt_sb;
struct mount *mnt = real_mount(path->mnt);
+ void *sec_opts = NULL;
if (!check_mnt(mnt))
return -EINVAL;
@@ -2250,50 +2319,25 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
if (path->dentry != path->mnt->mnt_root)
return -EINVAL;
- /* Don't allow changing of locked mnt flags.
- *
- * No locks need to be held here while testing the various
- * MNT_LOCK flags because those flags can never be cleared
- * once they are set.
- */
- if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
- !(mnt_flags & MNT_READONLY)) {
- return -EPERM;
- }
- if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
- !(mnt_flags & MNT_NODEV)) {
- return -EPERM;
- }
- if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
- !(mnt_flags & MNT_NOSUID)) {
- return -EPERM;
- }
- if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
- !(mnt_flags & MNT_NOEXEC)) {
- return -EPERM;
- }
- if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
- ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
+ if (!can_change_locked_flags(mnt, mnt_flags))
return -EPERM;
- }
- err = security_sb_remount(sb, data);
+ if (data && !(sb->s_type->fs_flags & FS_BINARY_MOUNTDATA)) {
+ err = security_sb_eat_lsm_opts(data, &sec_opts);
+ if (err)
+ return err;
+ }
+ err = security_sb_remount(sb, sec_opts);
+ security_free_mnt_opts(&sec_opts);
if (err)
return err;
down_write(&sb->s_umount);
- if (ms_flags & MS_BIND)
- err = change_mount_flags(path->mnt, ms_flags);
- else if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
- err = -EPERM;
- else
+ err = -EPERM;
+ if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
err = do_remount_sb(sb, sb_flags, data, 0);
- if (!err) {
- lock_mount_hash();
- mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
- mnt->mnt.mnt_flags = mnt_flags;
- touch_mnt_namespace(mnt->mnt_ns);
- unlock_mount_hash();
+ if (!err)
+ set_mount_attributes(mnt, mnt_flags);
}
up_write(&sb->s_umount);
return err;
@@ -2651,7 +2695,7 @@ static long exact_copy_from_user(void *to, const void __user * from,
const char __user *f = from;
char c;
- if (!access_ok(VERIFY_READ, from, n))
+ if (!access_ok(from, n))
return n;
current->kernel_uaccess_faults_ok++;
@@ -2788,7 +2832,9 @@ long do_mount(const char *dev_name, const char __user *dir_name,
SB_LAZYTIME |
SB_I_VERSION);
- if (flags & MS_REMOUNT)
+ if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND))
+ retval = do_reconfigure_mnt(&path, mnt_flags);
+ else if (flags & MS_REMOUNT)
retval = do_remount(&path, flags, sb_flags, mnt_flags,
data_page);
else if (flags & MS_BIND)
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index d3781cd983f6..690221747b47 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -584,7 +584,7 @@ static int decode_sector_number(__be32 **rp, sector_t *sp)
static struct nfs4_deviceid_node *
bl_find_get_deviceid(struct nfs_server *server,
- const struct nfs4_deviceid *id, struct rpc_cred *cred,
+ const struct nfs4_deviceid *id, const struct cred *cred,
gfp_t gfp_mask)
{
struct nfs4_deviceid_node *node;
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 509dc5adeb8f..0b602a39dd71 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -56,7 +56,7 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net)
nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
if (ret > 0) {
nn->nfs_callback_tcpport6 = ret;
- dprintk("NFS: Callback listener port = %u (af %u, net %x\n",
+ dprintk("NFS: Callback listener port = %u (af %u, net %x)\n",
nn->nfs_callback_tcpport6, PF_INET6, net->ns.inum);
} else if (ret != -EAFNOSUPPORT)
goto out_err;
@@ -206,11 +206,13 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
goto err_bind;
}
- ret = -EPROTONOSUPPORT;
+ ret = 0;
if (!IS_ENABLED(CONFIG_NFS_V4_1) || minorversion == 0)
ret = nfs4_callback_up_net(serv, net);
- else if (xprt->ops->bc_up)
- ret = xprt->ops->bc_up(serv, net);
+ else if (xprt->ops->bc_setup)
+ set_bc_enabled(serv);
+ else
+ ret = -EPROTONOSUPPORT;
if (ret < 0) {
printk(KERN_ERR "NFS: callback service start failed\n");
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 96d5f8135eb9..fb1cf1a4bda2 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -151,7 +151,6 @@ EXPORT_SYMBOL_GPL(unregister_nfs_version);
struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
{
struct nfs_client *clp;
- struct rpc_cred *cred;
int err = -ENOMEM;
if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
@@ -182,9 +181,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
clp->cl_proto = cl_init->proto;
clp->cl_net = get_net(cl_init->net);
- cred = rpc_lookup_machine_cred("*");
- if (!IS_ERR(cred))
- clp->cl_machine_cred = cred;
+ clp->cl_principal = "*";
nfs_fscache_get_client_cookie(clp);
return clp;
@@ -246,9 +243,6 @@ void nfs_free_client(struct nfs_client *clp)
if (!IS_ERR(clp->cl_rpcclient))
rpc_shutdown_client(clp->cl_rpcclient);
- if (clp->cl_machine_cred != NULL)
- put_rpccred(clp->cl_machine_cred);
-
put_net(clp->cl_net);
put_nfs_version(clp->cl_nfs_mod);
kfree(clp->cl_hostname);
@@ -527,6 +521,7 @@ int nfs_create_rpc_client(struct nfs_client *clp,
return PTR_ERR(clnt);
}
+ clnt->cl_principal = clp->cl_principal;
clp->cl_rpcclient = clnt;
return 0;
}
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 6ec2f78c1e19..885363ca8569 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -26,10 +26,8 @@
static void nfs_free_delegation(struct nfs_delegation *delegation)
{
- if (delegation->cred) {
- put_rpccred(delegation->cred);
- delegation->cred = NULL;
- }
+ put_cred(delegation->cred);
+ delegation->cred = NULL;
kfree_rcu(delegation, rcu);
}
@@ -178,13 +176,13 @@ again:
* @pagemod_limit: write delegation "space_limit"
*
*/
-void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
+void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
fmode_t type,
const nfs4_stateid *stateid,
unsigned long pagemod_limit)
{
struct nfs_delegation *delegation;
- struct rpc_cred *oldcred = NULL;
+ const struct cred *oldcred = NULL;
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
@@ -195,12 +193,12 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
delegation->type = type;
delegation->pagemod_limit = pagemod_limit;
oldcred = delegation->cred;
- delegation->cred = get_rpccred(cred);
+ delegation->cred = get_cred(cred);
clear_bit(NFS_DELEGATION_NEED_RECLAIM,
&delegation->flags);
spin_unlock(&delegation->lock);
rcu_read_unlock();
- put_rpccred(oldcred);
+ put_cred(oldcred);
trace_nfs4_reclaim_delegation(inode, type);
return;
}
@@ -341,7 +339,7 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation,
*
* Returns zero on success, or a negative errno value.
*/
-int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred,
+int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
fmode_t type,
const nfs4_stateid *stateid,
unsigned long pagemod_limit)
@@ -360,7 +358,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred,
delegation->type = type;
delegation->pagemod_limit = pagemod_limit;
delegation->change_attr = inode_peek_iversion_raw(inode);
- delegation->cred = get_rpccred(cred);
+ delegation->cred = get_cred(cred);
delegation->inode = inode;
delegation->flags = 1<<NFS_DELEGATION_REFERENCED;
spin_lock_init(&delegation->lock);
@@ -1047,7 +1045,7 @@ void nfs_reap_expired_delegations(struct nfs_client *clp)
struct nfs_delegation *delegation;
struct nfs_server *server;
struct inode *inode;
- struct rpc_cred *cred;
+ const struct cred *cred;
nfs4_stateid stateid;
restart:
@@ -1069,7 +1067,7 @@ restart:
nfs_sb_deactive(server->super);
goto restart;
}
- cred = get_rpccred_rcu(delegation->cred);
+ cred = get_cred_rcu(delegation->cred);
nfs4_stateid_copy(&stateid, &delegation->stateid);
clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags);
rcu_read_unlock();
@@ -1078,7 +1076,7 @@ restart:
nfs_revoke_delegation(inode, &stateid);
nfs_inode_find_state_and_recover(inode, &stateid);
}
- put_rpccred(cred);
+ put_cred(cred);
if (nfs4_server_rebooted(clp)) {
nfs_inode_mark_test_expired_delegation(server,inode);
iput(inode);
@@ -1173,7 +1171,7 @@ out:
* otherwise "false" is returned.
*/
bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags,
- nfs4_stateid *dst, struct rpc_cred **cred)
+ nfs4_stateid *dst, const struct cred **cred)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
@@ -1187,7 +1185,7 @@ bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags,
nfs4_stateid_copy(dst, &delegation->stateid);
nfs_mark_delegation_referenced(delegation);
if (cred)
- *cred = get_rpccred(delegation->cred);
+ *cred = get_cred(delegation->cred);
}
rcu_read_unlock();
return ret;
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index bb1ef8c37af4..dcbf3394ba0e 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -15,7 +15,7 @@
*/
struct nfs_delegation {
struct list_head super_list;
- struct rpc_cred *cred;
+ const struct cred *cred;
struct inode *inode;
nfs4_stateid stateid;
fmode_t type;
@@ -36,9 +36,9 @@ enum {
NFS_DELEGATION_TEST_EXPIRED,
};
-int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred,
+int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit);
-void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
+void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit);
int nfs4_inode_return_delegation(struct inode *inode);
int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
@@ -60,10 +60,10 @@ void nfs_mark_test_expired_all_delegations(struct nfs_client *clp);
void nfs_reap_expired_delegations(struct nfs_client *clp);
/* NFSv4 delegation-related procedures */
-int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync);
+int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync);
int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid, fmode_t type);
int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid);
-bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, struct rpc_cred **cred);
+bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, const struct cred **cred);
bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 71b2e390becf..6bf4471850c8 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -67,7 +67,7 @@ const struct address_space_operations nfs_dir_aops = {
.freepage = nfs_readdir_clear_array,
};
-static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, struct rpc_cred *cred)
+static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, const struct cred *cred)
{
struct nfs_inode *nfsi = NFS_I(dir);
struct nfs_open_dir_context *ctx;
@@ -77,7 +77,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir
ctx->attr_gencount = nfsi->attr_gencount;
ctx->dir_cookie = 0;
ctx->dup_cookie = 0;
- ctx->cred = get_rpccred(cred);
+ ctx->cred = get_cred(cred);
spin_lock(&dir->i_lock);
list_add(&ctx->list, &nfsi->open_files);
spin_unlock(&dir->i_lock);
@@ -91,7 +91,7 @@ static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_cont
spin_lock(&dir->i_lock);
list_del(&ctx->list);
spin_unlock(&dir->i_lock);
- put_rpccred(ctx->cred);
+ put_cred(ctx->cred);
kfree(ctx);
}
@@ -103,23 +103,18 @@ nfs_opendir(struct inode *inode, struct file *filp)
{
int res = 0;
struct nfs_open_dir_context *ctx;
- struct rpc_cred *cred;
dfprintk(FILE, "NFS: open dir(%pD2)\n", filp);
nfs_inc_stats(inode, NFSIOS_VFSOPEN);
- cred = rpc_lookup_cred();
- if (IS_ERR(cred))
- return PTR_ERR(cred);
- ctx = alloc_nfs_open_dir_context(inode, cred);
+ ctx = alloc_nfs_open_dir_context(inode, current_cred());
if (IS_ERR(ctx)) {
res = PTR_ERR(ctx);
goto out;
}
filp->private_data = ctx;
out:
- put_rpccred(cred);
return res;
}
@@ -334,7 +329,7 @@ int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
struct nfs_entry *entry, struct file *file, struct inode *inode)
{
struct nfs_open_dir_context *ctx = file->private_data;
- struct rpc_cred *cred = ctx->cred;
+ const struct cred *cred = ctx->cred;
unsigned long timestamp, gencount;
int error;
@@ -2139,7 +2134,7 @@ MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache lengt
static void nfs_access_free_entry(struct nfs_access_entry *entry)
{
- put_rpccred(entry->cred);
+ put_cred(entry->cred);
kfree_rcu(entry, rcu_head);
smp_mb__before_atomic();
atomic_long_dec(&nfs_access_nr_entries);
@@ -2265,17 +2260,18 @@ void nfs_access_zap_cache(struct inode *inode)
}
EXPORT_SYMBOL_GPL(nfs_access_zap_cache);
-static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred)
+static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, const struct cred *cred)
{
struct rb_node *n = NFS_I(inode)->access_cache.rb_node;
- struct nfs_access_entry *entry;
while (n != NULL) {
- entry = rb_entry(n, struct nfs_access_entry, rb_node);
+ struct nfs_access_entry *entry =
+ rb_entry(n, struct nfs_access_entry, rb_node);
+ int cmp = cred_fscmp(cred, entry->cred);
- if (cred < entry->cred)
+ if (cmp < 0)
n = n->rb_left;
- else if (cred > entry->cred)
+ else if (cmp > 0)
n = n->rb_right;
else
return entry;
@@ -2283,7 +2279,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, st
return NULL;
}
-static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res, bool may_block)
+static int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool may_block)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_access_entry *cache;
@@ -2326,7 +2322,7 @@ out_zap:
return -ENOENT;
}
-static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
+static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res)
{
/* Only check the most recently returned cache entry,
* but do it without locking.
@@ -2363,15 +2359,17 @@ static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *
struct rb_node **p = &root_node->rb_node;
struct rb_node *parent = NULL;
struct nfs_access_entry *entry;
+ int cmp;
spin_lock(&inode->i_lock);
while (*p != NULL) {
parent = *p;
entry = rb_entry(parent, struct nfs_access_entry, rb_node);
+ cmp = cred_fscmp(set->cred, entry->cred);
- if (set->cred < entry->cred)
+ if (cmp < 0)
p = &parent->rb_left;
- else if (set->cred > entry->cred)
+ else if (cmp > 0)
p = &parent->rb_right;
else
goto found;
@@ -2395,7 +2393,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
if (cache == NULL)
return;
RB_CLEAR_NODE(&cache->rb_node);
- cache->cred = get_rpccred(set->cred);
+ cache->cred = get_cred(set->cred);
cache->mask = set->mask;
/* The above field assignments must be visible
@@ -2459,7 +2457,7 @@ void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result)
}
EXPORT_SYMBOL_GPL(nfs_access_set_mask);
-static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
+static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
{
struct nfs_access_entry cache;
bool may_block = (mask & MAY_NOT_BLOCK) == 0;
@@ -2523,7 +2521,7 @@ static int nfs_open_permission_mask(int openflags)
return mask;
}
-int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
+int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags)
{
return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags));
}
@@ -2548,7 +2546,7 @@ static int nfs_execute_ok(struct inode *inode, int mask)
int nfs_permission(struct inode *inode, int mask)
{
- struct rpc_cred *cred;
+ const struct cred *cred = current_cred();
int res = 0;
nfs_inc_stats(inode, NFSIOS_VFSACCESS);
@@ -2582,20 +2580,11 @@ force_lookup:
/* Always try fast lookups first */
rcu_read_lock();
- cred = rpc_lookup_cred_nonblock();
- if (!IS_ERR(cred))
- res = nfs_do_access(inode, cred, mask|MAY_NOT_BLOCK);
- else
- res = PTR_ERR(cred);
+ res = nfs_do_access(inode, cred, mask|MAY_NOT_BLOCK);
rcu_read_unlock();
if (res == -ECHILD && !(mask & MAY_NOT_BLOCK)) {
/* Fast lookup failed, try the slow way */
- cred = rpc_lookup_cred();
- if (!IS_ERR(cred)) {
- res = nfs_do_access(inode, cred, mask);
- put_rpccred(cred);
- } else
- res = PTR_ERR(cred);
+ res = nfs_do_access(inode, cred, mask);
}
out:
if (!res && (mask & MAY_EXEC))
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 310d7500f665..63abe705f4ca 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -9,6 +9,7 @@
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
+#include <linux/sched/mm.h>
#include <linux/sunrpc/metrics.h>
@@ -27,9 +28,6 @@
#define FF_LAYOUT_POLL_RETRY_MAX (15*HZ)
#define FF_LAYOUTRETURN_MAXERR 20
-
-static struct group_info *ff_zero_group;
-
static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
struct nfs_pgio_header *hdr);
static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
@@ -226,16 +224,14 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
{
- struct rpc_cred *cred;
+ const struct cred *cred;
ff_layout_remove_mirror(mirror);
kfree(mirror->fh_versions);
cred = rcu_access_pointer(mirror->ro_cred);
- if (cred)
- put_rpccred(cred);
+ put_cred(cred);
cred = rcu_access_pointer(mirror->rw_cred);
- if (cred)
- put_rpccred(cred);
+ put_cred(cred);
nfs4_ff_layout_put_deviceid(mirror->mirror_ds);
kfree(mirror);
}
@@ -413,8 +409,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
for (i = 0; i < fls->mirror_array_cnt; i++) {
struct nfs4_ff_layout_mirror *mirror;
- struct auth_cred acred = { .group_info = ff_zero_group };
- struct rpc_cred __rcu *cred;
+ struct cred *kcred;
+ const struct cred *cred;
+ kuid_t uid;
+ kgid_t gid;
u32 ds_count, fh_count, id;
int j;
@@ -482,21 +480,28 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
if (rc)
goto out_err_free;
- acred.uid = make_kuid(&init_user_ns, id);
+ uid = make_kuid(&init_user_ns, id);
/* group */
rc = decode_name(&stream, &id);
if (rc)
goto out_err_free;
- acred.gid = make_kgid(&init_user_ns, id);
+ gid = make_kgid(&init_user_ns, id);
- /* find the cred for it */
- rcu_assign_pointer(cred, rpc_lookup_generic_cred(&acred, 0, gfp_flags));
- if (IS_ERR(cred)) {
- rc = PTR_ERR(cred);
- goto out_err_free;
+ if (gfp_flags & __GFP_FS)
+ kcred = prepare_kernel_cred(NULL);
+ else {
+ unsigned int nofs_flags = memalloc_nofs_save();
+ kcred = prepare_kernel_cred(NULL);
+ memalloc_nofs_restore(nofs_flags);
}
+ rc = -ENOMEM;
+ if (!kcred)
+ goto out_err_free;
+ kcred->fsuid = uid;
+ kcred->fsgid = gid;
+ cred = kcred;
if (lgr->range.iomode == IOMODE_READ)
rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred);
@@ -519,8 +524,8 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
dprintk("%s: iomode %s uid %u gid %u\n", __func__,
lgr->range.iomode == IOMODE_READ ? "READ" : "RW",
- from_kuid(&init_user_ns, acred.uid),
- from_kgid(&init_user_ns, acred.gid));
+ from_kuid(&init_user_ns, uid),
+ from_kgid(&init_user_ns, gid));
}
p = xdr_inline_decode(&stream, 4);
@@ -1698,7 +1703,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
- struct rpc_cred *ds_cred;
+ const struct cred *ds_cred;
loff_t offset = hdr->args.offset;
u32 idx = hdr->pgio_mirror_idx;
int vers;
@@ -1749,7 +1754,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
vers == 3 ? &ff_layout_read_call_ops_v3 :
&ff_layout_read_call_ops_v4,
0, RPC_TASK_SOFTCONN);
- put_rpccred(ds_cred);
+ put_cred(ds_cred);
return PNFS_ATTEMPTED;
out_failed:
@@ -1765,7 +1770,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
- struct rpc_cred *ds_cred;
+ const struct cred *ds_cred;
loff_t offset = hdr->args.offset;
int vers;
struct nfs_fh *fh;
@@ -1814,7 +1819,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
vers == 3 ? &ff_layout_write_call_ops_v3 :
&ff_layout_write_call_ops_v4,
sync, RPC_TASK_SOFTCONN);
- put_rpccred(ds_cred);
+ put_cred(ds_cred);
return PNFS_ATTEMPTED;
out_failed:
@@ -1844,7 +1849,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
struct pnfs_layout_segment *lseg = data->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
- struct rpc_cred *ds_cred;
+ const struct cred *ds_cred;
u32 idx;
int vers, ret;
struct nfs_fh *fh;
@@ -1884,7 +1889,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
vers == 3 ? &ff_layout_commit_call_ops_v3 :
&ff_layout_commit_call_ops_v4,
how, RPC_TASK_SOFTCONN);
- put_rpccred(ds_cred);
+ put_cred(ds_cred);
return ret;
out_err:
pnfs_generic_prepare_to_resend_writes(data);
@@ -2383,11 +2388,6 @@ static int __init nfs4flexfilelayout_init(void)
{
printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Registering...\n",
__func__);
- if (!ff_zero_group) {
- ff_zero_group = groups_alloc(0);
- if (!ff_zero_group)
- return -ENOMEM;
- }
return pnfs_register_layoutdriver(&flexfilelayout_type);
}
@@ -2396,10 +2396,6 @@ static void __exit nfs4flexfilelayout_exit(void)
printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Unregistering...\n",
__func__);
pnfs_unregister_layoutdriver(&flexfilelayout_type);
- if (ff_zero_group) {
- put_group_info(ff_zero_group);
- ff_zero_group = NULL;
- }
}
MODULE_ALIAS("nfs-layouttype4-4");
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index de50a342d5a5..c2626bad466b 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -81,8 +81,8 @@ struct nfs4_ff_layout_mirror {
u32 fh_versions_cnt;
struct nfs_fh *fh_versions;
nfs4_stateid stateid;
- struct rpc_cred __rcu *ro_cred;
- struct rpc_cred __rcu *rw_cred;
+ const struct cred __rcu *ro_cred;
+ const struct cred __rcu *rw_cred;
refcount_t ref;
spinlock_t lock;
unsigned long flags;
@@ -229,8 +229,8 @@ nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg,
u32 ds_idx,
struct nfs_client *ds_clp,
struct inode *inode);
-struct rpc_cred *ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg,
- u32 ds_idx, struct rpc_cred *mdscred);
+const struct cred *ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg,
+ u32 ds_idx, const struct cred *mdscred);
bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg);
bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg);
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index d23347389626..11766a74216d 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -330,10 +330,10 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
return 0;
}
-static struct rpc_cred *
+static const struct cred *
ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode)
{
- struct rpc_cred *cred, __rcu **pcred;
+ const struct cred *cred, __rcu **pcred;
if (iomode == IOMODE_READ)
pcred = &mirror->ro_cred;
@@ -346,7 +346,7 @@ ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode)
if (!cred)
break;
- cred = get_rpccred_rcu(cred);
+ cred = get_cred_rcu(cred);
} while(!cred);
rcu_read_unlock();
return cred;
@@ -465,19 +465,19 @@ out:
return ds;
}
-struct rpc_cred *
+const struct cred *
ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx,
- struct rpc_cred *mdscred)
+ const struct cred *mdscred)
{
struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
- struct rpc_cred *cred;
+ const struct cred *cred;
if (mirror && !mirror->mirror_ds->ds_versions[0].tightly_coupled) {
cred = ff_layout_get_mirror_cred(mirror, lseg->pls_range.iomode);
if (!cred)
- cred = get_rpccred(mdscred);
+ cred = get_cred(mdscred);
} else {
- cred = get_rpccred(mdscred);
+ cred = get_cred(mdscred);
}
return cred;
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 5b1eee4952b7..094775ea0781 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -950,18 +950,17 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry,
struct file *filp)
{
struct nfs_open_context *ctx;
- struct rpc_cred *cred = rpc_lookup_cred();
- if (IS_ERR(cred))
- return ERR_CAST(cred);
+ const struct cred *cred = get_current_cred();
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx) {
- put_rpccred(cred);
+ put_cred(cred);
return ERR_PTR(-ENOMEM);
}
nfs_sb_active(dentry->d_sb);
ctx->dentry = dget(dentry);
ctx->cred = cred;
+ ctx->ll_cred = NULL;
ctx->state = NULL;
ctx->mode = f_mode;
ctx->flags = 0;
@@ -997,10 +996,10 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
}
if (inode != NULL)
NFS_PROTO(inode)->close_context(ctx, is_sync);
- if (ctx->cred != NULL)
- put_rpccred(ctx->cred);
+ put_cred(ctx->cred);
dput(ctx->dentry);
nfs_sb_deactive(sb);
+ put_rpccred(ctx->ll_cred);
kfree(ctx->mdsthreshold);
kfree_rcu(ctx, rcu_head);
}
@@ -1042,7 +1041,7 @@ EXPORT_SYMBOL_GPL(nfs_file_set_open_context);
/*
* Given an inode, search for an open context with the desired characteristics
*/
-struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode)
+struct nfs_open_context *nfs_find_open_context(struct inode *inode, const struct cred *cred, fmode_t mode)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_open_context *pos, *ctx = NULL;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 8357ff69962f..b1e577302518 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -123,7 +123,7 @@ struct nfs_parsed_mount_data {
unsigned short protocol;
} nfs_server;
- struct security_mnt_opts lsm_opts;
+ void *lsm_opts;
struct net *net;
};
@@ -254,7 +254,7 @@ struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *);
void nfs_pgio_header_free(struct nfs_pgio_header *);
int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
- struct rpc_cred *cred, const struct nfs_rpc_ops *rpc_ops,
+ const struct cred *cred, const struct nfs_rpc_ops *rpc_ops,
const struct rpc_call_ops *call_ops, int how, int flags);
void nfs_free_request(struct nfs_page *req);
struct nfs_pgio_mirror *
@@ -269,7 +269,7 @@ static inline bool nfs_pgio_has_mirroring(struct nfs_pageio_descriptor *desc)
static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1,
const struct nfs_open_context *ctx2)
{
- return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state;
+ return cred_fscmp(ctx1->cred, ctx2->cred) == 0 && ctx1->state == ctx2->state;
}
/* nfs2xdr.c */
@@ -395,7 +395,6 @@ extern const struct super_operations nfs_sops;
extern struct file_system_type nfs_fs_type;
extern struct file_system_type nfs_xdev_fs_type;
#if IS_ENABLED(CONFIG_NFS_V4)
-extern struct file_system_type nfs4_xdev_fs_type;
extern struct file_system_type nfs4_referral_fs_type;
#endif
bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t);
@@ -565,13 +564,13 @@ extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
const struct nfs_client_initdata *);
extern int nfs40_walk_client_list(struct nfs_client *clp,
struct nfs_client **result,
- struct rpc_cred *cred);
+ const struct cred *cred);
extern int nfs41_walk_client_list(struct nfs_client *clp,
struct nfs_client **result,
- struct rpc_cred *cred);
-extern int nfs4_test_session_trunk(struct rpc_clnt *,
- struct rpc_xprt *,
- void *);
+ const struct cred *cred);
+extern void nfs4_test_session_trunk(struct rpc_clnt *clnt,
+ struct rpc_xprt *xprt,
+ void *data);
static inline struct inode *nfs_igrab_and_active(struct inode *inode)
{
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 71bc16225b98..a3ad2d46fd42 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -607,7 +607,7 @@ out:
* readdirplus.
*/
static int
-nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
+nfs3_proc_readdir(struct dentry *dentry, const struct cred *cred,
u64 cookie, struct page **pages, unsigned int count, bool plus)
{
struct inode *dir = d_inode(dentry);
@@ -628,7 +628,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
.rpc_proc = &nfs3_procedures[NFS3PROC_READDIR],
.rpc_argp = &arg,
.rpc_resp = &res,
- .rpc_cred = cred
+ .rpc_cred = cred,
};
int status = -ENOMEM;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 1b994b527518..06ac3d9ac7c6 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -62,10 +62,11 @@ struct nfs4_minor_version_ops {
void (*free_lock_state)(struct nfs_server *,
struct nfs4_lock_state *);
int (*test_and_free_expired)(struct nfs_server *,
- nfs4_stateid *, struct rpc_cred *);
+ nfs4_stateid *, const struct cred *);
struct nfs_seqid *
(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t);
- int (*session_trunk)(struct rpc_clnt *, struct rpc_xprt *, void *);
+ void (*session_trunk)(struct rpc_clnt *clnt,
+ struct rpc_xprt *xprt, void *data);
const struct rpc_call_ops *call_sync_ops;
const struct nfs4_state_recovery_ops *reboot_recovery_ops;
const struct nfs4_state_recovery_ops *nograce_recovery_ops;
@@ -107,7 +108,7 @@ struct nfs4_state_owner {
unsigned long so_expires;
struct rb_node so_server_node;
- struct rpc_cred *so_cred; /* Associated cred */
+ const struct cred *so_cred; /* Associated cred */
spinlock_t so_lock;
atomic_t so_count;
@@ -212,10 +213,10 @@ struct nfs4_state_recovery_ops {
int state_flag_bit;
int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *);
int (*recover_lock)(struct nfs4_state *, struct file_lock *);
- int (*establish_clid)(struct nfs_client *, struct rpc_cred *);
- int (*reclaim_complete)(struct nfs_client *, struct rpc_cred *);
+ int (*establish_clid)(struct nfs_client *, const struct cred *);
+ int (*reclaim_complete)(struct nfs_client *, const struct cred *);
int (*detect_trunking)(struct nfs_client *, struct nfs_client **,
- struct rpc_cred *);
+ const struct cred *);
};
struct nfs4_opendata {
@@ -245,19 +246,19 @@ struct nfs4_opendata {
struct nfs4_add_xprt_data {
struct nfs_client *clp;
- struct rpc_cred *cred;
+ const struct cred *cred;
};
struct nfs4_state_maintenance_ops {
- int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *, unsigned);
- struct rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *);
- int (*renew_lease)(struct nfs_client *, struct rpc_cred *);
+ int (*sched_state_renewal)(struct nfs_client *, const struct cred *, unsigned);
+ const struct cred * (*get_state_renewal_cred)(struct nfs_client *);
+ int (*renew_lease)(struct nfs_client *, const struct cred *);
};
struct nfs4_mig_recovery_ops {
int (*get_locations)(struct inode *, struct nfs4_fs_locations *,
- struct page *, struct rpc_cred *);
- int (*fsid_present)(struct inode *, struct rpc_cred *);
+ struct page *, const struct cred *);
+ int (*fsid_present)(struct inode *, const struct cred *);
};
extern const struct dentry_operations nfs4_dentry_operations;
@@ -286,21 +287,21 @@ extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *,
struct rpc_message *, struct nfs4_sequence_args *,
struct nfs4_sequence_res *, int);
extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int, int);
-extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
-extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
+extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, const struct cred *, struct nfs4_setclientid_res *);
+extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, const struct cred *);
extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool);
-extern int nfs4_proc_bind_conn_to_session(struct nfs_client *, struct rpc_cred *cred);
-extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
+extern int nfs4_proc_bind_conn_to_session(struct nfs_client *, const struct cred *cred);
+extern int nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cred);
extern int nfs4_destroy_clientid(struct nfs_client *clp);
-extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
-extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
+extern int nfs4_init_clientid(struct nfs_client *, const struct cred *);
+extern int nfs41_init_clientid(struct nfs_client *, const struct cred *);
extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait);
extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *,
struct nfs4_fs_locations *, struct page *);
extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *,
- struct page *page, struct rpc_cred *);
-extern int nfs4_proc_fsid_present(struct inode *, struct rpc_cred *);
+ struct page *page, const struct cred *);
+extern int nfs4_proc_fsid_present(struct inode *, const struct cred *);
extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, const struct qstr *,
struct nfs_fh *, struct nfs_fattr *);
extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
@@ -312,8 +313,8 @@ extern int nfs4_set_rw_stateid(nfs4_stateid *stateid,
#if defined(CONFIG_NFS_V4_1)
extern int nfs41_sequence_done(struct rpc_task *, struct nfs4_sequence_res *);
-extern int nfs4_proc_create_session(struct nfs_client *, struct rpc_cred *);
-extern int nfs4_proc_destroy_session(struct nfs4_session *, struct rpc_cred *);
+extern int nfs4_proc_create_session(struct nfs_client *, const struct cred *);
+extern int nfs4_proc_destroy_session(struct nfs4_session *, const struct cred *);
extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
struct nfs_fsinfo *fsinfo);
extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data,
@@ -338,7 +339,6 @@ static inline bool
_nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
struct rpc_clnt **clntp, struct rpc_message *msg)
{
- struct rpc_cred *newcred = NULL;
rpc_authflavor_t flavor;
if (sp4_mode == NFS_SP4_MACH_CRED_CLEANUP ||
@@ -353,13 +353,7 @@ _nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
return false;
}
if (test_bit(sp4_mode, &clp->cl_sp4_flags)) {
- spin_lock(&clp->cl_lock);
- if (clp->cl_machine_cred != NULL)
- /* don't call get_rpccred on the machine cred -
- * a reference will be held for life of clp */
- newcred = clp->cl_machine_cred;
- spin_unlock(&clp->cl_lock);
- msg->rpc_cred = newcred;
+ msg->rpc_cred = rpc_machine_cred();
flavor = clp->cl_rpcclient->cl_auth->au_flavor;
WARN_ON_ONCE(flavor != RPC_AUTH_GSS_KRB5I &&
@@ -450,16 +444,16 @@ extern void nfs4_set_lease_period(struct nfs_client *clp,
/* nfs4state.c */
-struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp);
-struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
-struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
+const struct cred *nfs4_get_clid_cred(struct nfs_client *clp);
+const struct cred *nfs4_get_machine_cred(struct nfs_client *clp);
+const struct cred *nfs4_get_renew_cred(struct nfs_client *clp);
int nfs4_discover_server_trunking(struct nfs_client *clp,
struct nfs_client **);
int nfs40_discover_server_trunking(struct nfs_client *clp,
- struct nfs_client **, struct rpc_cred *);
+ struct nfs_client **, const struct cred *);
#if defined(CONFIG_NFS_V4_1)
int nfs41_discover_server_trunking(struct nfs_client *clp,
- struct nfs_client **, struct rpc_cred *);
+ struct nfs_client **, const struct cred *);
extern void nfs4_schedule_session_recovery(struct nfs4_session *, int);
extern void nfs41_notify_server(struct nfs_client *);
#else
@@ -468,7 +462,7 @@ static inline void nfs4_schedule_session_recovery(struct nfs4_session *session,
}
#endif /* CONFIG_NFS_V4_1 */
-extern struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *, gfp_t);
+extern struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *, const struct cred *, gfp_t);
extern void nfs4_put_state_owner(struct nfs4_state_owner *);
extern void nfs4_purge_state_owners(struct nfs_server *);
extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
@@ -494,7 +488,7 @@ extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
extern int nfs4_select_rw_stateid(struct nfs4_state *, fmode_t,
const struct nfs_lock_context *, nfs4_stateid *,
- struct rpc_cred **);
+ const struct cred **);
extern bool nfs4_refresh_open_stateid(nfs4_stateid *dst,
struct nfs4_state *state);
extern bool nfs4_copy_open_stateid(nfs4_stateid *dst,
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 8f53455c4765..2548405da1f7 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -545,7 +545,7 @@ static int nfs4_match_client(struct nfs_client *pos, struct nfs_client *new,
*/
int nfs40_walk_client_list(struct nfs_client *new,
struct nfs_client **result,
- struct rpc_cred *cred)
+ const struct cred *cred)
{
struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id);
struct nfs_client *pos, *prev = NULL;
@@ -711,7 +711,7 @@ out_err:
*/
int nfs41_walk_client_list(struct nfs_client *new,
struct nfs_client **result,
- struct rpc_cred *cred)
+ const struct cred *cred)
{
struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id);
struct nfs_client *pos, *prev = NULL;
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 46d691ba04bc..45b2322e092d 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -133,15 +133,9 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t count, unsigned int flags)
{
- ssize_t ret;
-
if (file_inode(file_in) == file_inode(file_out))
return -EINVAL;
-retry:
- ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
- if (ret == -EAGAIN)
- goto retry;
- return ret;
+ return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
}
static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 867457d6dfbe..557a5d636183 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -93,19 +93,19 @@ static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinf
static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label, struct inode *inode);
static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label, struct inode *inode);
-static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
+static int nfs4_do_setattr(struct inode *inode, const struct cred *cred,
struct nfs_fattr *fattr, struct iattr *sattr,
struct nfs_open_context *ctx, struct nfs4_label *ilabel,
struct nfs4_label *olabel);
#ifdef CONFIG_NFS_V4_1
static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
- struct rpc_cred *cred,
+ const struct cred *cred,
struct nfs4_slot *slot,
bool is_privileged);
static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
- struct rpc_cred *);
+ const struct cred *);
static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
- struct rpc_cred *, bool);
+ const struct cred *, bool);
#endif
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
@@ -361,7 +361,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
static void nfs4_test_and_free_stateid(struct nfs_server *server,
nfs4_stateid *stateid,
- struct rpc_cred *cred)
+ const struct cred *cred)
{
const struct nfs4_minor_version_ops *ops = server->nfs_client->cl_mvops;
@@ -370,7 +370,7 @@ static void nfs4_test_and_free_stateid(struct nfs_server *server,
static void __nfs4_free_revoked_stateid(struct nfs_server *server,
nfs4_stateid *stateid,
- struct rpc_cred *cred)
+ const struct cred *cred)
{
stateid->type = NFS4_REVOKED_STATEID_TYPE;
nfs4_test_and_free_stateid(server, stateid, cred);
@@ -378,7 +378,7 @@ static void __nfs4_free_revoked_stateid(struct nfs_server *server,
static void nfs4_free_revoked_stateid(struct nfs_server *server,
const nfs4_stateid *stateid,
- struct rpc_cred *cred)
+ const struct cred *cred)
{
nfs4_stateid tmp;
@@ -908,7 +908,7 @@ static const struct rpc_call_ops nfs41_call_sync_ops = {
static void
nfs4_sequence_process_interrupted(struct nfs_client *client,
- struct nfs4_slot *slot, struct rpc_cred *cred)
+ struct nfs4_slot *slot, const struct cred *cred)
{
struct rpc_task *task;
@@ -939,7 +939,7 @@ EXPORT_SYMBOL_GPL(nfs4_sequence_done);
static void
nfs4_sequence_process_interrupted(struct nfs_client *client,
- struct nfs4_slot *slot, struct rpc_cred *cred)
+ struct nfs4_slot *slot, const struct cred *cred)
{
WARN_ON_ONCE(1);
slot->interrupted = 0;
@@ -2484,7 +2484,7 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
* Note that in the non-execute case, we want to turn off permission
* checking if we just created a new file (POSIX open() semantics).
*/
-static int nfs4_opendata_access(struct rpc_cred *cred,
+static int nfs4_opendata_access(const struct cred *cred,
struct nfs4_opendata *opendata,
struct nfs4_state *state, fmode_t fmode,
int openflags)
@@ -2651,7 +2651,7 @@ static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
static int nfs40_test_and_free_expired_stateid(struct nfs_server *server,
nfs4_stateid *stateid,
- struct rpc_cred *cred)
+ const struct cred *cred)
{
return -NFS4ERR_BAD_STATEID;
}
@@ -2659,7 +2659,7 @@ static int nfs40_test_and_free_expired_stateid(struct nfs_server *server,
#if defined(CONFIG_NFS_V4_1)
static int nfs41_test_and_free_expired_stateid(struct nfs_server *server,
nfs4_stateid *stateid,
- struct rpc_cred *cred)
+ const struct cred *cred)
{
int status;
@@ -2693,7 +2693,7 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state)
struct nfs_server *server = NFS_SERVER(state->inode);
nfs4_stateid stateid;
struct nfs_delegation *delegation;
- struct rpc_cred *cred;
+ const struct cred *cred = NULL;
int status;
/* Get the delegation credential for use by test/free_stateid */
@@ -2718,14 +2718,16 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state)
return;
}
- cred = get_rpccred(delegation->cred);
+ if (delegation->cred)
+ cred = get_cred(delegation->cred);
rcu_read_unlock();
status = nfs41_test_and_free_expired_stateid(server, &stateid, cred);
trace_nfs4_test_delegation_stateid(state, NULL, status);
if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID)
nfs_finish_clear_delegation_stateid(state, &stateid);
- put_rpccred(cred);
+ if (delegation->cred)
+ put_cred(cred);
}
/**
@@ -2748,7 +2750,7 @@ static int nfs41_check_expired_locks(struct nfs4_state *state)
spin_lock(&state->state_lock);
list_for_each_entry(lsp, &state->lock_states, ls_locks) {
if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) {
- struct rpc_cred *cred = lsp->ls_state->owner->so_cred;
+ const struct cred *cred = lsp->ls_state->owner->so_cred;
refcount_inc(&lsp->ls_count);
spin_unlock(&state->state_lock);
@@ -2792,7 +2794,7 @@ static int nfs41_check_open_stateid(struct nfs4_state *state)
{
struct nfs_server *server = NFS_SERVER(state->inode);
nfs4_stateid *stateid = &state->open_stateid;
- struct rpc_cred *cred = state->owner->so_cred;
+ const struct cred *cred = state->owner->so_cred;
int status;
if (test_bit(NFS_OPEN_STATE, &state->flags) == 0) {
@@ -2950,7 +2952,7 @@ static int _nfs4_do_open(struct inode *dir,
struct nfs_server *server = NFS_SERVER(dir);
struct nfs4_opendata *opendata;
struct dentry *dentry = ctx->dentry;
- struct rpc_cred *cred = ctx->cred;
+ const struct cred *cred = ctx->cred;
struct nfs4_threshold **ctx_th = &ctx->mdsthreshold;
fmode_t fmode = ctx->mode & (FMODE_READ|FMODE_WRITE|FMODE_EXEC);
enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL;
@@ -3120,7 +3122,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
static int _nfs4_do_setattr(struct inode *inode,
struct nfs_setattrargs *arg,
struct nfs_setattrres *res,
- struct rpc_cred *cred,
+ const struct cred *cred,
struct nfs_open_context *ctx)
{
struct nfs_server *server = NFS_SERVER(inode);
@@ -3130,7 +3132,7 @@ static int _nfs4_do_setattr(struct inode *inode,
.rpc_resp = res,
.rpc_cred = cred,
};
- struct rpc_cred *delegation_cred = NULL;
+ const struct cred *delegation_cred = NULL;
unsigned long timestamp = jiffies;
bool truncate;
int status;
@@ -3165,14 +3167,14 @@ zero_stateid:
status = nfs4_call_sync(server->client, server, &msg, &arg->seq_args, &res->seq_res, 1);
- put_rpccred(delegation_cred);
+ put_cred(delegation_cred);
if (status == 0 && ctx != NULL)
renew_lease(server, timestamp);
trace_nfs4_setattr(inode, &arg->stateid, status);
return status;
}
-static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
+static int nfs4_do_setattr(struct inode *inode, const struct cred *cred,
struct nfs_fattr *fattr, struct iattr *sattr,
struct nfs_open_context *ctx, struct nfs4_label *ilabel,
struct nfs4_label *olabel)
@@ -3973,7 +3975,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
struct iattr *sattr)
{
struct inode *inode = d_inode(dentry);
- struct rpc_cred *cred = NULL;
+ const struct cred *cred = NULL;
struct nfs_open_context *ctx = NULL;
struct nfs4_label *label = NULL;
int status;
@@ -4202,7 +4204,6 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
return -ENOMEM;
args.bitmask = server->cache_consistency_bitmask;
}
-
status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
if (!status) {
nfs_access_set_mask(entry, res.access);
@@ -4691,7 +4692,7 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
return err;
}
-static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
+static int _nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred,
u64 cookie, struct page **pages, unsigned int count, bool plus)
{
struct inode *dir = d_inode(dentry);
@@ -4729,7 +4730,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
return status;
}
-static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
+static int nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred,
u64 cookie, struct page **pages, unsigned int count, bool plus)
{
struct nfs4_exception exception = { };
@@ -5257,7 +5258,7 @@ static const struct rpc_call_ops nfs4_renew_ops = {
.rpc_release = nfs4_renew_release,
};
-static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags)
+static int nfs4_proc_async_renew(struct nfs_client *clp, const struct cred *cred, unsigned renew_flags)
{
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW],
@@ -5281,7 +5282,7 @@ static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred,
&nfs4_renew_ops, data);
}
-static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
+static int nfs4_proc_renew(struct nfs_client *clp, const struct cred *cred)
{
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW],
@@ -5696,7 +5697,6 @@ nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen)
{
struct nfs4_label ilabel, *olabel = NULL;
struct nfs_fattr fattr;
- struct rpc_cred *cred;
int status;
if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL))
@@ -5709,10 +5709,6 @@ nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen)
ilabel.label = (char *)buf;
ilabel.len = buflen;
- cred = rpc_lookup_cred();
- if (IS_ERR(cred))
- return PTR_ERR(cred);
-
olabel = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL);
if (IS_ERR(olabel)) {
status = -PTR_ERR(olabel);
@@ -5725,7 +5721,6 @@ nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen)
nfs4_label_free(olabel);
out:
- put_rpccred(cred);
return status;
}
#endif /* CONFIG_NFS_V4_SECURITY_LABEL */
@@ -5894,13 +5889,13 @@ static const struct rpc_call_ops nfs4_setclientid_ops = {
* @clp: state data structure
* @program: RPC program for NFSv4 callback service
* @port: IP port number for NFS4 callback service
- * @cred: RPC credential to use for this call
+ * @cred: credential to use for this call
* @res: where to place the result
*
* Returns zero, a negative errno, or a negative NFS4ERR status code.
*/
int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
- unsigned short port, struct rpc_cred *cred,
+ unsigned short port, const struct cred *cred,
struct nfs4_setclientid_res *res)
{
nfs4_verifier sc_verifier;
@@ -5969,13 +5964,13 @@ out:
* nfs4_proc_setclientid_confirm - Confirm client ID
* @clp: state data structure
* @res: result of a previous SETCLIENTID
- * @cred: RPC credential to use for this call
+ * @cred: credential to use for this call
*
* Returns zero, a negative errno, or a negative NFS4ERR status code.
*/
int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
struct nfs4_setclientid_res *arg,
- struct rpc_cred *cred)
+ const struct cred *cred)
{
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM],
@@ -6138,7 +6133,7 @@ static const struct rpc_call_ops nfs4_delegreturn_ops = {
.rpc_release = nfs4_delegreturn_release,
};
-static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync)
+static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync)
{
struct nfs4_delegreturndata *data;
struct nfs_server *server = NFS_SERVER(inode);
@@ -6205,7 +6200,7 @@ out:
return status;
}
-int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync)
+int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_exception exception = { };
@@ -6311,7 +6306,8 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
/* Ensure we don't close file until we're done freeing locks! */
p->ctx = get_nfs_open_context(ctx);
p->l_ctx = nfs_get_lock_context(ctx);
- memcpy(&p->fl, fl, sizeof(p->fl));
+ locks_init_lock(&p->fl);
+ locks_copy_lock(&p->fl, fl);
p->server = NFS_SERVER(inode);
return p;
}
@@ -6533,7 +6529,8 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
p->server = server;
refcount_inc(&lsp->ls_count);
p->ctx = get_nfs_open_context(ctx);
- memcpy(&p->fl, fl, sizeof(p->fl));
+ locks_init_lock(&p->fl);
+ locks_copy_lock(&p->fl, fl);
return p;
out_free_seqid:
nfs_free_seqid(p->arg.open_seqid);
@@ -7266,7 +7263,7 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
*/
static int _nfs40_proc_get_locations(struct inode *inode,
struct nfs4_fs_locations *locations,
- struct page *page, struct rpc_cred *cred)
+ struct page *page, const struct cred *cred)
{
struct nfs_server *server = NFS_SERVER(inode);
struct rpc_clnt *clnt = server->client;
@@ -7323,7 +7320,7 @@ static int _nfs40_proc_get_locations(struct inode *inode,
*/
static int _nfs41_proc_get_locations(struct inode *inode,
struct nfs4_fs_locations *locations,
- struct page *page, struct rpc_cred *cred)
+ struct page *page, const struct cred *cred)
{
struct nfs_server *server = NFS_SERVER(inode);
struct rpc_clnt *clnt = server->client;
@@ -7382,7 +7379,7 @@ static int _nfs41_proc_get_locations(struct inode *inode,
*/
int nfs4_proc_get_locations(struct inode *inode,
struct nfs4_fs_locations *locations,
- struct page *page, struct rpc_cred *cred)
+ struct page *page, const struct cred *cred)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_client *clp = server->nfs_client;
@@ -7413,7 +7410,7 @@ int nfs4_proc_get_locations(struct inode *inode,
* is appended to this compound to identify the client ID which is
* performing recovery.
*/
-static int _nfs40_proc_fsid_present(struct inode *inode, struct rpc_cred *cred)
+static int _nfs40_proc_fsid_present(struct inode *inode, const struct cred *cred)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
@@ -7459,7 +7456,7 @@ static int _nfs40_proc_fsid_present(struct inode *inode, struct rpc_cred *cred)
* this operation is identified in the SEQUENCE operation in this
* compound.
*/
-static int _nfs41_proc_fsid_present(struct inode *inode, struct rpc_cred *cred)
+static int _nfs41_proc_fsid_present(struct inode *inode, const struct cred *cred)
{
struct nfs_server *server = NFS_SERVER(inode);
struct rpc_clnt *clnt = server->client;
@@ -7506,7 +7503,7 @@ static int _nfs41_proc_fsid_present(struct inode *inode, struct rpc_cred *cred)
* NFS4ERR code if some error occurred on the server, or a
* negative errno if a local failure occurred.
*/
-int nfs4_proc_fsid_present(struct inode *inode, struct rpc_cred *cred)
+int nfs4_proc_fsid_present(struct inode *inode, const struct cred *cred)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_client *clp = server->nfs_client;
@@ -7553,7 +7550,7 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct
.rpc_resp = &res,
};
struct rpc_clnt *clnt = NFS_SERVER(dir)->client;
- struct rpc_cred *cred = NULL;
+ const struct cred *cred = NULL;
if (use_integrity) {
clnt = NFS_SERVER(dir)->nfs_client->cl_rpcclient;
@@ -7570,8 +7567,7 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct
&res.seq_res, 0);
dprintk("NFS reply secinfo: %d\n", status);
- if (cred)
- put_rpccred(cred);
+ put_cred(cred);
return status;
}
@@ -7652,7 +7648,7 @@ static
int nfs4_proc_bind_one_conn_to_session(struct rpc_clnt *clnt,
struct rpc_xprt *xprt,
struct nfs_client *clp,
- struct rpc_cred *cred)
+ const struct cred *cred)
{
int status;
struct nfs41_bind_conn_to_session_args args = {
@@ -7714,7 +7710,7 @@ int nfs4_proc_bind_one_conn_to_session(struct rpc_clnt *clnt,
struct rpc_bind_conn_calldata {
struct nfs_client *clp;
- struct rpc_cred *cred;
+ const struct cred *cred;
};
static int
@@ -7727,7 +7723,7 @@ nfs4_proc_bind_conn_to_session_callback(struct rpc_clnt *clnt,
return nfs4_proc_bind_one_conn_to_session(clnt, xprt, p->clp, p->cred);
}
-int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred)
+int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, const struct cred *cred)
{
struct rpc_bind_conn_calldata data = {
.clp = clp,
@@ -7893,7 +7889,7 @@ static const struct rpc_call_ops nfs4_exchange_id_call_ops = {
* Wrapper for EXCHANGE_ID operation.
*/
static struct rpc_task *
-nfs4_run_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
+nfs4_run_exchange_id(struct nfs_client *clp, const struct cred *cred,
u32 sp4_how, struct rpc_xprt *xprt)
{
struct rpc_message msg = {
@@ -7989,7 +7985,7 @@ out:
*
* Wrapper for EXCHANGE_ID operation.
*/
-static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
+static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cred,
u32 sp4_how)
{
struct rpc_task *task;
@@ -8056,7 +8052,7 @@ out:
*
* Will attempt to negotiate SP4_MACH_CRED if krb5i / krb5p auth is used.
*/
-int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
+int nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cred)
{
rpc_authflavor_t authflavor = clp->cl_rpcclient->cl_auth->au_flavor;
int status;
@@ -8088,7 +8084,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
* @xprt: the rpc_xprt to test
* @data: call data for _nfs4_proc_exchange_id.
*/
-int nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt,
+void nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt,
void *data)
{
struct nfs4_add_xprt_data *adata = (struct nfs4_add_xprt_data *)data;
@@ -8105,20 +8101,22 @@ int nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt,
/* Test connection for session trunking. Async exchange_id call */
task = nfs4_run_exchange_id(adata->clp, adata->cred, sp4_how, xprt);
if (IS_ERR(task))
- return PTR_ERR(task);
+ return;
status = task->tk_status;
if (status == 0)
status = nfs4_detect_session_trunking(adata->clp,
task->tk_msg.rpc_resp, xprt);
+ if (status == 0)
+ rpc_clnt_xprt_switch_add_xprt(clnt, xprt);
+
rpc_put_task(task);
- return status;
}
EXPORT_SYMBOL_GPL(nfs4_test_session_trunk);
static int _nfs4_proc_destroy_clientid(struct nfs_client *clp,
- struct rpc_cred *cred)
+ const struct cred *cred)
{
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_CLIENTID],
@@ -8136,7 +8134,7 @@ static int _nfs4_proc_destroy_clientid(struct nfs_client *clp,
}
static int nfs4_proc_destroy_clientid(struct nfs_client *clp,
- struct rpc_cred *cred)
+ const struct cred *cred)
{
unsigned int loop;
int ret;
@@ -8157,7 +8155,7 @@ static int nfs4_proc_destroy_clientid(struct nfs_client *clp,
int nfs4_destroy_clientid(struct nfs_client *clp)
{
- struct rpc_cred *cred;
+ const struct cred *cred;
int ret = 0;
if (clp->cl_mvops->minor_version < 1)
@@ -8168,8 +8166,7 @@ int nfs4_destroy_clientid(struct nfs_client *clp)
goto out;
cred = nfs4_get_clid_cred(clp);
ret = nfs4_proc_destroy_clientid(clp, cred);
- if (cred)
- put_rpccred(cred);
+ put_cred(cred);
switch (ret) {
case 0:
case -NFS4ERR_STALE_CLIENTID:
@@ -8385,7 +8382,7 @@ static void nfs4_update_session(struct nfs4_session *session,
}
static int _nfs4_proc_create_session(struct nfs_client *clp,
- struct rpc_cred *cred)
+ const struct cred *cred)
{
struct nfs4_session *session = clp->cl_session;
struct nfs41_create_session_args args = {
@@ -8437,7 +8434,7 @@ out:
* It is the responsibility of the caller to verify the session is
* expired before calling this routine.
*/
-int nfs4_proc_create_session(struct nfs_client *clp, struct rpc_cred *cred)
+int nfs4_proc_create_session(struct nfs_client *clp, const struct cred *cred)
{
int status;
unsigned *ptr;
@@ -8468,7 +8465,7 @@ out:
* The caller must serialize access to this routine.
*/
int nfs4_proc_destroy_session(struct nfs4_session *session,
- struct rpc_cred *cred)
+ const struct cred *cred)
{
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION],
@@ -8570,7 +8567,7 @@ static const struct rpc_call_ops nfs41_sequence_ops = {
};
static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
- struct rpc_cred *cred,
+ const struct cred *cred,
struct nfs4_slot *slot,
bool is_privileged)
{
@@ -8613,7 +8610,7 @@ out_err:
return ret;
}
-static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags)
+static int nfs41_proc_async_sequence(struct nfs_client *clp, const struct cred *cred, unsigned renew_flags)
{
struct rpc_task *task;
int ret = 0;
@@ -8629,7 +8626,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
return ret;
}
-static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
+static int nfs4_proc_sequence(struct nfs_client *clp, const struct cred *cred)
{
struct rpc_task *task;
int ret;
@@ -8725,7 +8722,7 @@ static const struct rpc_call_ops nfs4_reclaim_complete_call_ops = {
* Issue a global reclaim complete.
*/
static int nfs41_proc_reclaim_complete(struct nfs_client *clp,
- struct rpc_cred *cred)
+ const struct cred *cred)
{
struct nfs4_reclaim_complete_data *calldata;
struct rpc_task *task;
@@ -9078,7 +9075,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
static int
_nfs4_proc_getdeviceinfo(struct nfs_server *server,
struct pnfs_device *pdev,
- struct rpc_cred *cred)
+ const struct cred *cred)
{
struct nfs4_getdeviceinfo_args args = {
.pdev = pdev,
@@ -9110,7 +9107,7 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server,
int nfs4_proc_getdeviceinfo(struct nfs_server *server,
struct pnfs_device *pdev,
- struct rpc_cred *cred)
+ const struct cred *cred)
{
struct nfs4_exception exception = { };
int err;
@@ -9167,7 +9164,7 @@ static void nfs4_layoutcommit_release(void *calldata)
pnfs_cleanup_layoutcommit(data);
nfs_post_op_update_inode_force_wcc(data->args.inode,
data->res.fattr);
- put_rpccred(data->cred);
+ put_cred(data->cred);
nfs_iput_and_deactive(data->inode);
kfree(data);
}
@@ -9243,7 +9240,7 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
.rpc_resp = &res,
};
struct rpc_clnt *clnt = server->client;
- struct rpc_cred *cred = NULL;
+ const struct cred *cred = NULL;
int status;
if (use_integrity) {
@@ -9257,8 +9254,7 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
&res.seq_res, 0);
dprintk("<-- %s status=%d\n", __func__, status);
- if (cred)
- put_rpccred(cred);
+ put_cred(cred);
return status;
}
@@ -9371,7 +9367,7 @@ out:
static int _nfs41_test_stateid(struct nfs_server *server,
nfs4_stateid *stateid,
- struct rpc_cred *cred)
+ const struct cred *cred)
{
int status;
struct nfs41_test_stateid_args args = {
@@ -9432,7 +9428,7 @@ static void nfs4_handle_delay_or_session_error(struct nfs_server *server,
*/
static int nfs41_test_stateid(struct nfs_server *server,
nfs4_stateid *stateid,
- struct rpc_cred *cred)
+ const struct cred *cred)
{
struct nfs4_exception exception = { };
int err;
@@ -9494,7 +9490,7 @@ static const struct rpc_call_ops nfs41_free_stateid_ops = {
*/
static int nfs41_free_stateid(struct nfs_server *server,
const nfs4_stateid *stateid,
- struct rpc_cred *cred,
+ const struct cred *cred,
bool privileged)
{
struct rpc_message msg = {
@@ -9535,7 +9531,7 @@ static int nfs41_free_stateid(struct nfs_server *server,
static void
nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
{
- struct rpc_cred *cred = lsp->ls_state->owner->so_cred;
+ const struct cred *cred = lsp->ls_state->owner->so_cred;
nfs41_free_stateid(server, &lsp->ls_stateid, cred, false);
nfs4_free_lock_state(server, lsp);
@@ -9606,14 +9602,14 @@ static const struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = {
static const struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = {
.sched_state_renewal = nfs4_proc_async_renew,
- .get_state_renewal_cred_locked = nfs4_get_renew_cred_locked,
+ .get_state_renewal_cred = nfs4_get_renew_cred,
.renew_lease = nfs4_proc_renew,
};
#if defined(CONFIG_NFS_V4_1)
static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
.sched_state_renewal = nfs41_proc_async_sequence,
- .get_state_renewal_cred_locked = nfs4_get_machine_cred_locked,
+ .get_state_renewal_cred = nfs4_get_machine_cred,
.renew_lease = nfs4_proc_sequence,
};
#endif
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 1f8c2ae43a8d..6ea431b067dd 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -57,7 +57,7 @@ nfs4_renew_state(struct work_struct *work)
const struct nfs4_state_maintenance_ops *ops;
struct nfs_client *clp =
container_of(work, struct nfs_client, cl_renewd.work);
- struct rpc_cred *cred;
+ const struct cred *cred;
long lease;
unsigned long last, now;
unsigned renew_flags = 0;
@@ -68,7 +68,6 @@ nfs4_renew_state(struct work_struct *work)
if (test_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state))
goto out;
- spin_lock(&clp->cl_lock);
lease = clp->cl_lease_time;
last = clp->cl_last_renewal;
now = jiffies;
@@ -79,8 +78,7 @@ nfs4_renew_state(struct work_struct *work)
renew_flags |= NFS4_RENEW_DELEGATION_CB;
if (renew_flags != 0) {
- cred = ops->get_state_renewal_cred_locked(clp);
- spin_unlock(&clp->cl_lock);
+ cred = ops->get_state_renewal_cred(clp);
if (cred == NULL) {
if (!(renew_flags & NFS4_RENEW_DELEGATION_CB)) {
set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
@@ -92,7 +90,7 @@ nfs4_renew_state(struct work_struct *work)
/* Queue an asynchronous RENEW. */
ret = ops->sched_state_renewal(clp, cred, renew_flags);
- put_rpccred(cred);
+ put_cred(cred);
switch (ret) {
default:
goto out_exp;
@@ -104,7 +102,6 @@ nfs4_renew_state(struct work_struct *work)
} else {
dprintk("%s: failed to call renewd. Reason: lease not expired \n",
__func__);
- spin_unlock(&clp->cl_lock);
}
nfs4_schedule_state_renewal(clp);
out_exp:
diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c
index 769b85655c4b..a5489d70a724 100644
--- a/fs/nfs/nfs4session.c
+++ b/fs/nfs/nfs4session.c
@@ -573,12 +573,11 @@ static void nfs4_destroy_session_slot_tables(struct nfs4_session *session)
void nfs4_destroy_session(struct nfs4_session *session)
{
struct rpc_xprt *xprt;
- struct rpc_cred *cred;
+ const struct cred *cred;
cred = nfs4_get_clid_cred(session->clp);
nfs4_proc_destroy_session(session, cred);
- if (cred)
- put_rpccred(cred);
+ put_cred(cred);
rcu_read_lock();
xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index d8decf2ec48f..02488b50534a 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -87,7 +87,7 @@ const nfs4_stateid current_stateid = {
static DEFINE_MUTEX(nfs_clid_init_mutex);
-int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
+int nfs4_init_clientid(struct nfs_client *clp, const struct cred *cred)
{
struct nfs4_setclientid_res clid = {
.clientid = clp->cl_clientid,
@@ -134,7 +134,7 @@ out:
*/
int nfs40_discover_server_trunking(struct nfs_client *clp,
struct nfs_client **result,
- struct rpc_cred *cred)
+ const struct cred *cred)
{
struct nfs4_setclientid_res clid = {
.clientid = clp->cl_clientid,
@@ -164,32 +164,23 @@ out:
return status;
}
-struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp)
+const struct cred *nfs4_get_machine_cred(struct nfs_client *clp)
{
- struct rpc_cred *cred = NULL;
-
- if (clp->cl_machine_cred != NULL)
- cred = get_rpccred(clp->cl_machine_cred);
- return cred;
+ return get_cred(rpc_machine_cred());
}
static void nfs4_root_machine_cred(struct nfs_client *clp)
{
- struct rpc_cred *cred, *new;
- new = rpc_lookup_machine_cred(NULL);
- spin_lock(&clp->cl_lock);
- cred = clp->cl_machine_cred;
- clp->cl_machine_cred = new;
- spin_unlock(&clp->cl_lock);
- if (cred != NULL)
- put_rpccred(cred);
+ /* Force root creds instead of machine */
+ clp->cl_principal = NULL;
+ clp->cl_rpcclient->cl_principal = NULL;
}
-static struct rpc_cred *
+static const struct cred *
nfs4_get_renew_cred_server_locked(struct nfs_server *server)
{
- struct rpc_cred *cred = NULL;
+ const struct cred *cred = NULL;
struct nfs4_state_owner *sp;
struct rb_node *pos;
@@ -199,29 +190,30 @@ nfs4_get_renew_cred_server_locked(struct nfs_server *server)
sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
if (list_empty(&sp->so_states))
continue;
- cred = get_rpccred(sp->so_cred);
+ cred = get_cred(sp->so_cred);
break;
}
return cred;
}
/**
- * nfs4_get_renew_cred_locked - Acquire credential for a renew operation
+ * nfs4_get_renew_cred - Acquire credential for a renew operation
* @clp: client state handle
*
* Returns an rpc_cred with reference count bumped, or NULL.
* Caller must hold clp->cl_lock.
*/
-struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
+const struct cred *nfs4_get_renew_cred(struct nfs_client *clp)
{
- struct rpc_cred *cred = NULL;
+ const struct cred *cred = NULL;
struct nfs_server *server;
/* Use machine credentials if available */
- cred = nfs4_get_machine_cred_locked(clp);
+ cred = nfs4_get_machine_cred(clp);
if (cred != NULL)
goto out;
+ spin_lock(&clp->cl_lock);
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
cred = nfs4_get_renew_cred_server_locked(server);
@@ -229,6 +221,7 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
break;
}
rcu_read_unlock();
+ spin_unlock(&clp->cl_lock);
out:
return cred;
@@ -319,7 +312,7 @@ static void nfs41_finish_session_reset(struct nfs_client *clp)
nfs41_setup_state_renewal(clp);
}
-int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
+int nfs41_init_clientid(struct nfs_client *clp, const struct cred *cred)
{
int status;
@@ -354,7 +347,7 @@ out:
*/
int nfs41_discover_server_trunking(struct nfs_client *clp,
struct nfs_client **result,
- struct rpc_cred *cred)
+ const struct cred *cred)
{
int status;
@@ -392,32 +385,32 @@ int nfs41_discover_server_trunking(struct nfs_client *clp,
* nfs4_get_clid_cred - Acquire credential for a setclientid operation
* @clp: client state handle
*
- * Returns an rpc_cred with reference count bumped, or NULL.
+ * Returns a cred with reference count bumped, or NULL.
*/
-struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp)
+const struct cred *nfs4_get_clid_cred(struct nfs_client *clp)
{
- struct rpc_cred *cred;
+ const struct cred *cred;
- spin_lock(&clp->cl_lock);
- cred = nfs4_get_machine_cred_locked(clp);
- spin_unlock(&clp->cl_lock);
+ cred = nfs4_get_machine_cred(clp);
return cred;
}
static struct nfs4_state_owner *
-nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred)
+nfs4_find_state_owner_locked(struct nfs_server *server, const struct cred *cred)
{
struct rb_node **p = &server->state_owners.rb_node,
*parent = NULL;
struct nfs4_state_owner *sp;
+ int cmp;
while (*p != NULL) {
parent = *p;
sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
+ cmp = cred_fscmp(cred, sp->so_cred);
- if (cred < sp->so_cred)
+ if (cmp < 0)
p = &parent->rb_left;
- else if (cred > sp->so_cred)
+ else if (cmp > 0)
p = &parent->rb_right;
else {
if (!list_empty(&sp->so_lru))
@@ -436,14 +429,16 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
struct rb_node **p = &server->state_owners.rb_node,
*parent = NULL;
struct nfs4_state_owner *sp;
+ int cmp;
while (*p != NULL) {
parent = *p;
sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
+ cmp = cred_fscmp(new->so_cred, sp->so_cred);
- if (new->so_cred < sp->so_cred)
+ if (cmp < 0)
p = &parent->rb_left;
- else if (new->so_cred > sp->so_cred)
+ else if (cmp > 0)
p = &parent->rb_right;
else {
if (!list_empty(&sp->so_lru))
@@ -490,7 +485,7 @@ nfs4_destroy_seqid_counter(struct nfs_seqid_counter *sc)
*/
static struct nfs4_state_owner *
nfs4_alloc_state_owner(struct nfs_server *server,
- struct rpc_cred *cred,
+ const struct cred *cred,
gfp_t gfp_flags)
{
struct nfs4_state_owner *sp;
@@ -505,7 +500,7 @@ nfs4_alloc_state_owner(struct nfs_server *server,
return NULL;
}
sp->so_server = server;
- sp->so_cred = get_rpccred(cred);
+ sp->so_cred = get_cred(cred);
spin_lock_init(&sp->so_lock);
INIT_LIST_HEAD(&sp->so_states);
nfs4_init_seqid_counter(&sp->so_seqid);
@@ -534,7 +529,7 @@ nfs4_reset_state_owner(struct nfs4_state_owner *sp)
static void nfs4_free_state_owner(struct nfs4_state_owner *sp)
{
nfs4_destroy_seqid_counter(&sp->so_seqid);
- put_rpccred(sp->so_cred);
+ put_cred(sp->so_cred);
ida_simple_remove(&sp->so_server->openowner_id, sp->so_seqid.owner_id);
kfree(sp);
}
@@ -572,7 +567,7 @@ static void nfs4_gc_state_owners(struct nfs_server *server)
* Returns a pointer to an instantiated nfs4_state_owner struct, or NULL.
*/
struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
- struct rpc_cred *cred,
+ const struct cred *cred,
gfp_t gfp_flags)
{
struct nfs_client *clp = server->nfs_client;
@@ -1041,7 +1036,7 @@ bool nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
*/
int nfs4_select_rw_stateid(struct nfs4_state *state,
fmode_t fmode, const struct nfs_lock_context *l_ctx,
- nfs4_stateid *dst, struct rpc_cred **cred)
+ nfs4_stateid *dst, const struct cred **cred)
{
int ret;
@@ -1560,7 +1555,7 @@ static void nfs42_complete_copies(struct nfs4_state_owner *sp, struct nfs4_state
spin_lock(&sp->so_server->nfs_client->cl_lock);
list_for_each_entry(copy, &sp->so_server->ss_copies, copies) {
- if (nfs4_stateid_match_other(&state->stateid, &copy->parent_state->stateid))
+ if (!nfs4_stateid_match_other(&state->stateid, &copy->parent_state->stateid))
continue;
copy->flags = 1;
complete(&copy->completion);
@@ -1741,7 +1736,7 @@ static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
static int nfs4_reclaim_complete(struct nfs_client *clp,
const struct nfs4_state_recovery_ops *ops,
- struct rpc_cred *cred)
+ const struct cred *cred)
{
/* Notify the server we're done reclaiming our state */
if (ops->reclaim_complete)
@@ -1792,7 +1787,7 @@ static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
{
const struct nfs4_state_recovery_ops *ops;
- struct rpc_cred *cred;
+ const struct cred *cred;
int err;
if (!nfs4_state_clear_reclaim_reboot(clp))
@@ -1800,7 +1795,7 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
ops = clp->cl_mvops->reboot_recovery_ops;
cred = nfs4_get_clid_cred(clp);
err = nfs4_reclaim_complete(clp, ops, cred);
- put_rpccred(cred);
+ put_cred(cred);
if (err == -NFS4ERR_CONN_NOT_BOUND_TO_SESSION)
set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
}
@@ -1896,7 +1891,7 @@ restart:
static int nfs4_check_lease(struct nfs_client *clp)
{
- struct rpc_cred *cred;
+ const struct cred *cred;
const struct nfs4_state_maintenance_ops *ops =
clp->cl_mvops->state_renewal_ops;
int status;
@@ -1904,9 +1899,7 @@ static int nfs4_check_lease(struct nfs_client *clp)
/* Is the client already known to have an expired lease? */
if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
return 0;
- spin_lock(&clp->cl_lock);
- cred = ops->get_state_renewal_cred_locked(clp);
- spin_unlock(&clp->cl_lock);
+ cred = ops->get_state_renewal_cred(clp);
if (cred == NULL) {
cred = nfs4_get_clid_cred(clp);
status = -ENOKEY;
@@ -1914,7 +1907,7 @@ static int nfs4_check_lease(struct nfs_client *clp)
goto out;
}
status = ops->renew_lease(clp, cred);
- put_rpccred(cred);
+ put_cred(cred);
if (status == -ETIMEDOUT) {
set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
return 0;
@@ -1974,7 +1967,7 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
static int nfs4_establish_lease(struct nfs_client *clp)
{
- struct rpc_cred *cred;
+ const struct cred *cred;
const struct nfs4_state_recovery_ops *ops =
clp->cl_mvops->reboot_recovery_ops;
int status;
@@ -1986,7 +1979,7 @@ static int nfs4_establish_lease(struct nfs_client *clp)
if (cred == NULL)
return -ENOENT;
status = ops->establish_clid(clp, cred);
- put_rpccred(cred);
+ put_cred(cred);
if (status != 0)
return status;
pnfs_destroy_all_layouts(clp);
@@ -2033,7 +2026,7 @@ static int nfs4_purge_lease(struct nfs_client *clp)
*
* Returns zero or a negative NFS4ERR status code.
*/
-static int nfs4_try_migration(struct nfs_server *server, struct rpc_cred *cred)
+static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred)
{
struct nfs_client *clp = server->nfs_client;
struct nfs4_fs_locations *locations = NULL;
@@ -2103,14 +2096,12 @@ static int nfs4_handle_migration(struct nfs_client *clp)
const struct nfs4_state_maintenance_ops *ops =
clp->cl_mvops->state_renewal_ops;
struct nfs_server *server;
- struct rpc_cred *cred;
+ const struct cred *cred;
dprintk("%s: migration reported on \"%s\"\n", __func__,
clp->cl_hostname);
- spin_lock(&clp->cl_lock);
- cred = ops->get_state_renewal_cred_locked(clp);
- spin_unlock(&clp->cl_lock);
+ cred = ops->get_state_renewal_cred(clp);
if (cred == NULL)
return -NFS4ERR_NOENT;
@@ -2131,13 +2122,13 @@ restart:
rcu_read_unlock();
status = nfs4_try_migration(server, cred);
if (status < 0) {
- put_rpccred(cred);
+ put_cred(cred);
return status;
}
goto restart;
}
rcu_read_unlock();
- put_rpccred(cred);
+ put_cred(cred);
return 0;
}
@@ -2151,14 +2142,12 @@ static int nfs4_handle_lease_moved(struct nfs_client *clp)
const struct nfs4_state_maintenance_ops *ops =
clp->cl_mvops->state_renewal_ops;
struct nfs_server *server;
- struct rpc_cred *cred;
+ const struct cred *cred;
dprintk("%s: lease moved reported on \"%s\"\n", __func__,
clp->cl_hostname);
- spin_lock(&clp->cl_lock);
- cred = ops->get_state_renewal_cred_locked(clp);
- spin_unlock(&clp->cl_lock);
+ cred = ops->get_state_renewal_cred(clp);
if (cred == NULL)
return -NFS4ERR_NOENT;
@@ -2186,7 +2175,7 @@ restart:
rcu_read_unlock();
out:
- put_rpccred(cred);
+ put_cred(cred);
return 0;
}
@@ -2209,7 +2198,7 @@ int nfs4_discover_server_trunking(struct nfs_client *clp,
const struct nfs4_state_recovery_ops *ops =
clp->cl_mvops->reboot_recovery_ops;
struct rpc_clnt *clnt;
- struct rpc_cred *cred;
+ const struct cred *cred;
int i, status;
dprintk("NFS: %s: testing '%s'\n", __func__, clp->cl_hostname);
@@ -2225,7 +2214,7 @@ again:
goto out_unlock;
status = ops->detect_trunking(clp, result, cred);
- put_rpccred(cred);
+ put_cred(cred);
switch (status) {
case 0:
case -EINTR:
@@ -2416,7 +2405,7 @@ out_recovery:
static int nfs4_reset_session(struct nfs_client *clp)
{
- struct rpc_cred *cred;
+ const struct cred *cred;
int status;
if (!nfs4_has_session(clp))
@@ -2454,14 +2443,13 @@ static int nfs4_reset_session(struct nfs_client *clp)
dprintk("%s: session reset was successful for server %s!\n",
__func__, clp->cl_hostname);
out:
- if (cred)
- put_rpccred(cred);
+ put_cred(cred);
return status;
}
static int nfs4_bind_conn_to_session(struct nfs_client *clp)
{
- struct rpc_cred *cred;
+ const struct cred *cred;
int ret;
if (!nfs4_has_session(clp))
@@ -2471,8 +2459,7 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp)
return ret;
cred = nfs4_get_clid_cred(clp);
ret = nfs4_proc_bind_conn_to_session(clp, cred);
- if (cred)
- put_rpccred(cred);
+ put_cred(cred);
clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
switch (ret) {
case 0:
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index b1483b303e0b..b4557cf685fb 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -10,157 +10,302 @@
#include <linux/tracepoint.h>
+TRACE_DEFINE_ENUM(EPERM);
+TRACE_DEFINE_ENUM(ENOENT);
+TRACE_DEFINE_ENUM(EIO);
+TRACE_DEFINE_ENUM(ENXIO);
+TRACE_DEFINE_ENUM(EACCES);
+TRACE_DEFINE_ENUM(EEXIST);
+TRACE_DEFINE_ENUM(EXDEV);
+TRACE_DEFINE_ENUM(ENOTDIR);
+TRACE_DEFINE_ENUM(EISDIR);
+TRACE_DEFINE_ENUM(EFBIG);
+TRACE_DEFINE_ENUM(ENOSPC);
+TRACE_DEFINE_ENUM(EROFS);
+TRACE_DEFINE_ENUM(EMLINK);
+TRACE_DEFINE_ENUM(ENAMETOOLONG);
+TRACE_DEFINE_ENUM(ENOTEMPTY);
+TRACE_DEFINE_ENUM(EDQUOT);
+TRACE_DEFINE_ENUM(ESTALE);
+TRACE_DEFINE_ENUM(EBADHANDLE);
+TRACE_DEFINE_ENUM(EBADCOOKIE);
+TRACE_DEFINE_ENUM(ENOTSUPP);
+TRACE_DEFINE_ENUM(ETOOSMALL);
+TRACE_DEFINE_ENUM(EREMOTEIO);
+TRACE_DEFINE_ENUM(EBADTYPE);
+TRACE_DEFINE_ENUM(EAGAIN);
+TRACE_DEFINE_ENUM(ELOOP);
+TRACE_DEFINE_ENUM(EOPNOTSUPP);
+TRACE_DEFINE_ENUM(EDEADLK);
+TRACE_DEFINE_ENUM(ENOMEM);
+TRACE_DEFINE_ENUM(EKEYEXPIRED);
+TRACE_DEFINE_ENUM(ETIMEDOUT);
+TRACE_DEFINE_ENUM(ERESTARTSYS);
+TRACE_DEFINE_ENUM(ECONNREFUSED);
+TRACE_DEFINE_ENUM(ECONNRESET);
+TRACE_DEFINE_ENUM(ENETUNREACH);
+TRACE_DEFINE_ENUM(EHOSTUNREACH);
+TRACE_DEFINE_ENUM(EHOSTDOWN);
+TRACE_DEFINE_ENUM(EPIPE);
+TRACE_DEFINE_ENUM(EPFNOSUPPORT);
+TRACE_DEFINE_ENUM(EPROTONOSUPPORT);
+
+TRACE_DEFINE_ENUM(NFS4_OK);
+TRACE_DEFINE_ENUM(NFS4ERR_ACCESS);
+TRACE_DEFINE_ENUM(NFS4ERR_ATTRNOTSUPP);
+TRACE_DEFINE_ENUM(NFS4ERR_ADMIN_REVOKED);
+TRACE_DEFINE_ENUM(NFS4ERR_BACK_CHAN_BUSY);
+TRACE_DEFINE_ENUM(NFS4ERR_BADCHAR);
+TRACE_DEFINE_ENUM(NFS4ERR_BADHANDLE);
+TRACE_DEFINE_ENUM(NFS4ERR_BADIOMODE);
+TRACE_DEFINE_ENUM(NFS4ERR_BADLAYOUT);
+TRACE_DEFINE_ENUM(NFS4ERR_BADLABEL);
+TRACE_DEFINE_ENUM(NFS4ERR_BADNAME);
+TRACE_DEFINE_ENUM(NFS4ERR_BADOWNER);
+TRACE_DEFINE_ENUM(NFS4ERR_BADSESSION);
+TRACE_DEFINE_ENUM(NFS4ERR_BADSLOT);
+TRACE_DEFINE_ENUM(NFS4ERR_BADTYPE);
+TRACE_DEFINE_ENUM(NFS4ERR_BADXDR);
+TRACE_DEFINE_ENUM(NFS4ERR_BAD_COOKIE);
+TRACE_DEFINE_ENUM(NFS4ERR_BAD_HIGH_SLOT);
+TRACE_DEFINE_ENUM(NFS4ERR_BAD_RANGE);
+TRACE_DEFINE_ENUM(NFS4ERR_BAD_SEQID);
+TRACE_DEFINE_ENUM(NFS4ERR_BAD_SESSION_DIGEST);
+TRACE_DEFINE_ENUM(NFS4ERR_BAD_STATEID);
+TRACE_DEFINE_ENUM(NFS4ERR_CB_PATH_DOWN);
+TRACE_DEFINE_ENUM(NFS4ERR_CLID_INUSE);
+TRACE_DEFINE_ENUM(NFS4ERR_CLIENTID_BUSY);
+TRACE_DEFINE_ENUM(NFS4ERR_COMPLETE_ALREADY);
+TRACE_DEFINE_ENUM(NFS4ERR_CONN_NOT_BOUND_TO_SESSION);
+TRACE_DEFINE_ENUM(NFS4ERR_DEADLOCK);
+TRACE_DEFINE_ENUM(NFS4ERR_DEADSESSION);
+TRACE_DEFINE_ENUM(NFS4ERR_DELAY);
+TRACE_DEFINE_ENUM(NFS4ERR_DELEG_ALREADY_WANTED);
+TRACE_DEFINE_ENUM(NFS4ERR_DELEG_REVOKED);
+TRACE_DEFINE_ENUM(NFS4ERR_DENIED);
+TRACE_DEFINE_ENUM(NFS4ERR_DIRDELEG_UNAVAIL);
+TRACE_DEFINE_ENUM(NFS4ERR_DQUOT);
+TRACE_DEFINE_ENUM(NFS4ERR_ENCR_ALG_UNSUPP);
+TRACE_DEFINE_ENUM(NFS4ERR_EXIST);
+TRACE_DEFINE_ENUM(NFS4ERR_EXPIRED);
+TRACE_DEFINE_ENUM(NFS4ERR_FBIG);
+TRACE_DEFINE_ENUM(NFS4ERR_FHEXPIRED);
+TRACE_DEFINE_ENUM(NFS4ERR_FILE_OPEN);
+TRACE_DEFINE_ENUM(NFS4ERR_GRACE);
+TRACE_DEFINE_ENUM(NFS4ERR_HASH_ALG_UNSUPP);
+TRACE_DEFINE_ENUM(NFS4ERR_INVAL);
+TRACE_DEFINE_ENUM(NFS4ERR_IO);
+TRACE_DEFINE_ENUM(NFS4ERR_ISDIR);
+TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTTRYLATER);
+TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTUNAVAILABLE);
+TRACE_DEFINE_ENUM(NFS4ERR_LEASE_MOVED);
+TRACE_DEFINE_ENUM(NFS4ERR_LOCKED);
+TRACE_DEFINE_ENUM(NFS4ERR_LOCKS_HELD);
+TRACE_DEFINE_ENUM(NFS4ERR_LOCK_RANGE);
+TRACE_DEFINE_ENUM(NFS4ERR_MINOR_VERS_MISMATCH);
+TRACE_DEFINE_ENUM(NFS4ERR_MLINK);
+TRACE_DEFINE_ENUM(NFS4ERR_MOVED);
+TRACE_DEFINE_ENUM(NFS4ERR_NAMETOOLONG);
+TRACE_DEFINE_ENUM(NFS4ERR_NOENT);
+TRACE_DEFINE_ENUM(NFS4ERR_NOFILEHANDLE);
+TRACE_DEFINE_ENUM(NFS4ERR_NOMATCHING_LAYOUT);
+TRACE_DEFINE_ENUM(NFS4ERR_NOSPC);
+TRACE_DEFINE_ENUM(NFS4ERR_NOTDIR);
+TRACE_DEFINE_ENUM(NFS4ERR_NOTEMPTY);
+TRACE_DEFINE_ENUM(NFS4ERR_NOTSUPP);
+TRACE_DEFINE_ENUM(NFS4ERR_NOT_ONLY_OP);
+TRACE_DEFINE_ENUM(NFS4ERR_NOT_SAME);
+TRACE_DEFINE_ENUM(NFS4ERR_NO_GRACE);
+TRACE_DEFINE_ENUM(NFS4ERR_NXIO);
+TRACE_DEFINE_ENUM(NFS4ERR_OLD_STATEID);
+TRACE_DEFINE_ENUM(NFS4ERR_OPENMODE);
+TRACE_DEFINE_ENUM(NFS4ERR_OP_ILLEGAL);
+TRACE_DEFINE_ENUM(NFS4ERR_OP_NOT_IN_SESSION);
+TRACE_DEFINE_ENUM(NFS4ERR_PERM);
+TRACE_DEFINE_ENUM(NFS4ERR_PNFS_IO_HOLE);
+TRACE_DEFINE_ENUM(NFS4ERR_PNFS_NO_LAYOUT);
+TRACE_DEFINE_ENUM(NFS4ERR_RECALLCONFLICT);
+TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_BAD);
+TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_CONFLICT);
+TRACE_DEFINE_ENUM(NFS4ERR_REJECT_DELEG);
+TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG);
+TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG_TO_CACHE);
+TRACE_DEFINE_ENUM(NFS4ERR_REQ_TOO_BIG);
+TRACE_DEFINE_ENUM(NFS4ERR_RESOURCE);
+TRACE_DEFINE_ENUM(NFS4ERR_RESTOREFH);
+TRACE_DEFINE_ENUM(NFS4ERR_RETRY_UNCACHED_REP);
+TRACE_DEFINE_ENUM(NFS4ERR_RETURNCONFLICT);
+TRACE_DEFINE_ENUM(NFS4ERR_ROFS);
+TRACE_DEFINE_ENUM(NFS4ERR_SAME);
+TRACE_DEFINE_ENUM(NFS4ERR_SHARE_DENIED);
+TRACE_DEFINE_ENUM(NFS4ERR_SEQUENCE_POS);
+TRACE_DEFINE_ENUM(NFS4ERR_SEQ_FALSE_RETRY);
+TRACE_DEFINE_ENUM(NFS4ERR_SEQ_MISORDERED);
+TRACE_DEFINE_ENUM(NFS4ERR_SERVERFAULT);
+TRACE_DEFINE_ENUM(NFS4ERR_STALE);
+TRACE_DEFINE_ENUM(NFS4ERR_STALE_CLIENTID);
+TRACE_DEFINE_ENUM(NFS4ERR_STALE_STATEID);
+TRACE_DEFINE_ENUM(NFS4ERR_SYMLINK);
+TRACE_DEFINE_ENUM(NFS4ERR_TOOSMALL);
+TRACE_DEFINE_ENUM(NFS4ERR_TOO_MANY_OPS);
+TRACE_DEFINE_ENUM(NFS4ERR_UNKNOWN_LAYOUTTYPE);
+TRACE_DEFINE_ENUM(NFS4ERR_UNSAFE_COMPOUND);
+TRACE_DEFINE_ENUM(NFS4ERR_WRONGSEC);
+TRACE_DEFINE_ENUM(NFS4ERR_WRONG_CRED);
+TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE);
+TRACE_DEFINE_ENUM(NFS4ERR_XDEV);
+
#define show_nfsv4_errors(error) \
- __print_symbolic(error, \
+ __print_symbolic(-(error), \
{ NFS4_OK, "OK" }, \
/* Mapped by nfs4_stat_to_errno() */ \
- { -EPERM, "EPERM" }, \
- { -ENOENT, "ENOENT" }, \
- { -EIO, "EIO" }, \
- { -ENXIO, "ENXIO" }, \
- { -EACCES, "EACCES" }, \
- { -EEXIST, "EEXIST" }, \
- { -EXDEV, "EXDEV" }, \
- { -ENOTDIR, "ENOTDIR" }, \
- { -EISDIR, "EISDIR" }, \
- { -EFBIG, "EFBIG" }, \
- { -ENOSPC, "ENOSPC" }, \
- { -EROFS, "EROFS" }, \
- { -EMLINK, "EMLINK" }, \
- { -ENAMETOOLONG, "ENAMETOOLONG" }, \
- { -ENOTEMPTY, "ENOTEMPTY" }, \
- { -EDQUOT, "EDQUOT" }, \
- { -ESTALE, "ESTALE" }, \
- { -EBADHANDLE, "EBADHANDLE" }, \
- { -EBADCOOKIE, "EBADCOOKIE" }, \
- { -ENOTSUPP, "ENOTSUPP" }, \
- { -ETOOSMALL, "ETOOSMALL" }, \
- { -EREMOTEIO, "EREMOTEIO" }, \
- { -EBADTYPE, "EBADTYPE" }, \
- { -EAGAIN, "EAGAIN" }, \
- { -ELOOP, "ELOOP" }, \
- { -EOPNOTSUPP, "EOPNOTSUPP" }, \
- { -EDEADLK, "EDEADLK" }, \
+ { EPERM, "EPERM" }, \
+ { ENOENT, "ENOENT" }, \
+ { EIO, "EIO" }, \
+ { ENXIO, "ENXIO" }, \
+ { EACCES, "EACCES" }, \
+ { EEXIST, "EEXIST" }, \
+ { EXDEV, "EXDEV" }, \
+ { ENOTDIR, "ENOTDIR" }, \
+ { EISDIR, "EISDIR" }, \
+ { EFBIG, "EFBIG" }, \
+ { ENOSPC, "ENOSPC" }, \
+ { EROFS, "EROFS" }, \
+ { EMLINK, "EMLINK" }, \
+ { ENAMETOOLONG, "ENAMETOOLONG" }, \
+ { ENOTEMPTY, "ENOTEMPTY" }, \
+ { EDQUOT, "EDQUOT" }, \
+ { ESTALE, "ESTALE" }, \
+ { EBADHANDLE, "EBADHANDLE" }, \
+ { EBADCOOKIE, "EBADCOOKIE" }, \
+ { ENOTSUPP, "ENOTSUPP" }, \
+ { ETOOSMALL, "ETOOSMALL" }, \
+ { EREMOTEIO, "EREMOTEIO" }, \
+ { EBADTYPE, "EBADTYPE" }, \
+ { EAGAIN, "EAGAIN" }, \
+ { ELOOP, "ELOOP" }, \
+ { EOPNOTSUPP, "EOPNOTSUPP" }, \
+ { EDEADLK, "EDEADLK" }, \
/* RPC errors */ \
- { -ENOMEM, "ENOMEM" }, \
- { -EKEYEXPIRED, "EKEYEXPIRED" }, \
- { -ETIMEDOUT, "ETIMEDOUT" }, \
- { -ERESTARTSYS, "ERESTARTSYS" }, \
- { -ECONNREFUSED, "ECONNREFUSED" }, \
- { -ECONNRESET, "ECONNRESET" }, \
- { -ENETUNREACH, "ENETUNREACH" }, \
- { -EHOSTUNREACH, "EHOSTUNREACH" }, \
- { -EHOSTDOWN, "EHOSTDOWN" }, \
- { -EPIPE, "EPIPE" }, \
- { -EPFNOSUPPORT, "EPFNOSUPPORT" }, \
- { -EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \
+ { ENOMEM, "ENOMEM" }, \
+ { EKEYEXPIRED, "EKEYEXPIRED" }, \
+ { ETIMEDOUT, "ETIMEDOUT" }, \
+ { ERESTARTSYS, "ERESTARTSYS" }, \
+ { ECONNREFUSED, "ECONNREFUSED" }, \
+ { ECONNRESET, "ECONNRESET" }, \
+ { ENETUNREACH, "ENETUNREACH" }, \
+ { EHOSTUNREACH, "EHOSTUNREACH" }, \
+ { EHOSTDOWN, "EHOSTDOWN" }, \
+ { EPIPE, "EPIPE" }, \
+ { EPFNOSUPPORT, "EPFNOSUPPORT" }, \
+ { EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \
/* NFSv4 native errors */ \
- { -NFS4ERR_ACCESS, "ACCESS" }, \
- { -NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \
- { -NFS4ERR_ADMIN_REVOKED, "ADMIN_REVOKED" }, \
- { -NFS4ERR_BACK_CHAN_BUSY, "BACK_CHAN_BUSY" }, \
- { -NFS4ERR_BADCHAR, "BADCHAR" }, \
- { -NFS4ERR_BADHANDLE, "BADHANDLE" }, \
- { -NFS4ERR_BADIOMODE, "BADIOMODE" }, \
- { -NFS4ERR_BADLAYOUT, "BADLAYOUT" }, \
- { -NFS4ERR_BADLABEL, "BADLABEL" }, \
- { -NFS4ERR_BADNAME, "BADNAME" }, \
- { -NFS4ERR_BADOWNER, "BADOWNER" }, \
- { -NFS4ERR_BADSESSION, "BADSESSION" }, \
- { -NFS4ERR_BADSLOT, "BADSLOT" }, \
- { -NFS4ERR_BADTYPE, "BADTYPE" }, \
- { -NFS4ERR_BADXDR, "BADXDR" }, \
- { -NFS4ERR_BAD_COOKIE, "BAD_COOKIE" }, \
- { -NFS4ERR_BAD_HIGH_SLOT, "BAD_HIGH_SLOT" }, \
- { -NFS4ERR_BAD_RANGE, "BAD_RANGE" }, \
- { -NFS4ERR_BAD_SEQID, "BAD_SEQID" }, \
- { -NFS4ERR_BAD_SESSION_DIGEST, "BAD_SESSION_DIGEST" }, \
- { -NFS4ERR_BAD_STATEID, "BAD_STATEID" }, \
- { -NFS4ERR_CB_PATH_DOWN, "CB_PATH_DOWN" }, \
- { -NFS4ERR_CLID_INUSE, "CLID_INUSE" }, \
- { -NFS4ERR_CLIENTID_BUSY, "CLIENTID_BUSY" }, \
- { -NFS4ERR_COMPLETE_ALREADY, "COMPLETE_ALREADY" }, \
- { -NFS4ERR_CONN_NOT_BOUND_TO_SESSION, \
+ { NFS4ERR_ACCESS, "ACCESS" }, \
+ { NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \
+ { NFS4ERR_ADMIN_REVOKED, "ADMIN_REVOKED" }, \
+ { NFS4ERR_BACK_CHAN_BUSY, "BACK_CHAN_BUSY" }, \
+ { NFS4ERR_BADCHAR, "BADCHAR" }, \
+ { NFS4ERR_BADHANDLE, "BADHANDLE" }, \
+ { NFS4ERR_BADIOMODE, "BADIOMODE" }, \
+ { NFS4ERR_BADLAYOUT, "BADLAYOUT" }, \
+ { NFS4ERR_BADLABEL, "BADLABEL" }, \
+ { NFS4ERR_BADNAME, "BADNAME" }, \
+ { NFS4ERR_BADOWNER, "BADOWNER" }, \
+ { NFS4ERR_BADSESSION, "BADSESSION" }, \
+ { NFS4ERR_BADSLOT, "BADSLOT" }, \
+ { NFS4ERR_BADTYPE, "BADTYPE" }, \
+ { NFS4ERR_BADXDR, "BADXDR" }, \
+ { NFS4ERR_BAD_COOKIE, "BAD_COOKIE" }, \
+ { NFS4ERR_BAD_HIGH_SLOT, "BAD_HIGH_SLOT" }, \
+ { NFS4ERR_BAD_RANGE, "BAD_RANGE" }, \
+ { NFS4ERR_BAD_SEQID, "BAD_SEQID" }, \
+ { NFS4ERR_BAD_SESSION_DIGEST, "BAD_SESSION_DIGEST" }, \
+ { NFS4ERR_BAD_STATEID, "BAD_STATEID" }, \
+ { NFS4ERR_CB_PATH_DOWN, "CB_PATH_DOWN" }, \
+ { NFS4ERR_CLID_INUSE, "CLID_INUSE" }, \
+ { NFS4ERR_CLIENTID_BUSY, "CLIENTID_BUSY" }, \
+ { NFS4ERR_COMPLETE_ALREADY, "COMPLETE_ALREADY" }, \
+ { NFS4ERR_CONN_NOT_BOUND_TO_SESSION, \
"CONN_NOT_BOUND_TO_SESSION" }, \
- { -NFS4ERR_DEADLOCK, "DEADLOCK" }, \
- { -NFS4ERR_DEADSESSION, "DEAD_SESSION" }, \
- { -NFS4ERR_DELAY, "DELAY" }, \
- { -NFS4ERR_DELEG_ALREADY_WANTED, \
+ { NFS4ERR_DEADLOCK, "DEADLOCK" }, \
+ { NFS4ERR_DEADSESSION, "DEAD_SESSION" }, \
+ { NFS4ERR_DELAY, "DELAY" }, \
+ { NFS4ERR_DELEG_ALREADY_WANTED, \
"DELEG_ALREADY_WANTED" }, \
- { -NFS4ERR_DELEG_REVOKED, "DELEG_REVOKED" }, \
- { -NFS4ERR_DENIED, "DENIED" }, \
- { -NFS4ERR_DIRDELEG_UNAVAIL, "DIRDELEG_UNAVAIL" }, \
- { -NFS4ERR_DQUOT, "DQUOT" }, \
- { -NFS4ERR_ENCR_ALG_UNSUPP, "ENCR_ALG_UNSUPP" }, \
- { -NFS4ERR_EXIST, "EXIST" }, \
- { -NFS4ERR_EXPIRED, "EXPIRED" }, \
- { -NFS4ERR_FBIG, "FBIG" }, \
- { -NFS4ERR_FHEXPIRED, "FHEXPIRED" }, \
- { -NFS4ERR_FILE_OPEN, "FILE_OPEN" }, \
- { -NFS4ERR_GRACE, "GRACE" }, \
- { -NFS4ERR_HASH_ALG_UNSUPP, "HASH_ALG_UNSUPP" }, \
- { -NFS4ERR_INVAL, "INVAL" }, \
- { -NFS4ERR_IO, "IO" }, \
- { -NFS4ERR_ISDIR, "ISDIR" }, \
- { -NFS4ERR_LAYOUTTRYLATER, "LAYOUTTRYLATER" }, \
- { -NFS4ERR_LAYOUTUNAVAILABLE, "LAYOUTUNAVAILABLE" }, \
- { -NFS4ERR_LEASE_MOVED, "LEASE_MOVED" }, \
- { -NFS4ERR_LOCKED, "LOCKED" }, \
- { -NFS4ERR_LOCKS_HELD, "LOCKS_HELD" }, \
- { -NFS4ERR_LOCK_RANGE, "LOCK_RANGE" }, \
- { -NFS4ERR_MINOR_VERS_MISMATCH, "MINOR_VERS_MISMATCH" }, \
- { -NFS4ERR_MLINK, "MLINK" }, \
- { -NFS4ERR_MOVED, "MOVED" }, \
- { -NFS4ERR_NAMETOOLONG, "NAMETOOLONG" }, \
- { -NFS4ERR_NOENT, "NOENT" }, \
- { -NFS4ERR_NOFILEHANDLE, "NOFILEHANDLE" }, \
- { -NFS4ERR_NOMATCHING_LAYOUT, "NOMATCHING_LAYOUT" }, \
- { -NFS4ERR_NOSPC, "NOSPC" }, \
- { -NFS4ERR_NOTDIR, "NOTDIR" }, \
- { -NFS4ERR_NOTEMPTY, "NOTEMPTY" }, \
- { -NFS4ERR_NOTSUPP, "NOTSUPP" }, \
- { -NFS4ERR_NOT_ONLY_OP, "NOT_ONLY_OP" }, \
- { -NFS4ERR_NOT_SAME, "NOT_SAME" }, \
- { -NFS4ERR_NO_GRACE, "NO_GRACE" }, \
- { -NFS4ERR_NXIO, "NXIO" }, \
- { -NFS4ERR_OLD_STATEID, "OLD_STATEID" }, \
- { -NFS4ERR_OPENMODE, "OPENMODE" }, \
- { -NFS4ERR_OP_ILLEGAL, "OP_ILLEGAL" }, \
- { -NFS4ERR_OP_NOT_IN_SESSION, "OP_NOT_IN_SESSION" }, \
- { -NFS4ERR_PERM, "PERM" }, \
- { -NFS4ERR_PNFS_IO_HOLE, "PNFS_IO_HOLE" }, \
- { -NFS4ERR_PNFS_NO_LAYOUT, "PNFS_NO_LAYOUT" }, \
- { -NFS4ERR_RECALLCONFLICT, "RECALLCONFLICT" }, \
- { -NFS4ERR_RECLAIM_BAD, "RECLAIM_BAD" }, \
- { -NFS4ERR_RECLAIM_CONFLICT, "RECLAIM_CONFLICT" }, \
- { -NFS4ERR_REJECT_DELEG, "REJECT_DELEG" }, \
- { -NFS4ERR_REP_TOO_BIG, "REP_TOO_BIG" }, \
- { -NFS4ERR_REP_TOO_BIG_TO_CACHE, \
+ { NFS4ERR_DELEG_REVOKED, "DELEG_REVOKED" }, \
+ { NFS4ERR_DENIED, "DENIED" }, \
+ { NFS4ERR_DIRDELEG_UNAVAIL, "DIRDELEG_UNAVAIL" }, \
+ { NFS4ERR_DQUOT, "DQUOT" }, \
+ { NFS4ERR_ENCR_ALG_UNSUPP, "ENCR_ALG_UNSUPP" }, \
+ { NFS4ERR_EXIST, "EXIST" }, \
+ { NFS4ERR_EXPIRED, "EXPIRED" }, \
+ { NFS4ERR_FBIG, "FBIG" }, \
+ { NFS4ERR_FHEXPIRED, "FHEXPIRED" }, \
+ { NFS4ERR_FILE_OPEN, "FILE_OPEN" }, \
+ { NFS4ERR_GRACE, "GRACE" }, \
+ { NFS4ERR_HASH_ALG_UNSUPP, "HASH_ALG_UNSUPP" }, \
+ { NFS4ERR_INVAL, "INVAL" }, \
+ { NFS4ERR_IO, "IO" }, \
+ { NFS4ERR_ISDIR, "ISDIR" }, \
+ { NFS4ERR_LAYOUTTRYLATER, "LAYOUTTRYLATER" }, \
+ { NFS4ERR_LAYOUTUNAVAILABLE, "LAYOUTUNAVAILABLE" }, \
+ { NFS4ERR_LEASE_MOVED, "LEASE_MOVED" }, \
+ { NFS4ERR_LOCKED, "LOCKED" }, \
+ { NFS4ERR_LOCKS_HELD, "LOCKS_HELD" }, \
+ { NFS4ERR_LOCK_RANGE, "LOCK_RANGE" }, \
+ { NFS4ERR_MINOR_VERS_MISMATCH, "MINOR_VERS_MISMATCH" }, \
+ { NFS4ERR_MLINK, "MLINK" }, \
+ { NFS4ERR_MOVED, "MOVED" }, \
+ { NFS4ERR_NAMETOOLONG, "NAMETOOLONG" }, \
+ { NFS4ERR_NOENT, "NOENT" }, \
+ { NFS4ERR_NOFILEHANDLE, "NOFILEHANDLE" }, \
+ { NFS4ERR_NOMATCHING_LAYOUT, "NOMATCHING_LAYOUT" }, \
+ { NFS4ERR_NOSPC, "NOSPC" }, \
+ { NFS4ERR_NOTDIR, "NOTDIR" }, \
+ { NFS4ERR_NOTEMPTY, "NOTEMPTY" }, \
+ { NFS4ERR_NOTSUPP, "NOTSUPP" }, \
+ { NFS4ERR_NOT_ONLY_OP, "NOT_ONLY_OP" }, \
+ { NFS4ERR_NOT_SAME, "NOT_SAME" }, \
+ { NFS4ERR_NO_GRACE, "NO_GRACE" }, \
+ { NFS4ERR_NXIO, "NXIO" }, \
+ { NFS4ERR_OLD_STATEID, "OLD_STATEID" }, \
+ { NFS4ERR_OPENMODE, "OPENMODE" }, \
+ { NFS4ERR_OP_ILLEGAL, "OP_ILLEGAL" }, \
+ { NFS4ERR_OP_NOT_IN_SESSION, "OP_NOT_IN_SESSION" }, \
+ { NFS4ERR_PERM, "PERM" }, \
+ { NFS4ERR_PNFS_IO_HOLE, "PNFS_IO_HOLE" }, \
+ { NFS4ERR_PNFS_NO_LAYOUT, "PNFS_NO_LAYOUT" }, \
+ { NFS4ERR_RECALLCONFLICT, "RECALLCONFLICT" }, \
+ { NFS4ERR_RECLAIM_BAD, "RECLAIM_BAD" }, \
+ { NFS4ERR_RECLAIM_CONFLICT, "RECLAIM_CONFLICT" }, \
+ { NFS4ERR_REJECT_DELEG, "REJECT_DELEG" }, \
+ { NFS4ERR_REP_TOO_BIG, "REP_TOO_BIG" }, \
+ { NFS4ERR_REP_TOO_BIG_TO_CACHE, \
"REP_TOO_BIG_TO_CACHE" }, \
- { -NFS4ERR_REQ_TOO_BIG, "REQ_TOO_BIG" }, \
- { -NFS4ERR_RESOURCE, "RESOURCE" }, \
- { -NFS4ERR_RESTOREFH, "RESTOREFH" }, \
- { -NFS4ERR_RETRY_UNCACHED_REP, "RETRY_UNCACHED_REP" }, \
- { -NFS4ERR_RETURNCONFLICT, "RETURNCONFLICT" }, \
- { -NFS4ERR_ROFS, "ROFS" }, \
- { -NFS4ERR_SAME, "SAME" }, \
- { -NFS4ERR_SHARE_DENIED, "SHARE_DENIED" }, \
- { -NFS4ERR_SEQUENCE_POS, "SEQUENCE_POS" }, \
- { -NFS4ERR_SEQ_FALSE_RETRY, "SEQ_FALSE_RETRY" }, \
- { -NFS4ERR_SEQ_MISORDERED, "SEQ_MISORDERED" }, \
- { -NFS4ERR_SERVERFAULT, "SERVERFAULT" }, \
- { -NFS4ERR_STALE, "STALE" }, \
- { -NFS4ERR_STALE_CLIENTID, "STALE_CLIENTID" }, \
- { -NFS4ERR_STALE_STATEID, "STALE_STATEID" }, \
- { -NFS4ERR_SYMLINK, "SYMLINK" }, \
- { -NFS4ERR_TOOSMALL, "TOOSMALL" }, \
- { -NFS4ERR_TOO_MANY_OPS, "TOO_MANY_OPS" }, \
- { -NFS4ERR_UNKNOWN_LAYOUTTYPE, "UNKNOWN_LAYOUTTYPE" }, \
- { -NFS4ERR_UNSAFE_COMPOUND, "UNSAFE_COMPOUND" }, \
- { -NFS4ERR_WRONGSEC, "WRONGSEC" }, \
- { -NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \
- { -NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \
- { -NFS4ERR_XDEV, "XDEV" })
+ { NFS4ERR_REQ_TOO_BIG, "REQ_TOO_BIG" }, \
+ { NFS4ERR_RESOURCE, "RESOURCE" }, \
+ { NFS4ERR_RESTOREFH, "RESTOREFH" }, \
+ { NFS4ERR_RETRY_UNCACHED_REP, "RETRY_UNCACHED_REP" }, \
+ { NFS4ERR_RETURNCONFLICT, "RETURNCONFLICT" }, \
+ { NFS4ERR_ROFS, "ROFS" }, \
+ { NFS4ERR_SAME, "SAME" }, \
+ { NFS4ERR_SHARE_DENIED, "SHARE_DENIED" }, \
+ { NFS4ERR_SEQUENCE_POS, "SEQUENCE_POS" }, \
+ { NFS4ERR_SEQ_FALSE_RETRY, "SEQ_FALSE_RETRY" }, \
+ { NFS4ERR_SEQ_MISORDERED, "SEQ_MISORDERED" }, \
+ { NFS4ERR_SERVERFAULT, "SERVERFAULT" }, \
+ { NFS4ERR_STALE, "STALE" }, \
+ { NFS4ERR_STALE_CLIENTID, "STALE_CLIENTID" }, \
+ { NFS4ERR_STALE_STATEID, "STALE_STATEID" }, \
+ { NFS4ERR_SYMLINK, "SYMLINK" }, \
+ { NFS4ERR_TOOSMALL, "TOOSMALL" }, \
+ { NFS4ERR_TOO_MANY_OPS, "TOO_MANY_OPS" }, \
+ { NFS4ERR_UNKNOWN_LAYOUTTYPE, "UNKNOWN_LAYOUTTYPE" }, \
+ { NFS4ERR_UNSAFE_COMPOUND, "UNSAFE_COMPOUND" }, \
+ { NFS4ERR_WRONGSEC, "WRONGSEC" }, \
+ { NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \
+ { NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \
+ { NFS4ERR_XDEV, "XDEV" })
#define show_open_flags(flags) \
__print_flags(flags, "|", \
@@ -558,6 +703,13 @@ TRACE_EVENT(nfs4_close,
)
);
+TRACE_DEFINE_ENUM(F_GETLK);
+TRACE_DEFINE_ENUM(F_SETLK);
+TRACE_DEFINE_ENUM(F_SETLKW);
+TRACE_DEFINE_ENUM(F_RDLCK);
+TRACE_DEFINE_ENUM(F_WRLCK);
+TRACE_DEFINE_ENUM(F_UNLCK);
+
#define show_lock_cmd(type) \
__print_symbolic((int)type, \
{ F_GETLK, "GETLK" }, \
@@ -1451,6 +1603,10 @@ DEFINE_NFS4_COMMIT_EVENT(nfs4_commit);
#ifdef CONFIG_NFS_V4_1
DEFINE_NFS4_COMMIT_EVENT(nfs4_pnfs_commit_ds);
+TRACE_DEFINE_ENUM(IOMODE_READ);
+TRACE_DEFINE_ENUM(IOMODE_RW);
+TRACE_DEFINE_ENUM(IOMODE_ANY);
+
#define show_pnfs_iomode(iomode) \
__print_symbolic(iomode, \
{ IOMODE_READ, "READ" }, \
@@ -1528,6 +1684,20 @@ DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutcommit);
DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutreturn);
DEFINE_NFS4_INODE_EVENT(nfs4_layoutreturn_on_close);
+TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_UNKNOWN);
+TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_NO_PNFS);
+TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_RD_ZEROLEN);
+TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_MDSTHRESH);
+TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_NOMEM);
+TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_BULK_RECALL);
+TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_IO_TEST_FAIL);
+TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_FOUND_CACHED);
+TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_RETURN);
+TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_BLOCKED);
+TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_INVALID_OPEN);
+TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_RETRY);
+TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
+
#define show_pnfs_update_layout_reason(reason) \
__print_symbolic(reason, \
{ PNFS_UPDATE_LAYOUT_UNKNOWN, "unknown" }, \
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 5c4568a0804b..e54d899c1848 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -461,7 +461,7 @@ EXPORT_SYMBOL_GPL(nfs_wait_on_request);
* @prev: previous request in desc, or NULL
* @req: this request
*
- * Returns zero if @req can be coalesced into @desc, otherwise it returns
+ * Returns zero if @req cannot be coalesced into @desc, otherwise it returns
* the size of the request.
*/
size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
@@ -587,7 +587,7 @@ static void nfs_pgio_prepare(struct rpc_task *task, void *calldata)
}
int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
- struct rpc_cred *cred, const struct nfs_rpc_ops *rpc_ops,
+ const struct cred *cred, const struct nfs_rpc_ops *rpc_ops,
const struct rpc_call_ops *call_ops, int how, int flags)
{
struct rpc_task *task;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 06cb90e9bc6e..53726da5c010 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -275,7 +275,7 @@ pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
list_del_init(&lo->plh_layouts);
spin_unlock(&clp->cl_lock);
}
- put_rpccred(lo->plh_lc_cred);
+ put_cred(lo->plh_lc_cred);
return ld->free_layout_hdr(lo);
}
@@ -1038,7 +1038,7 @@ pnfs_alloc_init_layoutget_args(struct inode *ino,
lgp->args.ctx = get_nfs_open_context(ctx);
nfs4_stateid_copy(&lgp->args.stateid, stateid);
lgp->gfp_flags = gfp_flags;
- lgp->cred = get_rpccred(ctx->cred);
+ lgp->cred = get_cred(ctx->cred);
return lgp;
}
@@ -1049,7 +1049,7 @@ void pnfs_layoutget_free(struct nfs4_layoutget *lgp)
nfs4_free_pages(lgp->args.layout.pages, max_pages);
if (lgp->args.inode)
pnfs_put_layout_hdr(NFS_I(lgp->args.inode)->layout);
- put_rpccred(lgp->cred);
+ put_cred(lgp->cred);
put_nfs_open_context(lgp->args.ctx);
kfree(lgp);
}
@@ -1324,7 +1324,7 @@ pnfs_commit_and_return_layout(struct inode *inode)
bool pnfs_roc(struct inode *ino,
struct nfs4_layoutreturn_args *args,
struct nfs4_layoutreturn_res *res,
- const struct rpc_cred *cred)
+ const struct cred *cred)
{
struct nfs_inode *nfsi = NFS_I(ino);
struct nfs_open_context *ctx;
@@ -1583,7 +1583,7 @@ alloc_init_layout_hdr(struct inode *ino,
INIT_LIST_HEAD(&lo->plh_return_segs);
INIT_LIST_HEAD(&lo->plh_bulk_destroy);
lo->plh_inode = ino;
- lo->plh_lc_cred = get_rpccred(ctx->cred);
+ lo->plh_lc_cred = get_cred(ctx->cred);
lo->plh_flags |= 1 << NFS_LAYOUT_INVALID_STID;
return lo;
}
@@ -2928,7 +2928,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
spin_unlock(&inode->i_lock);
data->args.inode = inode;
- data->cred = get_rpccred(nfsi->layout->plh_lc_cred);
+ data->cred = get_cred(nfsi->layout->plh_lc_cred);
nfs_fattr_init(&data->fattr);
data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
data->res.fattr = &data->fattr;
@@ -2941,7 +2941,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
if (ld->prepare_layoutcommit) {
status = ld->prepare_layoutcommit(&data->args);
if (status) {
- put_rpccred(data->cred);
+ put_cred(data->cred);
spin_lock(&inode->i_lock);
set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags);
if (end_pos > nfsi->layout->plh_lwb)
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index e2e9fcd5341d..5e80a07b7bea 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -200,7 +200,7 @@ struct pnfs_layout_hdr {
u32 plh_return_seq;
enum pnfs_iomode plh_return_iomode;
loff_t plh_lwb; /* last write byte for layoutcommit */
- struct rpc_cred *plh_lc_cred; /* layoutcommit cred */
+ const struct cred *plh_lc_cred; /* layoutcommit cred */
struct inode *plh_inode;
};
@@ -230,7 +230,7 @@ extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
extern size_t max_response_pages(struct nfs_server *server);
extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
struct pnfs_device *dev,
- struct rpc_cred *cred);
+ const struct cred *cred);
extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout);
extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync);
@@ -280,7 +280,7 @@ int pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
bool pnfs_roc(struct inode *ino,
struct nfs4_layoutreturn_args *args,
struct nfs4_layoutreturn_res *res,
- const struct rpc_cred *cred);
+ const struct cred *cred);
void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
struct nfs4_layoutreturn_res *res,
int ret);
@@ -343,7 +343,7 @@ struct nfs4_deviceid_node {
struct nfs4_deviceid_node *
nfs4_find_get_deviceid(struct nfs_server *server,
- const struct nfs4_deviceid *id, struct rpc_cred *cred,
+ const struct nfs4_deviceid *id, const struct cred *cred,
gfp_t gfp_mask);
void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, struct nfs_server *,
@@ -694,7 +694,7 @@ static inline bool
pnfs_roc(struct inode *ino,
struct nfs4_layoutreturn_args *args,
struct nfs4_layoutreturn_res *res,
- const struct rpc_cred *cred)
+ const struct cred *cred)
{
return false;
}
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index e8a07b3f9aaa..7fb59487ee90 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -94,7 +94,7 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld,
static struct nfs4_deviceid_node *
nfs4_get_device_info(struct nfs_server *server,
const struct nfs4_deviceid *dev_id,
- struct rpc_cred *cred, gfp_t gfp_flags)
+ const struct cred *cred, gfp_t gfp_flags)
{
struct nfs4_deviceid_node *d = NULL;
struct pnfs_device *pdev = NULL;
@@ -184,7 +184,7 @@ __nfs4_find_get_deviceid(struct nfs_server *server,
struct nfs4_deviceid_node *
nfs4_find_get_deviceid(struct nfs_server *server,
- const struct nfs4_deviceid *id, struct rpc_cred *cred,
+ const struct nfs4_deviceid *id, const struct cred *cred,
gfp_t gfp_mask)
{
long hash = nfs4_deviceid_hash(id);
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index d5e4d3cd8c7f..f5ad75fafc3c 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -686,7 +686,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
rpc_clnt_setup_test_and_add_xprt,
&rpcdata);
if (xprtdata.cred)
- put_rpccred(xprtdata.cred);
+ put_cred(xprtdata.cred);
} else {
clp = nfs4_set_ds_client(mds_srv,
(struct sockaddr *)&da->da_addr,
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index e0c257bd62b9..5552fa8b6e12 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -490,7 +490,7 @@ nfs_proc_rmdir(struct inode *dir, const struct qstr *name)
* from nfs_readdir by calling the decode_entry function directly.
*/
static int
-nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
+nfs_proc_readdir(struct dentry *dentry, const struct cred *cred,
u64 cookie, struct page **pages, unsigned int count, bool plus)
{
struct inode *dir = d_inode(dentry);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ac4b2f005778..22ce3c8a2f46 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -929,7 +929,7 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void)
data->minorversion = 0;
data->need_mount = true;
data->net = current->nsproxy->net_ns;
- security_init_mnt_opts(&data->lsm_opts);
+ data->lsm_opts = NULL;
}
return data;
}
@@ -1206,7 +1206,7 @@ static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option,
static int nfs_parse_mount_options(char *raw,
struct nfs_parsed_mount_data *mnt)
{
- char *p, *string, *secdata;
+ char *p, *string;
int rc, sloppy = 0, invalid_option = 0;
unsigned short protofamily = AF_UNSPEC;
unsigned short mountfamily = AF_UNSPEC;
@@ -1217,20 +1217,10 @@ static int nfs_parse_mount_options(char *raw,
}
dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw);
- secdata = alloc_secdata();
- if (!secdata)
- goto out_nomem;
-
- rc = security_sb_copy_data(raw, secdata);
- if (rc)
- goto out_security_failure;
-
- rc = security_sb_parse_opts_str(secdata, &mnt->lsm_opts);
+ rc = security_sb_eat_lsm_opts(raw, &mnt->lsm_opts);
if (rc)
goto out_security_failure;
- free_secdata(secdata);
-
while ((p = strsep(&raw, ",")) != NULL) {
substring_t args[MAX_OPT_ARGS];
unsigned long option;
@@ -1682,7 +1672,6 @@ out_nomem:
printk(KERN_INFO "NFS: not enough memory to parse option\n");
return 0;
out_security_failure:
- free_secdata(secdata);
printk(KERN_INFO "NFS: security options invalid: %d\n", rc);
return 0;
}
@@ -2081,14 +2070,9 @@ static int nfs23_validate_mount_data(void *options,
if (data->context[0]){
#ifdef CONFIG_SECURITY_SELINUX
int rc;
- char *opts_str = kmalloc(sizeof(data->context) + 8, GFP_KERNEL);
- if (!opts_str)
- return -ENOMEM;
- strcpy(opts_str, "context=");
data->context[NFS_MAX_CONTEXT_LEN] = '\0';
- strcat(opts_str, &data->context[0]);
- rc = security_sb_parse_opts_str(opts_str, &args->lsm_opts);
- kfree(opts_str);
+ rc = security_add_mnt_opt("context", data->context,
+ strlen(data->context), &args->lsm_opts);
if (rc)
return rc;
#else
@@ -2168,7 +2152,10 @@ static int nfs_validate_text_mount_data(void *options,
if (args->version == 4) {
#if IS_ENABLED(CONFIG_NFS_V4)
- port = NFS_PORT;
+ if (args->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
+ port = NFS_RDMA_PORT;
+ else
+ port = NFS_PORT;
max_namelen = NFS4_MAXNAMLEN;
max_pathlen = NFS4_MAXPATHLEN;
nfs_validate_transport_protocol(args);
@@ -2178,8 +2165,11 @@ static int nfs_validate_text_mount_data(void *options,
#else
goto out_v4_not_compiled;
#endif /* CONFIG_NFS_V4 */
- } else
+ } else {
nfs_set_mount_transport_protocol(args);
+ if (args->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
+ port = NFS_RDMA_PORT;
+ }
nfs_set_port(sap, &args->nfs_server.port, port);
@@ -2265,7 +2255,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
options->version <= 6))))
return 0;
- data = kzalloc(sizeof(*data), GFP_KERNEL);
+ data = nfs_alloc_parsed_mount_data();
if (data == NULL)
return -ENOMEM;
@@ -2304,8 +2294,10 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
/* compare new mount options with old ones */
error = nfs_compare_remount_data(nfss, data);
+ if (!error)
+ error = security_sb_remount(sb, data->lsm_opts);
out:
- kfree(data);
+ nfs_free_parsed_mount_data(data);
return error;
}
EXPORT_SYMBOL_GPL(nfs_remount);
@@ -2409,8 +2401,7 @@ static int nfs_compare_mount_options(const struct super_block *s, const struct n
goto Ebusy;
if (a->acdirmax != b->acdirmax)
goto Ebusy;
- if (b->auth_info.flavor_len > 0 &&
- clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
+ if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
goto Ebusy;
return 1;
Ebusy:
@@ -2543,7 +2534,7 @@ int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot,
if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL)
kflags |= SECURITY_LSM_NATIVE_LABELS;
- error = security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts,
+ error = security_sb_set_mnt_opts(s, mount_info->parsed->lsm_opts,
kflags, &kflags_out);
if (error)
goto err;
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index fd61bf0fce63..79b97b3c4427 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -31,7 +31,7 @@
static void
nfs_free_unlinkdata(struct nfs_unlinkdata *data)
{
- put_rpccred(data->cred);
+ put_cred(data->cred);
kfree(data->args.name.name);
kfree(data);
}
@@ -177,11 +177,7 @@ nfs_async_unlink(struct dentry *dentry, const struct qstr *name)
goto out_free;
data->args.name.len = name->len;
- data->cred = rpc_lookup_cred();
- if (IS_ERR(data->cred)) {
- status = PTR_ERR(data->cred);
- goto out_free_name;
- }
+ data->cred = get_current_cred();
data->res.dir_attr = &data->dir_attr;
init_waitqueue_head(&data->wq);
@@ -202,8 +198,7 @@ nfs_async_unlink(struct dentry *dentry, const struct qstr *name)
return 0;
out_unlock:
spin_unlock(&dentry->d_lock);
- put_rpccred(data->cred);
-out_free_name:
+ put_cred(data->cred);
kfree(data->args.name.name);
out_free:
kfree(data);
@@ -307,7 +302,7 @@ static void nfs_async_rename_release(void *calldata)
iput(data->old_dir);
iput(data->new_dir);
nfs_sb_deactive(sb);
- put_rpccred(data->cred);
+ put_cred(data->cred);
kfree(data);
}
@@ -352,12 +347,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
return ERR_PTR(-ENOMEM);
task_setup_data.callback_data = data;
- data->cred = rpc_lookup_cred();
- if (IS_ERR(data->cred)) {
- struct rpc_task *task = ERR_CAST(data->cred);
- kfree(data);
- return task;
- }
+ data->cred = get_current_cred();
msg.rpc_argp = &data->args;
msg.rpc_resp = &data->res;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 586726a590d8..5a0bbf917a32 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1233,9 +1233,12 @@ int
nfs_key_timeout_notify(struct file *filp, struct inode *inode)
{
struct nfs_open_context *ctx = nfs_file_open_context(filp);
- struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth;
- return rpcauth_key_timeout_notify(auth, ctx->cred);
+ if (nfs_ctx_key_to_expire(ctx, inode) &&
+ !ctx->ll_cred)
+ /* Already expired! */
+ return -EACCES;
+ return 0;
}
/*
@@ -1244,8 +1247,23 @@ nfs_key_timeout_notify(struct file *filp, struct inode *inode)
bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode)
{
struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth;
+ struct rpc_cred *cred = ctx->ll_cred;
+ struct auth_cred acred = {
+ .cred = ctx->cred,
+ };
- return rpcauth_cred_key_to_expire(auth, ctx->cred);
+ if (cred && !cred->cr_ops->crmatch(&acred, cred, 0)) {
+ put_rpccred(cred);
+ ctx->ll_cred = NULL;
+ cred = NULL;
+ }
+ if (!cred)
+ cred = auth->au_ops->lookup_cred(auth, &acred, 0);
+ if (!cred || IS_ERR(cred))
+ return true;
+ ctx->ll_cred = cred;
+ return !!(cred->cr_ops->crkey_timeout &&
+ cred->cr_ops->crkey_timeout(cred));
}
/*
@@ -2121,7 +2139,7 @@ int __init nfs_init_writepagecache(void)
* This allows larger machines to have larger/more transfers.
* Limit the default to 256M
*/
- nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
+ nfs_congestion_kb = (16*int_sqrt(totalram_pages())) << (PAGE_SHIFT-10);
if (nfs_congestion_kb > 256*1024)
nfs_congestion_kb = 256*1024;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 25987bcdf96f..c74e4538d0eb 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -844,24 +844,23 @@ static int max_cb_time(struct net *net)
return max(nn->nfsd4_lease/10, (time_t)1) * HZ;
}
-static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
+static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
{
if (clp->cl_minorversion == 0) {
- char *principal = clp->cl_cred.cr_targ_princ ?
- clp->cl_cred.cr_targ_princ : "nfs";
- struct rpc_cred *cred;
-
- cred = rpc_lookup_machine_cred(principal);
- if (!IS_ERR(cred))
- get_rpccred(cred);
- return cred;
+ client->cl_principal = clp->cl_cred.cr_targ_princ ?
+ clp->cl_cred.cr_targ_princ : "nfs";
+
+ return get_cred(rpc_machine_cred());
} else {
- struct rpc_auth *auth = client->cl_auth;
- struct auth_cred acred = {};
+ struct cred *kcred;
+
+ kcred = prepare_kernel_cred(NULL);
+ if (!kcred)
+ return NULL;
- acred.uid = ses->se_cb_sec.uid;
- acred.gid = ses->se_cb_sec.gid;
- return auth->au_ops->lookup_cred(client->cl_auth, &acred, 0);
+ kcred->uid = ses->se_cb_sec.uid;
+ kcred->gid = ses->se_cb_sec.gid;
+ return kcred;
}
}
@@ -884,7 +883,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
.flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
};
struct rpc_clnt *client;
- struct rpc_cred *cred;
+ const struct cred *cred;
if (clp->cl_minorversion == 0) {
if (!clp->cl_cred.cr_principal &&
@@ -1214,7 +1213,7 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
if (clp->cl_cb_client) {
rpc_shutdown_client(clp->cl_cb_client);
clp->cl_cb_client = NULL;
- put_rpccred(clp->cl_cb_cred);
+ put_cred(clp->cl_cb_cred);
clp->cl_cb_cred = NULL;
}
if (clp->cl_cb_conn.cb_xprt) {
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 2b36aa037ce0..44517fb5c0de 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -656,7 +656,6 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
struct nfsd_net *nn;
ktime_t now, cutoff;
const struct nfsd4_layout_ops *ops;
- LIST_HEAD(reaplist);
switch (task->tk_status) {
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index d505990dac7c..0cfd257ffdaf 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -863,8 +863,7 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_rename *rename = &u->rename;
__be32 status;
- if (opens_in_grace(SVC_NET(rqstp)) &&
- !(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK))
+ if (opens_in_grace(SVC_NET(rqstp)))
return nfserr_grace;
status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname,
rename->rn_snamelen, &cstate->current_fh,
@@ -1016,8 +1015,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist,
&write->wr_head, write->wr_buflen);
- if (!nvecs)
- return nfserr_io;
WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
@@ -1348,7 +1345,7 @@ static __be32
nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_fallocate *fallocate, int flags)
{
- __be32 status = nfserr_notsupp;
+ __be32 status;
struct file *file;
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
@@ -2682,25 +2679,25 @@ static const struct nfsd4_operation nfsd4_ops[] = {
/* NFSv4.2 operations */
[OP_ALLOCATE] = {
.op_func = nfsd4_allocate,
- .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_ALLOCATE",
.op_rsize_bop = nfsd4_only_status_rsize,
},
[OP_DEALLOCATE] = {
.op_func = nfsd4_deallocate,
- .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_DEALLOCATE",
.op_rsize_bop = nfsd4_only_status_rsize,
},
[OP_CLONE] = {
.op_func = nfsd4_clone,
- .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_CLONE",
.op_rsize_bop = nfsd4_only_status_rsize,
},
[OP_COPY] = {
.op_func = nfsd4_copy,
- .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_COPY",
.op_rsize_bop = nfsd4_copy_rsize,
},
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 9c247fa1e959..5188f9f70c78 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -662,7 +662,7 @@ struct cld_net {
struct cld_upcall {
struct list_head cu_list;
struct cld_net *cu_net;
- struct task_struct *cu_task;
+ struct completion cu_done;
struct cld_msg cu_msg;
};
@@ -671,23 +671,18 @@ __cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
{
int ret;
struct rpc_pipe_msg msg;
+ struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_msg);
memset(&msg, 0, sizeof(msg));
msg.data = cmsg;
msg.len = sizeof(*cmsg);
- /*
- * Set task state before we queue the upcall. That prevents
- * wake_up_process in the downcall from racing with schedule.
- */
- set_current_state(TASK_UNINTERRUPTIBLE);
ret = rpc_queue_upcall(pipe, &msg);
if (ret < 0) {
- set_current_state(TASK_RUNNING);
goto out;
}
- schedule();
+ wait_for_completion(&cup->cu_done);
if (msg.errno < 0)
ret = msg.errno;
@@ -754,7 +749,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
if (copy_from_user(&cup->cu_msg, src, mlen) != 0)
return -EFAULT;
- wake_up_process(cup->cu_task);
+ complete(&cup->cu_done);
return mlen;
}
@@ -769,7 +764,7 @@ cld_pipe_destroy_msg(struct rpc_pipe_msg *msg)
if (msg->errno >= 0)
return;
- wake_up_process(cup->cu_task);
+ complete(&cup->cu_done);
}
static const struct rpc_pipe_ops cld_upcall_ops = {
@@ -900,7 +895,7 @@ restart_search:
goto restart_search;
}
}
- new->cu_task = current;
+ init_completion(&new->cu_done);
new->cu_msg.cm_vers = CLD_UPCALL_VERSION;
put_unaligned(cn->cn_xid++, &new->cu_msg.cm_xid);
new->cu_net = cn;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f093fbe47133..fb3c9844c82a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -238,7 +238,7 @@ find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
}
spin_unlock(&nn->blocked_locks_lock);
if (found)
- posix_unblock_lock(&found->nbl_lock);
+ locks_delete_block(&found->nbl_lock);
return found;
}
@@ -293,7 +293,7 @@ remove_blocked_locks(struct nfs4_lockowner *lo)
nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock,
nbl_lru);
list_del_init(&nbl->nbl_lru);
- posix_unblock_lock(&nbl->nbl_lock);
+ locks_delete_block(&nbl->nbl_lock);
free_blocked_lock(nbl);
}
}
@@ -4863,7 +4863,7 @@ nfs4_laundromat(struct nfsd_net *nn)
nbl = list_first_entry(&reaplist,
struct nfsd4_blocked_lock, nbl_lru);
list_del_init(&nbl->nbl_lru);
- posix_unblock_lock(&nbl->nbl_lock);
+ locks_delete_block(&nbl->nbl_lock);
free_blocked_lock(nbl);
}
out:
@@ -5112,7 +5112,7 @@ nfs4_find_file(struct nfs4_stid *s, int flags)
}
static __be32
-nfs4_check_olstateid(struct svc_fh *fhp, struct nfs4_ol_stateid *ols, int flags)
+nfs4_check_olstateid(struct nfs4_ol_stateid *ols, int flags)
{
__be32 status;
@@ -5195,7 +5195,7 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
break;
case NFS4_OPEN_STID:
case NFS4_LOCK_STID:
- status = nfs4_check_olstateid(fhp, openlockstateid(s), flags);
+ status = nfs4_check_olstateid(openlockstateid(s), flags);
break;
default:
status = nfserr_bad_stateid;
@@ -6230,15 +6230,15 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
case NFS4_READ_LT:
case NFS4_READW_LT:
file_lock->fl_type = F_RDLCK;
- break;
+ break;
case NFS4_WRITE_LT:
case NFS4_WRITEW_LT:
file_lock->fl_type = F_WRLCK;
- break;
+ break;
default:
dprintk("NFSD: nfs4_lockt: bad lock type!\n");
status = nfserr_inval;
- goto out;
+ goto out;
}
lo = find_lockowner_str(cstate->clp, &lockt->lt_owner);
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index e2fe0e9ce0df..da52b594362a 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -99,7 +99,7 @@ static unsigned int
nfsd_cache_size_limit(void)
{
unsigned int limit;
- unsigned long low_pages = totalram_pages - totalhigh_pages;
+ unsigned long low_pages = totalram_pages() - totalhigh_pages();
limit = (16 * int_sqrt(low_pages)) << (PAGE_SHIFT-10);
return min_t(unsigned int, limit, 256*1024);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 6384c9b94898..b33f9785b756 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1126,6 +1126,8 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
case 'Y':
case 'y':
case '1':
+ if (nn->nfsd_serv)
+ return -EBUSY;
nfsd4_end_grace(nn);
break;
default:
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 6aacb325b6a0..396c76755b03 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -327,7 +327,7 @@ struct nfs4_client {
#define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
1 << NFSD4_CLIENT_CB_KILL)
unsigned long cl_flags;
- struct rpc_cred *cl_cb_cred;
+ const struct cred *cl_cb_cred;
struct rpc_clnt *cl_cb_client;
u32 cl_cb_ident;
#define NFSD4_CB_UP 0
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index eb67098117b4..9824e32b2f23 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -396,10 +396,23 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
bool get_write_count;
bool size_change = (iap->ia_valid & ATTR_SIZE);
- if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
+ if (iap->ia_valid & ATTR_SIZE) {
accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
- if (iap->ia_valid & ATTR_SIZE)
ftype = S_IFREG;
+ }
+
+ /*
+ * If utimes(2) and friends are called with times not NULL, we should
+ * not set NFSD_MAY_WRITE bit. Otherwise fh_verify->nfsd_permission
+ * will return EACCESS, when the caller's effective UID does not match
+ * the owner of the file, and the caller is not privileged. In this
+ * situation, we should return EPERM(notify_change will return this).
+ */
+ if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME)) {
+ accmode |= NFSD_MAY_OWNER_OVERRIDE;
+ if (!(iap->ia_valid & (ATTR_ATIME_SET | ATTR_MTIME_SET)))
+ accmode |= NFSD_MAY_WRITE;
+ }
/* Callers that do fh_verify should do the fh_want_write: */
get_write_count = !fhp->fh_dentry;
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index e08a6647267b..3723f3d18d20 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -89,7 +89,13 @@ static int fanotify_get_response(struct fsnotify_group *group,
return ret;
}
-static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info,
+/*
+ * This function returns a mask for an event that only contains the flags
+ * that have been specifically requested by the user. Flags that may have
+ * been included within the event mask, but have not been explicitly
+ * requested by the user, will not be present in the returned mask.
+ */
+static u32 fanotify_group_event_mask(struct fsnotify_iter_info *iter_info,
u32 event_mask, const void *data,
int data_type)
{
@@ -101,14 +107,14 @@ static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info,
pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
__func__, iter_info->report_mask, event_mask, data, data_type);
- /* if we don't have enough info to send an event to userspace say no */
+ /* If we don't have enough info to send an event to userspace say no */
if (data_type != FSNOTIFY_EVENT_PATH)
- return false;
+ return 0;
- /* sorry, fanotify only gives a damn about files and dirs */
+ /* Sorry, fanotify only gives a damn about files and dirs */
if (!d_is_reg(path->dentry) &&
!d_can_lookup(path->dentry))
- return false;
+ return 0;
fsnotify_foreach_obj_type(type) {
if (!fsnotify_iter_should_report_type(iter_info, type))
@@ -129,13 +135,10 @@ static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info,
if (d_is_dir(path->dentry) &&
!(marks_mask & FS_ISDIR & ~marks_ignored_mask))
- return false;
-
- if (event_mask & FANOTIFY_OUTGOING_EVENTS &
- marks_mask & ~marks_ignored_mask)
- return true;
+ return 0;
- return false;
+ return event_mask & FANOTIFY_OUTGOING_EVENTS & marks_mask &
+ ~marks_ignored_mask;
}
struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group,
@@ -207,10 +210,13 @@ static int fanotify_handle_event(struct fsnotify_group *group,
BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
+ BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC);
+ BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM);
- BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 10);
+ BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 12);
- if (!fanotify_should_send_event(iter_info, mask, data, data_type))
+ mask = fanotify_group_event_mask(iter_info, mask, data, data_type);
+ if (!mask)
return 0;
pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode,
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index e03be5071362..9c870b0d2b56 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -206,7 +206,7 @@ static int process_access_response(struct fsnotify_group *group,
static ssize_t copy_event_to_user(struct fsnotify_group *group,
struct fsnotify_event *event,
- char __user *buf)
+ char __user *buf, size_t count)
{
struct fanotify_event_metadata fanotify_event_metadata;
struct file *f;
@@ -220,6 +220,12 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
fd = fanotify_event_metadata.fd;
ret = -EFAULT;
+ /*
+ * Sanity check copy size in case get_one_event() and
+ * fill_event_metadata() event_len sizes ever get out of sync.
+ */
+ if (WARN_ON_ONCE(fanotify_event_metadata.event_len > count))
+ goto out_close_fd;
if (copy_to_user(buf, &fanotify_event_metadata,
fanotify_event_metadata.event_len))
goto out_close_fd;
@@ -295,7 +301,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
continue;
}
- ret = copy_event_to_user(group, kevent, buf);
+ ret = copy_event_to_user(group, kevent, buf, count);
if (unlikely(ret == -EOPENSTALE)) {
/*
* We cannot report events with stale fd so drop it.
@@ -669,7 +675,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
*/
if ((flags & FAN_MARK_IGNORED_MASK) &&
!(flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
- (atomic_read(&inode->i_writecount) > 0))
+ inode_is_open_for_write(inode))
return 0;
return fanotify_add_mark(group, &inode->i_fsnotify_marks,
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 348a184bcdda..1e2bfd26b352 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -15,6 +15,7 @@
#include <linux/exportfs.h>
#include "inotify/inotify.h"
+#include "fdinfo.h"
#include "fsnotify.h"
#if defined(CONFIG_PROC_FS)
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index d2c34900ae05..ecf09b6243d9 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -401,7 +401,7 @@ static __init int fsnotify_init(void)
{
int ret;
- BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23);
+ BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25);
ret = init_srcu_struct(&fsnotify_mark_srcu);
if (ret)
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h
index ab172e5f51d9..5becc8acc8f4 100644
--- a/fs/ntfs/malloc.h
+++ b/fs/ntfs/malloc.h
@@ -47,7 +47,7 @@ static inline void *__ntfs_malloc(unsigned long size, gfp_t gfp_mask)
return kmalloc(PAGE_SIZE, gfp_mask & ~__GFP_HIGHMEM);
/* return (void *)__get_free_page(gfp_mask); */
}
- if (likely((size >> PAGE_SHIFT) < totalram_pages))
+ if (likely((size >> PAGE_SHIFT) < totalram_pages()))
return __vmalloc(size, gfp_mask, PAGE_KERNEL);
return NULL;
}
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 99ee093182cb..cc9b32b9db7c 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-ccflags-y := -Ifs/ocfs2
+ccflags-y := -I$(src)
obj-$(CONFIG_OCFS2_FS) += \
ocfs2.o \
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index eb1ce30412dc..832c1759a09a 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -30,6 +30,7 @@
#include <linux/quotaops.h>
#include <linux/blkdev.h>
#include <linux/uio.h>
+#include <linux/mm.h>
#include <cluster/masklog.h>
@@ -397,7 +398,7 @@ static int ocfs2_readpages(struct file *filp, struct address_space *mapping,
* Check whether a remote node truncated this file - we just
* drop out in that case as it's not worth handling here.
*/
- last = list_entry(pages->prev, struct page, lru);
+ last = lru_to_page(pages);
start = (loff_t)last->index << PAGE_SHIFT;
if (start >= i_size_read(inode))
goto out_unlock;
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 4ebbd57cbf84..f9b84f7a3e4b 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -161,7 +161,6 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
#endif
}
- clear_buffer_uptodate(bh);
get_bh(bh); /* for end_buffer_read_sync() */
bh->b_end_io = end_buffer_read_sync;
submit_bh(REQ_OP_READ, 0, bh);
@@ -341,7 +340,6 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
continue;
}
- clear_buffer_uptodate(bh);
get_bh(bh); /* for end_buffer_read_sync() */
if (validate)
set_buffer_needs_validate(bh);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 9b2ed62dd638..f3c20b279eb2 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -582,9 +582,10 @@ bail:
}
static int o2hb_read_slots(struct o2hb_region *reg,
+ unsigned int begin_slot,
unsigned int max_slots)
{
- unsigned int current_slot=0;
+ unsigned int current_slot = begin_slot;
int status;
struct o2hb_bio_wait_ctxt wc;
struct bio *bio;
@@ -1093,9 +1094,14 @@ static int o2hb_highest_node(unsigned long *nodes, int numbits)
return find_last_bit(nodes, numbits);
}
+static int o2hb_lowest_node(unsigned long *nodes, int numbits)
+{
+ return find_first_bit(nodes, numbits);
+}
+
static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
{
- int i, ret, highest_node;
+ int i, ret, highest_node, lowest_node;
int membership_change = 0, own_slot_ok = 0;
unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
@@ -1120,7 +1126,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
}
highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES);
- if (highest_node >= O2NM_MAX_NODES) {
+ lowest_node = o2hb_lowest_node(configured_nodes, O2NM_MAX_NODES);
+ if (highest_node >= O2NM_MAX_NODES || lowest_node >= O2NM_MAX_NODES) {
mlog(ML_NOTICE, "o2hb: No configured nodes found!\n");
ret = -EINVAL;
goto bail;
@@ -1130,7 +1137,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
* yet. Of course, if the node definitions have holes in them
* then we're reading an empty slot anyway... Consider this
* best-effort. */
- ret = o2hb_read_slots(reg, highest_node + 1);
+ ret = o2hb_read_slots(reg, lowest_node, highest_node + 1);
if (ret < 0) {
mlog_errno(ret);
goto bail;
@@ -1801,7 +1808,7 @@ static int o2hb_populate_slot_data(struct o2hb_region *reg)
struct o2hb_disk_slot *slot;
struct o2hb_disk_heartbeat_block *hb_block;
- ret = o2hb_read_slots(reg, reg->hr_blocks);
+ ret = o2hb_read_slots(reg, 0, reg->hr_blocks);
if (ret)
goto out;
diff --git a/fs/ocfs2/dlm/Makefile b/fs/ocfs2/dlm/Makefile
index bd1aab1f49a4..ef2854422a6e 100644
--- a/fs/ocfs2/dlm/Makefile
+++ b/fs/ocfs2/dlm/Makefile
@@ -1,4 +1,4 @@
-ccflags-y := -Ifs/ocfs2
+ccflags-y := -I$(src)/..
obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o
diff --git a/fs/ocfs2/dlmfs/Makefile b/fs/ocfs2/dlmfs/Makefile
index eed3db8c5b49..33431a0296a3 100644
--- a/fs/ocfs2/dlmfs/Makefile
+++ b/fs/ocfs2/dlmfs/Makefile
@@ -1,4 +1,4 @@
-ccflags-y := -Ifs/ocfs2
+ccflags-y := -I$(src)/..
obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 602c71f32740..8decbe95dcec 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -179,7 +179,7 @@ bail:
static int dlmfs_file_release(struct inode *inode,
struct file *file)
{
- int level, status;
+ int level;
struct dlmfs_inode_private *ip = DLMFS_I(inode);
struct dlmfs_filp_private *fp = file->private_data;
@@ -188,7 +188,6 @@ static int dlmfs_file_release(struct inode *inode,
mlog(0, "close called on inode %lu\n", inode->i_ino);
- status = 0;
if (fp) {
level = fp->fp_lock_level;
if (level != DLM_LOCK_IV)
@@ -255,7 +254,7 @@ static ssize_t dlmfs_file_read(struct file *filp,
if (!count)
return 0;
- if (!access_ok(VERIFY_WRITE, buf, count))
+ if (!access_ok(buf, count))
return -EFAULT;
/* don't read past the lvb */
@@ -303,7 +302,7 @@ static ssize_t dlmfs_file_write(struct file *filp,
if (!count)
return 0;
- if (!access_ok(VERIFY_READ, buf, count))
+ if (!access_ok(buf, count))
return -EFAULT;
/* don't write past the lvb */
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index b63c97f4318e..46fd3ef2cf21 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1017,7 +1017,8 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
mlog_errno(status);
}
- if (status == 0) {
+ /* Shutdown the kernel journal system */
+ if (!jbd2_journal_destroy(journal->j_journal) && !status) {
/*
* Do not toggle if flush was unsuccessful otherwise
* will leave dirty metadata in a "clean" journal
@@ -1026,9 +1027,6 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
if (status < 0)
mlog_errno(status);
}
-
- /* Shutdown the kernel journal system */
- jbd2_journal_destroy(journal->j_journal);
journal->j_journal = NULL;
OCFS2_I(inode)->ip_open_count--;
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 7642b6712c39..58973e4d2471 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -345,13 +345,18 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
if (num_used
|| alloc->id1.bitmap1.i_used
|| alloc->id1.bitmap1.i_total
- || la->la_bm_off)
- mlog(ML_ERROR, "Local alloc hasn't been recovered!\n"
+ || la->la_bm_off) {
+ mlog(ML_ERROR, "inconsistent detected, clean journal with"
+ " unrecovered local alloc, please run fsck.ocfs2!\n"
"found = %u, set = %u, taken = %u, off = %u\n",
num_used, le32_to_cpu(alloc->id1.bitmap1.i_used),
le32_to_cpu(alloc->id1.bitmap1.i_total),
OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
+ status = -EINVAL;
+ goto bail;
+ }
+
osb->local_alloc_bh = alloc_bh;
osb->local_alloc_state = OCFS2_LA_ENABLED;
@@ -835,7 +840,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
u32 *numbits,
struct ocfs2_alloc_reservation *resv)
{
- int numfound = 0, bitoff, left, startoff, lastzero;
+ int numfound = 0, bitoff, left, startoff;
int local_resv = 0;
struct ocfs2_alloc_reservation r;
void *bitmap = NULL;
@@ -873,7 +878,6 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
numfound = bitoff = startoff = 0;
- lastzero = -1;
left = le32_to_cpu(alloc->id1.bitmap1.i_total);
while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) {
if (bitoff == left) {
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c
index d56f0079b858..b11acd34001a 100644
--- a/fs/ocfs2/locks.c
+++ b/fs/ocfs2/locks.c
@@ -52,6 +52,7 @@ static int ocfs2_do_flock(struct file *file, struct inode *inode,
if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
lockres->l_level > LKM_NLMODE) {
int old_level = 0;
+ struct file_lock request;
if (lockres->l_level == LKM_EXMODE)
old_level = 1;
@@ -66,11 +67,10 @@ static int ocfs2_do_flock(struct file *file, struct inode *inode,
* level.
*/
- locks_lock_file_wait(file,
- &(struct file_lock) {
- .fl_type = F_UNLCK,
- .fl_flags = FL_FLOCK
- });
+ locks_init_lock(&request);
+ request.fl_type = F_UNLCK;
+ request.fl_flags = FL_FLOCK;
+ locks_lock_file_wait(file, &request);
ocfs2_file_unlock(file);
}
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 607092f367ad..1b2d0d2fe2ee 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -199,10 +199,11 @@ static struct dentry *openpromfs_lookup(struct inode *dir, struct dentry *dentry
child = dp->child;
while (child) {
- int n = strlen(child->path_component_name);
+ const char *node_name = kbasename(child->full_name);
+ int n = strlen(node_name);
if (len == n &&
- !strncmp(child->path_component_name, name, len)) {
+ !strncmp(node_name, name, len)) {
ent_type = op_inode_node;
ent_data.node = child;
ino = child->unique_id;
@@ -245,7 +246,7 @@ found:
set_nlink(inode, 2);
break;
case op_inode_prop:
- if (!strcmp(dp->name, "options") && (len == 17) &&
+ if (of_node_name_eq(dp, "options") && (len == 17) &&
!strncmp (name, "security-password", 17))
inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
else
@@ -293,8 +294,8 @@ static int openpromfs_readdir(struct file *file, struct dir_context *ctx)
}
while (child) {
if (!dir_emit(ctx,
- child->path_component_name,
- strlen(child->path_component_name),
+ kbasename(child->full_name),
+ strlen(kbasename(child->full_name)),
child->unique_id, DT_DIR))
goto out;
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index fe53381b26b1..f038235c64bd 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -77,7 +77,7 @@ static int orangefs_readpages(struct file *file,
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
struct page *page;
- page = list_entry(pages->prev, struct page, lru);
+ page = lru_to_page(pages);
list_del(&page->lru);
if (!add_to_page_cache(page,
mapping,
diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
index c4e98c9c1621..443bcd8c3c19 100644
--- a/fs/orangefs/orangefs-bufmap.c
+++ b/fs/orangefs/orangefs-bufmap.c
@@ -105,7 +105,7 @@ static int wait_for_free(struct slot_map *m)
left = t;
else
left = t + (left - n);
- if (unlikely(signal_pending(current)))
+ if (signal_pending(current))
left = -EINTR;
} while (left > 0);
diff --git a/fs/pnode.c b/fs/pnode.c
index 53d411a371ce..1100e810d855 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -10,6 +10,7 @@
#include <linux/mount.h>
#include <linux/fs.h>
#include <linux/nsproxy.h>
+#include <uapi/linux/mount.h>
#include "internal.h"
#include "pnode.h"
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 0ceb3b6b37e7..9d428d5a0ac8 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -392,6 +392,15 @@ static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
seq_putc(m, '\n');
}
+static inline void task_thp_status(struct seq_file *m, struct mm_struct *mm)
+{
+ bool thp_enabled = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE);
+
+ if (thp_enabled)
+ thp_enabled = !test_bit(MMF_DISABLE_THP, &mm->flags);
+ seq_printf(m, "THP_enabled:\t%d\n", thp_enabled);
+}
+
int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
@@ -406,6 +415,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
if (mm) {
task_mem(m, mm);
task_core_dumping(m, mm);
+ task_thp_status(m, mm);
mmput(mm);
}
task_sig(m, task);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index ce3465479447..633a63462573 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -530,7 +530,7 @@ static const struct file_operations proc_lstats_operations = {
static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
- unsigned long totalpages = totalram_pages + total_swap_pages;
+ unsigned long totalpages = totalram_pages() + total_swap_pages;
unsigned long points = 0;
points = oom_badness(task, NULL, NULL, totalpages) *
@@ -581,8 +581,10 @@ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
/*
* print the file header
*/
- seq_printf(m, "%-25s %-20s %-20s %-10s\n",
- "Limit", "Soft Limit", "Hard Limit", "Units");
+ seq_puts(m, "Limit "
+ "Soft Limit "
+ "Hard Limit "
+ "Units \n");
for (i = 0; i < RLIM_NLIMITS; i++) {
if (rlim[i].rlim_cur == RLIM_INFINITY)
@@ -2356,10 +2358,13 @@ static ssize_t timerslack_ns_write(struct file *file, const char __user *buf,
return -ESRCH;
if (p != current) {
- if (!capable(CAP_SYS_NICE)) {
+ rcu_read_lock();
+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
+ rcu_read_unlock();
count = -EPERM;
goto out;
}
+ rcu_read_unlock();
err = security_task_setscheduler(p);
if (err) {
@@ -2392,11 +2397,14 @@ static int timerslack_ns_show(struct seq_file *m, void *v)
return -ESRCH;
if (p != current) {
-
- if (!capable(CAP_SYS_NICE)) {
+ rcu_read_lock();
+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
+ rcu_read_unlock();
err = -EPERM;
goto out;
}
+ rcu_read_unlock();
+
err = security_task_getscheduler(p);
if (err)
goto out;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 5792f9e39466..da649ccd6804 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -59,7 +59,6 @@ static struct kmem_cache *pde_opener_cache __ro_after_init;
static struct inode *proc_alloc_inode(struct super_block *sb)
{
struct proc_inode *ei;
- struct inode *inode;
ei = kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL);
if (!ei)
@@ -71,8 +70,7 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
ei->sysctl = NULL;
ei->sysctl_entry = NULL;
ei->ns_ops = NULL;
- inode = &ei->vfs_inode;
- return inode;
+ return &ei->vfs_inode;
}
static void proc_i_callback(struct rcu_head *head)
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 6c517b11acf8..40b05e0d4274 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -46,7 +46,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
ppage = pfn_to_page(pfn);
else
ppage = NULL;
- if (!ppage || PageSlab(ppage))
+ if (!ppage || PageSlab(ppage) || page_has_type(ppage))
pcount = 0;
else
pcount = page_mapcount(ppage);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 89921a0d2ebb..4d598a399bbf 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -464,7 +464,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
inode = new_inode(sb);
if (!inode)
- goto out;
+ return ERR_PTR(-ENOMEM);
inode->i_ino = get_next_ino();
@@ -474,8 +474,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
if (unlikely(head->unregistering)) {
spin_unlock(&sysctl_lock);
iput(inode);
- inode = NULL;
- goto out;
+ return ERR_PTR(-ENOENT);
}
ei->sysctl = head;
ei->sysctl_entry = table;
@@ -500,7 +499,6 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
if (root->set_ownership)
root->set_ownership(head, table, &inode->i_uid, &inode->i_gid);
-out:
return inode;
}
@@ -549,10 +547,11 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
goto out;
}
- err = ERR_PTR(-ENOMEM);
inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
- if (!inode)
+ if (IS_ERR(inode)) {
+ err = ERR_CAST(inode);
goto out;
+ }
d_set_d_op(dentry, &proc_sys_dentry_operations);
err = d_splice_alias(inode, dentry);
@@ -685,7 +684,7 @@ static bool proc_sys_fill_cache(struct file *file,
if (d_in_lookup(child)) {
struct dentry *res;
inode = proc_sys_make_inode(dir->d_sb, head, table);
- if (!inode) {
+ if (IS_ERR(inode)) {
d_lookup_done(child);
dput(child);
return false;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 47c3764c469b..f0ec9edab2f3 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -790,6 +790,8 @@ static int show_smap(struct seq_file *m, void *v)
__show_smap(m, &mss);
+ seq_printf(m, "THPeligible: %d\n", transparent_hugepage_enabled(vma));
+
if (arch_pkeys_enabled())
seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
show_smap_vma_flags(m, vma);
@@ -1096,6 +1098,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
return -ESRCH;
mm = get_task_mm(task);
if (mm) {
+ struct mmu_notifier_range range;
struct clear_refs_private cp = {
.type = type,
};
@@ -1139,11 +1142,13 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
downgrade_write(&mm->mmap_sem);
break;
}
- mmu_notifier_invalidate_range_start(mm, 0, -1);
+
+ mmu_notifier_range_init(&range, mm, 0, -1UL);
+ mmu_notifier_invalidate_range_start(&range);
}
walk_page_range(0, mm->highest_vm_end, &clear_refs_walk);
if (type == CLEAR_REFS_SOFT_DIRTY)
- mmu_notifier_invalidate_range_end(mm, 0, -1);
+ mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb, 0, -1);
up_read(&mm->mmap_sem);
out_mm:
diff --git a/fs/proc/util.c b/fs/proc/util.c
index b161cfa0f9fa..98f8adc17345 100644
--- a/fs/proc/util.c
+++ b/fs/proc/util.c
@@ -1,4 +1,5 @@
#include <linux/dcache.h>
+#include "internal.h"
unsigned name_to_int(const struct qstr *qstr)
{
diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c
index 06aab07b6bb7..b8a0931568f8 100644
--- a/fs/pstore/ftrace.c
+++ b/fs/pstore/ftrace.c
@@ -148,7 +148,7 @@ void pstore_unregister_ftrace(void)
mutex_lock(&pstore_ftrace_lock);
if (pstore_ftrace_enabled) {
unregister_ftrace_function(&pstore_ftrace_ops);
- pstore_ftrace_enabled = 0;
+ pstore_ftrace_enabled = false;
}
mutex_unlock(&pstore_ftrace_lock);
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 8cf2218b46a7..c60ee46f3e39 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -335,53 +335,10 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record)
goto fail_alloc;
private->record = record;
- switch (record->type) {
- case PSTORE_TYPE_DMESG:
- scnprintf(name, sizeof(name), "dmesg-%s-%llu%s",
- record->psi->name, record->id,
- record->compressed ? ".enc.z" : "");
- break;
- case PSTORE_TYPE_CONSOLE:
- scnprintf(name, sizeof(name), "console-%s-%llu",
- record->psi->name, record->id);
- break;
- case PSTORE_TYPE_FTRACE:
- scnprintf(name, sizeof(name), "ftrace-%s-%llu",
- record->psi->name, record->id);
- break;
- case PSTORE_TYPE_MCE:
- scnprintf(name, sizeof(name), "mce-%s-%llu",
- record->psi->name, record->id);
- break;
- case PSTORE_TYPE_PPC_RTAS:
- scnprintf(name, sizeof(name), "rtas-%s-%llu",
- record->psi->name, record->id);
- break;
- case PSTORE_TYPE_PPC_OF:
- scnprintf(name, sizeof(name), "powerpc-ofw-%s-%llu",
- record->psi->name, record->id);
- break;
- case PSTORE_TYPE_PPC_COMMON:
- scnprintf(name, sizeof(name), "powerpc-common-%s-%llu",
- record->psi->name, record->id);
- break;
- case PSTORE_TYPE_PMSG:
- scnprintf(name, sizeof(name), "pmsg-%s-%llu",
- record->psi->name, record->id);
- break;
- case PSTORE_TYPE_PPC_OPAL:
- scnprintf(name, sizeof(name), "powerpc-opal-%s-%llu",
- record->psi->name, record->id);
- break;
- case PSTORE_TYPE_UNKNOWN:
- scnprintf(name, sizeof(name), "unknown-%s-%llu",
- record->psi->name, record->id);
- break;
- default:
- scnprintf(name, sizeof(name), "type%d-%s-%llu",
- record->type, record->psi->name, record->id);
- break;
- }
+ scnprintf(name, sizeof(name), "%s-%s-%llu%s",
+ pstore_type_to_name(record->type),
+ record->psi->name, record->id,
+ record->compressed ? ".enc.z" : "");
dentry = d_alloc_name(root, name);
if (!dentry)
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index b821054ca3ed..2d1066ed3c28 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -59,6 +59,19 @@ MODULE_PARM_DESC(update_ms, "milliseconds before pstore updates its content "
"enabling this option is not safe, it may lead to further "
"corruption on Oopses)");
+/* Names should be in the same order as the enum pstore_type_id */
+static const char * const pstore_type_names[] = {
+ "dmesg",
+ "mce",
+ "console",
+ "ftrace",
+ "rtas",
+ "powerpc-ofw",
+ "powerpc-common",
+ "pmsg",
+ "powerpc-opal",
+};
+
static int pstore_new_entry;
static void pstore_timefunc(struct timer_list *);
@@ -104,6 +117,30 @@ void pstore_set_kmsg_bytes(int bytes)
/* Tag each group of saved records with a sequence number */
static int oopscount;
+const char *pstore_type_to_name(enum pstore_type_id type)
+{
+ BUILD_BUG_ON(ARRAY_SIZE(pstore_type_names) != PSTORE_TYPE_MAX);
+
+ if (WARN_ON_ONCE(type >= PSTORE_TYPE_MAX))
+ return "unknown";
+
+ return pstore_type_names[type];
+}
+EXPORT_SYMBOL_GPL(pstore_type_to_name);
+
+enum pstore_type_id pstore_name_to_type(const char *name)
+{
+ int i;
+
+ for (i = 0; i < PSTORE_TYPE_MAX; i++) {
+ if (!strcmp(pstore_type_names[i], name))
+ return i;
+ }
+
+ return PSTORE_TYPE_MAX;
+}
+EXPORT_SYMBOL_GPL(pstore_name_to_type);
+
static const char *get_reason_str(enum kmsg_dump_reason reason)
{
switch (reason) {
@@ -124,26 +161,27 @@ static const char *get_reason_str(enum kmsg_dump_reason reason)
}
}
-bool pstore_cannot_block_path(enum kmsg_dump_reason reason)
+/*
+ * Should pstore_dump() wait for a concurrent pstore_dump()? If
+ * not, the current pstore_dump() will report a failure to dump
+ * and return.
+ */
+static bool pstore_cannot_wait(enum kmsg_dump_reason reason)
{
- /*
- * In case of NMI path, pstore shouldn't be blocked
- * regardless of reason.
- */
+ /* In NMI path, pstore shouldn't block regardless of reason. */
if (in_nmi())
return true;
switch (reason) {
/* In panic case, other cpus are stopped by smp_send_stop(). */
case KMSG_DUMP_PANIC:
- /* Emergency restart shouldn't be blocked by spin lock. */
+ /* Emergency restart shouldn't be blocked. */
case KMSG_DUMP_EMERG:
return true;
default:
return false;
}
}
-EXPORT_SYMBOL_GPL(pstore_cannot_block_path);
#if IS_ENABLED(CONFIG_PSTORE_DEFLATE_COMPRESS)
static int zbufsize_deflate(size_t size)
@@ -258,20 +296,6 @@ static int pstore_compress(const void *in, void *out,
return outlen;
}
-static int pstore_decompress(void *in, void *out,
- unsigned int inlen, unsigned int outlen)
-{
- int ret;
-
- ret = crypto_comp_decompress(tfm, in, inlen, out, &outlen);
- if (ret) {
- pr_err("crypto_comp_decompress failed, ret = %d!\n", ret);
- return ret;
- }
-
- return outlen;
-}
-
static void allocate_buf_for_compression(void)
{
struct crypto_comp *ctx;
@@ -318,7 +342,7 @@ static void allocate_buf_for_compression(void)
big_oops_buf_sz = size;
big_oops_buf = buf;
- pr_info("Using compression: %s\n", zbackend->name);
+ pr_info("Using crash dump compression: %s\n", zbackend->name);
}
static void free_buf_for_compression(void)
@@ -368,9 +392,8 @@ void pstore_record_init(struct pstore_record *record,
}
/*
- * callback from kmsg_dump. (s2,l2) has the most recently
- * written bytes, older bytes are in (s1,l1). Save as much
- * as we can from the end of the buffer.
+ * callback from kmsg_dump. Save as much as we can (up to kmsg_bytes) from the
+ * end of the buffer.
*/
static void pstore_dump(struct kmsg_dumper *dumper,
enum kmsg_dump_reason reason)
@@ -378,23 +401,23 @@ static void pstore_dump(struct kmsg_dumper *dumper,
unsigned long total = 0;
const char *why;
unsigned int part = 1;
- unsigned long flags = 0;
- int is_locked;
int ret;
why = get_reason_str(reason);
- if (pstore_cannot_block_path(reason)) {
- is_locked = spin_trylock_irqsave(&psinfo->buf_lock, flags);
- if (!is_locked) {
- pr_err("pstore dump routine blocked in %s path, may corrupt error record\n"
- , in_nmi() ? "NMI" : why);
+ if (down_trylock(&psinfo->buf_lock)) {
+ /* Failed to acquire lock: give up if we cannot wait. */
+ if (pstore_cannot_wait(reason)) {
+ pr_err("dump skipped in %s path: may corrupt error record\n",
+ in_nmi() ? "NMI" : why);
+ return;
+ }
+ if (down_interruptible(&psinfo->buf_lock)) {
+ pr_err("could not grab semaphore?!\n");
return;
}
- } else {
- spin_lock_irqsave(&psinfo->buf_lock, flags);
- is_locked = 1;
}
+
oopscount++;
while (total < kmsg_bytes) {
char *dst;
@@ -411,7 +434,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
record.part = part;
record.buf = psinfo->buf;
- if (big_oops_buf && is_locked) {
+ if (big_oops_buf) {
dst = big_oops_buf;
dst_size = big_oops_buf_sz;
} else {
@@ -429,7 +452,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
dst_size, &dump_size))
break;
- if (big_oops_buf && is_locked) {
+ if (big_oops_buf) {
zipped_len = pstore_compress(dst, psinfo->buf,
header_size + dump_size,
psinfo->bufsize);
@@ -452,8 +475,8 @@ static void pstore_dump(struct kmsg_dumper *dumper,
total += record.size;
part++;
}
- if (is_locked)
- spin_unlock_irqrestore(&psinfo->buf_lock, flags);
+
+ up(&psinfo->buf_lock);
}
static struct kmsg_dumper pstore_dumper = {
@@ -476,31 +499,14 @@ static void pstore_unregister_kmsg(void)
#ifdef CONFIG_PSTORE_CONSOLE
static void pstore_console_write(struct console *con, const char *s, unsigned c)
{
- const char *e = s + c;
-
- while (s < e) {
- struct pstore_record record;
- unsigned long flags;
+ struct pstore_record record;
- pstore_record_init(&record, psinfo);
- record.type = PSTORE_TYPE_CONSOLE;
-
- if (c > psinfo->bufsize)
- c = psinfo->bufsize;
+ pstore_record_init(&record, psinfo);
+ record.type = PSTORE_TYPE_CONSOLE;
- if (oops_in_progress) {
- if (!spin_trylock_irqsave(&psinfo->buf_lock, flags))
- break;
- } else {
- spin_lock_irqsave(&psinfo->buf_lock, flags);
- }
- record.buf = (char *)s;
- record.size = c;
- psinfo->write(&record);
- spin_unlock_irqrestore(&psinfo->buf_lock, flags);
- s += c;
- c = e - s;
- }
+ record.buf = (char *)s;
+ record.size = c;
+ psinfo->write(&record);
}
static struct console pstore_console = {
@@ -589,6 +595,7 @@ int pstore_register(struct pstore_info *psi)
psi->write_user = pstore_write_user_compat;
psinfo = psi;
mutex_init(&psinfo->read_mutex);
+ sema_init(&psinfo->buf_lock, 1);
spin_unlock(&pstore_lock);
if (owner && !try_module_get(owner)) {
@@ -656,8 +663,9 @@ EXPORT_SYMBOL_GPL(pstore_unregister);
static void decompress_record(struct pstore_record *record)
{
+ int ret;
int unzipped_len;
- char *decompressed;
+ char *unzipped, *workspace;
if (!record->compressed)
return;
@@ -668,35 +676,42 @@ static void decompress_record(struct pstore_record *record)
return;
}
- /* No compression method has created the common buffer. */
+ /* Missing compression buffer means compression was not initialized. */
if (!big_oops_buf) {
- pr_warn("no decompression buffer allocated\n");
+ pr_warn("no decompression method initialized!\n");
return;
}
- unzipped_len = pstore_decompress(record->buf, big_oops_buf,
- record->size, big_oops_buf_sz);
- if (unzipped_len <= 0) {
- pr_err("decompression failed: %d\n", unzipped_len);
+ /* Allocate enough space to hold max decompression and ECC. */
+ unzipped_len = big_oops_buf_sz;
+ workspace = kmalloc(unzipped_len + record->ecc_notice_size,
+ GFP_KERNEL);
+ if (!workspace)
return;
- }
- /* Build new buffer for decompressed contents. */
- decompressed = kmalloc(unzipped_len + record->ecc_notice_size,
- GFP_KERNEL);
- if (!decompressed) {
- pr_err("decompression ran out of memory\n");
+ /* After decompression "unzipped_len" is almost certainly smaller. */
+ ret = crypto_comp_decompress(tfm, record->buf, record->size,
+ workspace, &unzipped_len);
+ if (ret) {
+ pr_err("crypto_comp_decompress failed, ret = %d!\n", ret);
+ kfree(workspace);
return;
}
- memcpy(decompressed, big_oops_buf, unzipped_len);
/* Append ECC notice to decompressed buffer. */
- memcpy(decompressed + unzipped_len, record->buf + record->size,
+ memcpy(workspace + unzipped_len, record->buf + record->size,
record->ecc_notice_size);
- /* Swap out compresed contents with decompressed contents. */
+ /* Copy decompressed contents into an minimum-sized allocation. */
+ unzipped = kmemdup(workspace, unzipped_len + record->ecc_notice_size,
+ GFP_KERNEL);
+ kfree(workspace);
+ if (!unzipped)
+ return;
+
+ /* Swap out compressed contents with decompressed contents. */
kfree(record->buf);
- record->buf = decompressed;
+ record->buf = unzipped;
record->size = unzipped_len;
record->compressed = false;
}
diff --git a/fs/pstore/pmsg.c b/fs/pstore/pmsg.c
index 24db02de1787..97fcef74e5af 100644
--- a/fs/pstore/pmsg.c
+++ b/fs/pstore/pmsg.c
@@ -33,7 +33,7 @@ static ssize_t write_pmsg(struct file *file, const char __user *buf,
record.size = count;
/* check outside lock, page in any data. write_user also checks */
- if (!access_ok(VERIFY_READ, buf, count))
+ if (!access_ok(buf, count))
return -EFAULT;
mutex_lock(&pmsg_lock);
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index e02a9039b5ea..96f7d32cd184 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -124,19 +124,17 @@ static int ramoops_pstore_open(struct pstore_info *psi)
}
static struct persistent_ram_zone *
-ramoops_get_next_prz(struct persistent_ram_zone *przs[], uint *c, uint max,
- u64 *id,
- enum pstore_type_id *typep, enum pstore_type_id type,
- bool update)
+ramoops_get_next_prz(struct persistent_ram_zone *przs[], int id,
+ struct pstore_record *record)
{
struct persistent_ram_zone *prz;
- int i = (*c)++;
+ bool update = (record->type == PSTORE_TYPE_DMESG);
/* Give up if we never existed or have hit the end. */
- if (!przs || i >= max)
+ if (!przs)
return NULL;
- prz = przs[i];
+ prz = przs[id];
if (!prz)
return NULL;
@@ -147,8 +145,8 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], uint *c, uint max,
if (!persistent_ram_old_size(prz))
return NULL;
- *typep = type;
- *id = i;
+ record->type = prz->type;
+ record->id = id;
return prz;
}
@@ -255,10 +253,8 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)
/* Find the next valid persistent_ram_zone for DMESG */
while (cxt->dump_read_cnt < cxt->max_dump_cnt && !prz) {
- prz = ramoops_get_next_prz(cxt->dprzs, &cxt->dump_read_cnt,
- cxt->max_dump_cnt, &record->id,
- &record->type,
- PSTORE_TYPE_DMESG, 1);
+ prz = ramoops_get_next_prz(cxt->dprzs, cxt->dump_read_cnt++,
+ record);
if (!prz_ok(prz))
continue;
header_length = ramoops_read_kmsg_hdr(persistent_ram_old(prz),
@@ -272,22 +268,18 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)
}
}
- if (!prz_ok(prz))
- prz = ramoops_get_next_prz(&cxt->cprz, &cxt->console_read_cnt,
- 1, &record->id, &record->type,
- PSTORE_TYPE_CONSOLE, 0);
+ if (!prz_ok(prz) && !cxt->console_read_cnt++)
+ prz = ramoops_get_next_prz(&cxt->cprz, 0 /* single */, record);
- if (!prz_ok(prz))
- prz = ramoops_get_next_prz(&cxt->mprz, &cxt->pmsg_read_cnt,
- 1, &record->id, &record->type,
- PSTORE_TYPE_PMSG, 0);
+ if (!prz_ok(prz) && !cxt->pmsg_read_cnt++)
+ prz = ramoops_get_next_prz(&cxt->mprz, 0 /* single */, record);
/* ftrace is last since it may want to dynamically allocate memory. */
if (!prz_ok(prz)) {
- if (!(cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU)) {
- prz = ramoops_get_next_prz(cxt->fprzs,
- &cxt->ftrace_read_cnt, 1, &record->id,
- &record->type, PSTORE_TYPE_FTRACE, 0);
+ if (!(cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU) &&
+ !cxt->ftrace_read_cnt++) {
+ prz = ramoops_get_next_prz(cxt->fprzs, 0 /* single */,
+ record);
} else {
/*
* Build a new dummy record which combines all the
@@ -299,15 +291,12 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)
GFP_KERNEL);
if (!tmp_prz)
return -ENOMEM;
+ prz = tmp_prz;
free_prz = true;
while (cxt->ftrace_read_cnt < cxt->max_ftrace_cnt) {
prz_next = ramoops_get_next_prz(cxt->fprzs,
- &cxt->ftrace_read_cnt,
- cxt->max_ftrace_cnt,
- &record->id,
- &record->type,
- PSTORE_TYPE_FTRACE, 0);
+ cxt->ftrace_read_cnt++, record);
if (!prz_ok(prz_next))
continue;
@@ -321,7 +310,6 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)
goto out;
}
record->id = 0;
- prz = tmp_prz;
}
}
@@ -611,6 +599,7 @@ static int ramoops_init_przs(const char *name,
goto fail;
}
*paddr += zone_sz;
+ prz_ar[i]->type = pstore_name_to_type(name);
}
*przs = prz_ar;
@@ -640,7 +629,7 @@ static int ramoops_init_prz(const char *name,
label = kasprintf(GFP_KERNEL, "ramoops:%s", name);
*prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info,
- cxt->memtype, 0, label);
+ cxt->memtype, PRZ_FLAG_ZAP_OLD, label);
if (IS_ERR(*prz)) {
int err = PTR_ERR(*prz);
@@ -649,9 +638,8 @@ static int ramoops_init_prz(const char *name,
return err;
}
- persistent_ram_zap(*prz);
-
*paddr += sz;
+ (*prz)->type = pstore_name_to_type(name);
return 0;
}
@@ -787,7 +775,7 @@ static int ramoops_probe(struct platform_device *pdev)
dump_mem_sz = cxt->size - cxt->console_size - cxt->ftrace_size
- cxt->pmsg_size;
- err = ramoops_init_przs("dump", dev, cxt, &cxt->dprzs, &paddr,
+ err = ramoops_init_przs("dmesg", dev, cxt, &cxt->dprzs, &paddr,
dump_mem_sz, cxt->record_size,
&cxt->max_dump_cnt, 0, 0);
if (err)
@@ -827,7 +815,6 @@ static int ramoops_probe(struct platform_device *pdev)
err = -ENOMEM;
goto fail_clear;
}
- spin_lock_init(&cxt->pstore.buf_lock);
cxt->pstore.flags = PSTORE_FLAGS_DMESG;
if (cxt->console_size)
@@ -855,9 +842,9 @@ static int ramoops_probe(struct platform_device *pdev)
ramoops_pmsg_size = pdata->pmsg_size;
ramoops_ftrace_size = pdata->ftrace_size;
- pr_info("attached 0x%lx@0x%llx, ecc: %d/%d\n",
+ pr_info("using 0x%lx@0x%llx, ecc: %d\n",
cxt->size, (unsigned long long)cxt->phys_addr,
- cxt->ecc_info.ecc_size, cxt->ecc_info.block_size);
+ cxt->ecc_info.ecc_size);
return 0;
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 12e21f789194..f375c0735351 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -12,7 +12,7 @@
*
*/
-#define pr_fmt(fmt) "persistent_ram: " fmt
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/device.h>
#include <linux/err.h>
@@ -29,6 +29,16 @@
#include <linux/vmalloc.h>
#include <asm/page.h>
+/**
+ * struct persistent_ram_buffer - persistent circular RAM buffer
+ *
+ * @sig:
+ * signature to indicate header (PERSISTENT_RAM_SIG xor PRZ-type value)
+ * @start:
+ * offset into @data where the beginning of the stored bytes begin
+ * @size:
+ * number of valid bytes stored in @data
+ */
struct persistent_ram_buffer {
uint32_t sig;
atomic_t start;
@@ -347,7 +357,7 @@ int notrace persistent_ram_write_user(struct persistent_ram_zone *prz,
int rem, ret = 0, c = count;
size_t start;
- if (unlikely(!access_ok(VERIFY_READ, s, count)))
+ if (unlikely(!access_ok(s, count)))
return -EFAULT;
if (unlikely(c > prz->buffer_size)) {
s += c - prz->buffer_size;
@@ -443,7 +453,8 @@ static void *persistent_ram_iomap(phys_addr_t start, size_t size,
void *va;
if (!request_mem_region(start, size, label ?: "ramoops")) {
- pr_err("request mem region (0x%llx@0x%llx) failed\n",
+ pr_err("request mem region (%s 0x%llx@0x%llx) failed\n",
+ label ?: "ramoops",
(unsigned long long)size, (unsigned long long)start);
return NULL;
}
@@ -489,32 +500,42 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
struct persistent_ram_ecc_info *ecc_info)
{
int ret;
+ bool zap = !!(prz->flags & PRZ_FLAG_ZAP_OLD);
ret = persistent_ram_init_ecc(prz, ecc_info);
- if (ret)
+ if (ret) {
+ pr_warn("ECC failed %s\n", prz->label);
return ret;
+ }
sig ^= PERSISTENT_RAM_SIG;
if (prz->buffer->sig == sig) {
+ if (buffer_size(prz) == 0) {
+ pr_debug("found existing empty buffer\n");
+ return 0;
+ }
+
if (buffer_size(prz) > prz->buffer_size ||
- buffer_start(prz) > buffer_size(prz))
+ buffer_start(prz) > buffer_size(prz)) {
pr_info("found existing invalid buffer, size %zu, start %zu\n",
buffer_size(prz), buffer_start(prz));
- else {
+ zap = true;
+ } else {
pr_debug("found existing buffer, size %zu, start %zu\n",
buffer_size(prz), buffer_start(prz));
persistent_ram_save_old(prz);
- return 0;
}
} else {
pr_debug("no valid data in buffer (sig = 0x%08x)\n",
prz->buffer->sig);
+ prz->buffer->sig = sig;
+ zap = true;
}
- /* Rewind missing or invalid memory area. */
- prz->buffer->sig = sig;
- persistent_ram_zap(prz);
+ /* Reset missing, invalid, or single-use memory area. */
+ if (zap)
+ persistent_ram_zap(prz);
return 0;
}
@@ -572,6 +593,12 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
if (ret)
goto err;
+ pr_debug("attached %s 0x%zx@0x%llx: %zu header, %zu data, %zu ecc (%d/%d)\n",
+ prz->label, prz->size, (unsigned long long)prz->paddr,
+ sizeof(*prz->buffer), prz->buffer_size,
+ prz->size - sizeof(*prz->buffer) - prz->buffer_size,
+ prz->ecc_info.ecc_size, prz->ecc_info.block_size);
+
return prz;
err:
persistent_ram_free(prz);
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index f0cbf58ad4da..fd5dd806f1b9 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -791,7 +791,8 @@ static int quotactl_cmd_write(int cmd)
/* Return true if quotactl command is manipulating quota on/off state */
static bool quotactl_cmd_onoff(int cmd)
{
- return (cmd == Q_QUOTAON) || (cmd == Q_QUOTAOFF);
+ return (cmd == Q_QUOTAON) || (cmd == Q_QUOTAOFF) ||
+ (cmd == Q_XQUOTAON) || (cmd == Q_XQUOTAOFF);
}
/*
diff --git a/fs/read_write.c b/fs/read_write.c
index 58f30537c47a..ff3c5e6f87cf 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -442,7 +442,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
return -EBADF;
if (!(file->f_mode & FMODE_CAN_READ))
return -EINVAL;
- if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
+ if (unlikely(!access_ok(buf, count)))
return -EFAULT;
ret = rw_verify_area(READ, file, pos, count);
@@ -538,7 +538,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
return -EBADF;
if (!(file->f_mode & FMODE_CAN_WRITE))
return -EINVAL;
- if (unlikely(!access_ok(VERIFY_READ, buf, count)))
+ if (unlikely(!access_ok(buf, count)))
return -EFAULT;
ret = rw_verify_area(WRITE, file, pos, count);
@@ -718,9 +718,6 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
return ret;
}
-/* A write operation does a read from user space and vice versa */
-#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
-
/**
* rw_copy_check_uvector() - Copy an array of &struct iovec from userspace
* into the kernel and check that it is valid.
@@ -810,7 +807,7 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
goto out;
}
if (type >= 0
- && unlikely(!access_ok(vrfy_dir(type), buf, len))) {
+ && unlikely(!access_ok(buf, len))) {
ret = -EFAULT;
goto out;
}
@@ -856,7 +853,7 @@ ssize_t compat_rw_copy_check_uvector(int type,
*ret_pointer = iov;
ret = -EFAULT;
- if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
+ if (!access_ok(uvector, nr_segs*sizeof(*uvector)))
goto out;
/*
@@ -881,7 +878,7 @@ ssize_t compat_rw_copy_check_uvector(int type,
if (len < 0) /* size_t not fitting in compat_ssize_t .. */
goto out;
if (type >= 0 &&
- !access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
+ !access_ok(compat_ptr(buf), len)) {
ret = -EFAULT;
goto out;
}
diff --git a/fs/readdir.c b/fs/readdir.c
index d97f548e6323..2f6a4534e0df 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -105,7 +105,7 @@ static int fillonedir(struct dir_context *ctx, const char *name, int namlen,
}
buf->result++;
dirent = buf->dirent;
- if (!access_ok(VERIFY_WRITE, dirent,
+ if (!access_ok(dirent,
(unsigned long)(dirent->d_name + namlen + 1) -
(unsigned long)dirent))
goto efault;
@@ -221,7 +221,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
};
int error;
- if (!access_ok(VERIFY_WRITE, dirent, count))
+ if (!access_ok(dirent, count))
return -EFAULT;
f = fdget_pos(fd);
@@ -304,7 +304,7 @@ int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent,
};
int error;
- if (!access_ok(VERIFY_WRITE, dirent, count))
+ if (!access_ok(dirent, count))
return -EFAULT;
f = fdget_pos(fd);
@@ -365,7 +365,7 @@ static int compat_fillonedir(struct dir_context *ctx, const char *name,
}
buf->result++;
dirent = buf->dirent;
- if (!access_ok(VERIFY_WRITE, dirent,
+ if (!access_ok(dirent,
(unsigned long)(dirent->d_name + namlen + 1) -
(unsigned long)dirent))
goto efault;
@@ -475,7 +475,7 @@ COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd,
};
int error;
- if (!access_ok(VERIFY_WRITE, dirent, count))
+ if (!access_ok(dirent, count))
return -EFAULT;
f = fdget_pos(fd);
diff --git a/fs/select.c b/fs/select.c
index 22b3bf89f051..d0f35dbc0e8f 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -287,12 +287,18 @@ int poll_select_set_timeout(struct timespec64 *to, time64_t sec, long nsec)
return 0;
}
+enum poll_time_type {
+ PT_TIMEVAL = 0,
+ PT_OLD_TIMEVAL = 1,
+ PT_TIMESPEC = 2,
+ PT_OLD_TIMESPEC = 3,
+};
+
static int poll_select_copy_remaining(struct timespec64 *end_time,
void __user *p,
- int timeval, int ret)
+ enum poll_time_type pt_type, int ret)
{
struct timespec64 rts;
- struct timeval rtv;
if (!p)
return ret;
@@ -310,18 +316,40 @@ static int poll_select_copy_remaining(struct timespec64 *end_time,
rts.tv_sec = rts.tv_nsec = 0;
- if (timeval) {
- if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec))
- memset(&rtv, 0, sizeof(rtv));
- rtv.tv_sec = rts.tv_sec;
- rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
+ switch (pt_type) {
+ case PT_TIMEVAL:
+ {
+ struct timeval rtv;
- if (!copy_to_user(p, &rtv, sizeof(rtv)))
+ if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec))
+ memset(&rtv, 0, sizeof(rtv));
+ rtv.tv_sec = rts.tv_sec;
+ rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
+ if (!copy_to_user(p, &rtv, sizeof(rtv)))
+ return ret;
+ }
+ break;
+ case PT_OLD_TIMEVAL:
+ {
+ struct old_timeval32 rtv;
+
+ rtv.tv_sec = rts.tv_sec;
+ rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
+ if (!copy_to_user(p, &rtv, sizeof(rtv)))
+ return ret;
+ }
+ break;
+ case PT_TIMESPEC:
+ if (!put_timespec64(&rts, p))
return ret;
-
- } else if (!put_timespec64(&rts, p))
- return ret;
-
+ break;
+ case PT_OLD_TIMESPEC:
+ if (!put_old_timespec32(&rts, p))
+ return ret;
+ break;
+ default:
+ BUG();
+ }
/*
* If an application puts its timeval in read-only memory, we
* don't want the Linux-specific update to the timeval to
@@ -353,9 +381,6 @@ typedef struct {
#define FDS_BYTES(nr) (FDS_LONGS(nr)*sizeof(long))
/*
- * We do a VERIFY_WRITE here even though we are only reading this time:
- * we'll write to it eventually..
- *
* Use "unsigned long" accesses to let user-mode fd_set's be long-aligned.
*/
static inline
@@ -689,7 +714,7 @@ static int kern_select(int n, fd_set __user *inp, fd_set __user *outp,
}
ret = core_sys_select(n, inp, outp, exp, to);
- ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
+ ret = poll_select_copy_remaining(&end_time, tvp, PT_TIMEVAL, ret);
return ret;
}
@@ -701,49 +726,41 @@ SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
}
static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
- fd_set __user *exp, struct timespec __user *tsp,
- const sigset_t __user *sigmask, size_t sigsetsize)
+ fd_set __user *exp, void __user *tsp,
+ const sigset_t __user *sigmask, size_t sigsetsize,
+ enum poll_time_type type)
{
sigset_t ksigmask, sigsaved;
struct timespec64 ts, end_time, *to = NULL;
int ret;
if (tsp) {
- if (get_timespec64(&ts, tsp))
- return -EFAULT;
+ switch (type) {
+ case PT_TIMESPEC:
+ if (get_timespec64(&ts, tsp))
+ return -EFAULT;
+ break;
+ case PT_OLD_TIMESPEC:
+ if (get_old_timespec32(&ts, tsp))
+ return -EFAULT;
+ break;
+ default:
+ BUG();
+ }
to = &end_time;
if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
return -EINVAL;
}
- if (sigmask) {
- /* XXX: Don't preclude handling different sized sigset_t's. */
- if (sigsetsize != sizeof(sigset_t))
- return -EINVAL;
- if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
- return -EFAULT;
-
- sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
- sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
- }
+ ret = set_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize);
+ if (ret)
+ return ret;
ret = core_sys_select(n, inp, outp, exp, to);
- ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
+ ret = poll_select_copy_remaining(&end_time, tsp, type, ret);
- if (ret == -ERESTARTNOHAND) {
- /*
- * Don't restore the signal mask yet. Let do_signal() deliver
- * the signal on the way back to userspace, before the signal
- * mask is restored.
- */
- if (sigmask) {
- memcpy(&current->saved_sigmask, &sigsaved,
- sizeof(sigsaved));
- set_restore_sigmask();
- }
- } else if (sigmask)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+ restore_user_sigmask(sigmask, &sigsaved);
return ret;
}
@@ -755,23 +772,45 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
* the sigset size.
*/
SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp,
- fd_set __user *, exp, struct timespec __user *, tsp,
+ fd_set __user *, exp, struct __kernel_timespec __user *, tsp,
void __user *, sig)
{
size_t sigsetsize = 0;
sigset_t __user *up = NULL;
if (sig) {
- if (!access_ok(VERIFY_READ, sig, sizeof(void *)+sizeof(size_t))
+ if (!access_ok(sig, sizeof(void *)+sizeof(size_t))
|| __get_user(up, (sigset_t __user * __user *)sig)
|| __get_user(sigsetsize,
(size_t __user *)(sig+sizeof(void *))))
return -EFAULT;
}
- return do_pselect(n, inp, outp, exp, tsp, up, sigsetsize);
+ return do_pselect(n, inp, outp, exp, tsp, up, sigsetsize, PT_TIMESPEC);
}
+#if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT)
+
+SYSCALL_DEFINE6(pselect6_time32, int, n, fd_set __user *, inp, fd_set __user *, outp,
+ fd_set __user *, exp, struct old_timespec32 __user *, tsp,
+ void __user *, sig)
+{
+ size_t sigsetsize = 0;
+ sigset_t __user *up = NULL;
+
+ if (sig) {
+ if (!access_ok(sig, sizeof(void *)+sizeof(size_t))
+ || __get_user(up, (sigset_t __user * __user *)sig)
+ || __get_user(sigsetsize,
+ (size_t __user *)(sig+sizeof(void *))))
+ return -EFAULT;
+ }
+
+ return do_pselect(n, inp, outp, exp, tsp, up, sigsetsize, PT_OLD_TIMESPEC);
+}
+
+#endif
+
#ifdef __ARCH_WANT_SYS_OLD_SELECT
struct sel_arg_struct {
unsigned long n;
@@ -1045,7 +1084,7 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
}
SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
- struct timespec __user *, tsp, const sigset_t __user *, sigmask,
+ struct __kernel_timespec __user *, tsp, const sigset_t __user *, sigmask,
size_t, sigsetsize)
{
sigset_t ksigmask, sigsaved;
@@ -1061,89 +1100,62 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
return -EINVAL;
}
- if (sigmask) {
- /* XXX: Don't preclude handling different sized sigset_t's. */
- if (sigsetsize != sizeof(sigset_t))
- return -EINVAL;
- if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
- return -EFAULT;
-
- sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
- sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
- }
+ ret = set_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize);
+ if (ret)
+ return ret;
ret = do_sys_poll(ufds, nfds, to);
+ restore_user_sigmask(sigmask, &sigsaved);
+
/* We can restart this syscall, usually */
- if (ret == -EINTR) {
- /*
- * Don't restore the signal mask yet. Let do_signal() deliver
- * the signal on the way back to userspace, before the signal
- * mask is restored.
- */
- if (sigmask) {
- memcpy(&current->saved_sigmask, &sigsaved,
- sizeof(sigsaved));
- set_restore_sigmask();
- }
+ if (ret == -EINTR)
ret = -ERESTARTNOHAND;
- } else if (sigmask)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
- ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
+ ret = poll_select_copy_remaining(&end_time, tsp, PT_TIMESPEC, ret);
return ret;
}
-#ifdef CONFIG_COMPAT
-#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t))
+#if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT)
-static
-int compat_poll_select_copy_remaining(struct timespec64 *end_time, void __user *p,
- int timeval, int ret)
+SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds,
+ struct old_timespec32 __user *, tsp, const sigset_t __user *, sigmask,
+ size_t, sigsetsize)
{
- struct timespec64 ts;
+ sigset_t ksigmask, sigsaved;
+ struct timespec64 ts, end_time, *to = NULL;
+ int ret;
- if (!p)
- return ret;
+ if (tsp) {
+ if (get_old_timespec32(&ts, tsp))
+ return -EFAULT;
- if (current->personality & STICKY_TIMEOUTS)
- goto sticky;
+ to = &end_time;
+ if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
+ return -EINVAL;
+ }
- /* No update for zero timeout */
- if (!end_time->tv_sec && !end_time->tv_nsec)
+ ret = set_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize);
+ if (ret)
return ret;
- ktime_get_ts64(&ts);
- ts = timespec64_sub(*end_time, ts);
- if (ts.tv_sec < 0)
- ts.tv_sec = ts.tv_nsec = 0;
+ ret = do_sys_poll(ufds, nfds, to);
- if (timeval) {
- struct old_timeval32 rtv;
+ restore_user_sigmask(sigmask, &sigsaved);
- rtv.tv_sec = ts.tv_sec;
- rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC;
+ /* We can restart this syscall, usually */
+ if (ret == -EINTR)
+ ret = -ERESTARTNOHAND;
- if (!copy_to_user(p, &rtv, sizeof(rtv)))
- return ret;
- } else {
- if (!put_old_timespec32(&ts, p))
- return ret;
- }
- /*
- * If an application puts its timeval in read-only memory, we
- * don't want the Linux-specific update to the timeval to
- * cause a fault after the select has completed
- * successfully. However, because we're not updating the
- * timeval, we can't restart the system call.
- */
+ ret = poll_select_copy_remaining(&end_time, tsp, PT_OLD_TIMESPEC, ret);
-sticky:
- if (ret == -ERESTARTNOHAND)
- ret = -EINTR;
return ret;
}
+#endif
+
+#ifdef CONFIG_COMPAT
+#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t))
/*
* Ooo, nasty. We need here to frob 32-bit unsigned longs to
@@ -1275,7 +1287,7 @@ static int do_compat_select(int n, compat_ulong_t __user *inp,
}
ret = compat_core_sys_select(n, inp, outp, exp, to);
- ret = compat_poll_select_copy_remaining(&end_time, tvp, 1, ret);
+ ret = poll_select_copy_remaining(&end_time, tvp, PT_OLD_TIMEVAL, ret);
return ret;
}
@@ -1307,52 +1319,66 @@ COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg)
static long do_compat_pselect(int n, compat_ulong_t __user *inp,
compat_ulong_t __user *outp, compat_ulong_t __user *exp,
- struct old_timespec32 __user *tsp, compat_sigset_t __user *sigmask,
- compat_size_t sigsetsize)
+ void __user *tsp, compat_sigset_t __user *sigmask,
+ compat_size_t sigsetsize, enum poll_time_type type)
{
sigset_t ksigmask, sigsaved;
struct timespec64 ts, end_time, *to = NULL;
int ret;
if (tsp) {
- if (get_old_timespec32(&ts, tsp))
- return -EFAULT;
+ switch (type) {
+ case PT_OLD_TIMESPEC:
+ if (get_old_timespec32(&ts, tsp))
+ return -EFAULT;
+ break;
+ case PT_TIMESPEC:
+ if (get_timespec64(&ts, tsp))
+ return -EFAULT;
+ break;
+ default:
+ BUG();
+ }
to = &end_time;
if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
return -EINVAL;
}
- if (sigmask) {
- if (sigsetsize != sizeof(compat_sigset_t))
- return -EINVAL;
- if (get_compat_sigset(&ksigmask, sigmask))
- return -EFAULT;
-
- sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
- sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
- }
+ ret = set_compat_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize);
+ if (ret)
+ return ret;
ret = compat_core_sys_select(n, inp, outp, exp, to);
- ret = compat_poll_select_copy_remaining(&end_time, tsp, 0, ret);
+ ret = poll_select_copy_remaining(&end_time, tsp, type, ret);
- if (ret == -ERESTARTNOHAND) {
- /*
- * Don't restore the signal mask yet. Let do_signal() deliver
- * the signal on the way back to userspace, before the signal
- * mask is restored.
- */
- if (sigmask) {
- memcpy(&current->saved_sigmask, &sigsaved,
- sizeof(sigsaved));
- set_restore_sigmask();
- }
- } else if (sigmask)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+ restore_user_sigmask(sigmask, &sigsaved);
return ret;
}
+COMPAT_SYSCALL_DEFINE6(pselect6_time64, int, n, compat_ulong_t __user *, inp,
+ compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
+ struct __kernel_timespec __user *, tsp, void __user *, sig)
+{
+ compat_size_t sigsetsize = 0;
+ compat_uptr_t up = 0;
+
+ if (sig) {
+ if (!access_ok(sig,
+ sizeof(compat_uptr_t)+sizeof(compat_size_t)) ||
+ __get_user(up, (compat_uptr_t __user *)sig) ||
+ __get_user(sigsetsize,
+ (compat_size_t __user *)(sig+sizeof(up))))
+ return -EFAULT;
+ }
+
+ return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(up),
+ sigsetsize, PT_TIMESPEC);
+}
+
+#if defined(CONFIG_COMPAT_32BIT_TIME)
+
COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp,
compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
struct old_timespec32 __user *, tsp, void __user *, sig)
@@ -1361,17 +1387,21 @@ COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp,
compat_uptr_t up = 0;
if (sig) {
- if (!access_ok(VERIFY_READ, sig,
+ if (!access_ok(sig,
sizeof(compat_uptr_t)+sizeof(compat_size_t)) ||
__get_user(up, (compat_uptr_t __user *)sig) ||
__get_user(sigsetsize,
(compat_size_t __user *)(sig+sizeof(up))))
return -EFAULT;
}
+
return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(up),
- sigsetsize);
+ sigsetsize, PT_OLD_TIMESPEC);
}
+#endif
+
+#if defined(CONFIG_COMPAT_32BIT_TIME)
COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds,
unsigned int, nfds, struct old_timespec32 __user *, tsp,
const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
@@ -1389,36 +1419,57 @@ COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds,
return -EINVAL;
}
- if (sigmask) {
- if (sigsetsize != sizeof(compat_sigset_t))
- return -EINVAL;
- if (get_compat_sigset(&ksigmask, sigmask))
+ ret = set_compat_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize);
+ if (ret)
+ return ret;
+
+ ret = do_sys_poll(ufds, nfds, to);
+
+ restore_user_sigmask(sigmask, &sigsaved);
+
+ /* We can restart this syscall, usually */
+ if (ret == -EINTR)
+ ret = -ERESTARTNOHAND;
+
+ ret = poll_select_copy_remaining(&end_time, tsp, PT_OLD_TIMESPEC, ret);
+
+ return ret;
+}
+#endif
+
+/* New compat syscall for 64 bit time_t*/
+COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds,
+ unsigned int, nfds, struct __kernel_timespec __user *, tsp,
+ const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
+{
+ sigset_t ksigmask, sigsaved;
+ struct timespec64 ts, end_time, *to = NULL;
+ int ret;
+
+ if (tsp) {
+ if (get_timespec64(&ts, tsp))
return -EFAULT;
- sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
- sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+ to = &end_time;
+ if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
+ return -EINVAL;
}
+ ret = set_compat_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize);
+ if (ret)
+ return ret;
+
ret = do_sys_poll(ufds, nfds, to);
+ restore_user_sigmask(sigmask, &sigsaved);
+
/* We can restart this syscall, usually */
- if (ret == -EINTR) {
- /*
- * Don't restore the signal mask yet. Let do_signal() deliver
- * the signal on the way back to userspace, before the signal
- * mask is restored.
- */
- if (sigmask) {
- memcpy(&current->saved_sigmask, &sigsaved,
- sizeof(sigsaved));
- set_restore_sigmask();
- }
+ if (ret == -EINTR)
ret = -ERESTARTNOHAND;
- } else if (sigmask)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
- ret = compat_poll_select_copy_remaining(&end_time, tsp, 0, ret);
+ ret = poll_select_copy_remaining(&end_time, tsp, PT_TIMESPEC, ret);
return ret;
}
+
#endif
diff --git a/fs/super.c b/fs/super.c
index ca53a08497ed..48e25eba8465 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -35,6 +35,7 @@
#include <linux/fsnotify.h>
#include <linux/lockdep.h>
#include <linux/user_namespace.h>
+#include <uapi/linux/mount.h>
#include "internal.h"
static int thaw_super_locked(struct super_block *sb);
@@ -1245,17 +1246,13 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
{
struct dentry *root;
struct super_block *sb;
- char *secdata = NULL;
int error = -ENOMEM;
+ void *sec_opts = NULL;
if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
- secdata = alloc_secdata();
- if (!secdata)
- goto out;
-
- error = security_sb_copy_data(data, secdata);
+ error = security_sb_eat_lsm_opts(data, &sec_opts);
if (error)
- goto out_free_secdata;
+ return ERR_PTR(error);
}
root = type->mount(type, flags, name, data);
@@ -1276,10 +1273,16 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
smp_wmb();
sb->s_flags |= SB_BORN;
- error = security_sb_kern_mount(sb, flags, secdata);
+ error = security_sb_set_mnt_opts(sb, sec_opts, 0, NULL);
if (error)
goto out_sb;
+ if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT))) {
+ error = security_sb_kern_mount(sb);
+ if (error)
+ goto out_sb;
+ }
+
/*
* filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
* but s_maxbytes was an unsigned long long for many releases. Throw
@@ -1290,14 +1293,13 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
"negative value (%lld)\n", type->name, sb->s_maxbytes);
up_write(&sb->s_umount);
- free_secdata(secdata);
+ security_free_mnt_opts(&sec_opts);
return root;
out_sb:
dput(root);
deactivate_locked_super(sb);
out_free_secdata:
- free_secdata(secdata);
-out:
+ security_free_mnt_opts(&sec_opts);
return ERR_PTR(error);
}
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index feeae8081c22..aa85f2874a9f 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -43,7 +43,8 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
kuid_t uid;
kgid_t gid;
- BUG_ON(!kobj);
+ if (WARN_ON(!kobj))
+ return -EINVAL;
if (kobj->parent)
parent = kobj->parent->sd;
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 0a7252aecfa5..51398457fe00 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -325,7 +325,8 @@ int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
kuid_t uid;
kgid_t gid;
- BUG_ON(!kobj || !kobj->sd || !attr);
+ if (WARN_ON(!kobj || !kobj->sd || !attr))
+ return -EINVAL;
kobject_get_ownership(kobj, &uid, &gid);
return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode,
@@ -334,7 +335,7 @@ int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
}
EXPORT_SYMBOL_GPL(sysfs_create_file_ns);
-int sysfs_create_files(struct kobject *kobj, const struct attribute **ptr)
+int sysfs_create_files(struct kobject *kobj, const struct attribute * const *ptr)
{
int err = 0;
int i;
@@ -493,7 +494,7 @@ bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr)
return ret;
}
-void sysfs_remove_files(struct kobject *kobj, const struct attribute **ptr)
+void sysfs_remove_files(struct kobject *kobj, const struct attribute * const *ptr)
{
int i;
for (i = 0; ptr[i]; i++)
@@ -537,7 +538,8 @@ int sysfs_create_bin_file(struct kobject *kobj,
kuid_t uid;
kgid_t gid;
- BUG_ON(!kobj || !kobj->sd || !attr);
+ if (WARN_ON(!kobj || !kobj->sd || !attr))
+ return -EINVAL;
kobject_get_ownership(kobj, &uid, &gid);
return sysfs_add_file_mode_ns(kobj->sd, &attr->attr, true,
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 1eb2d6307663..57038604d4a8 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -112,7 +112,8 @@ static int internal_create_group(struct kobject *kobj, int update,
kgid_t gid;
int error;
- BUG_ON(!kobj || (!update && !kobj->sd));
+ if (WARN_ON(!kobj || (!update && !kobj->sd)))
+ return -EINVAL;
/* Updates may happen before the object has been instantiated */
if (unlikely(update && !kobj->sd))
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 215c225b2ca1..c4deecc80f67 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -23,7 +23,8 @@ static int sysfs_do_create_link_sd(struct kernfs_node *parent,
{
struct kernfs_node *kn, *target = NULL;
- BUG_ON(!name || !parent);
+ if (WARN_ON(!name || !parent))
+ return -EINVAL;
/*
* We don't own @target_kobj and it may be removed at any time.
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig
index 529856fbccd0..bc1e082d921d 100644
--- a/fs/ubifs/Kconfig
+++ b/fs/ubifs/Kconfig
@@ -12,9 +12,10 @@ config UBIFS_FS
help
UBIFS is a file system for flash devices which works on top of UBI.
+if UBIFS_FS
+
config UBIFS_FS_ADVANCED_COMPR
bool "Advanced compression options"
- depends on UBIFS_FS
help
This option allows to explicitly choose which compressions, if any,
are enabled in UBIFS. Removing compressors means inability to read
@@ -24,7 +25,6 @@ config UBIFS_FS_ADVANCED_COMPR
config UBIFS_FS_LZO
bool "LZO compression support" if UBIFS_FS_ADVANCED_COMPR
- depends on UBIFS_FS
default y
help
LZO compressor is generally faster than zlib but compresses worse.
@@ -32,14 +32,12 @@ config UBIFS_FS_LZO
config UBIFS_FS_ZLIB
bool "ZLIB compression support" if UBIFS_FS_ADVANCED_COMPR
- depends on UBIFS_FS
default y
help
Zlib compresses better than LZO but it is slower. Say 'Y' if unsure.
config UBIFS_ATIME_SUPPORT
- bool "Access time support" if UBIFS_FS
- depends on UBIFS_FS
+ bool "Access time support"
default n
help
Originally UBIFS did not support atime, because it looked like a bad idea due
@@ -54,7 +52,6 @@ config UBIFS_ATIME_SUPPORT
config UBIFS_FS_XATTR
bool "UBIFS XATTR support"
- depends on UBIFS_FS
default y
help
Saying Y here includes support for extended attributes (xattrs).
@@ -65,7 +62,7 @@ config UBIFS_FS_XATTR
config UBIFS_FS_ENCRYPTION
bool "UBIFS Encryption"
- depends on UBIFS_FS && UBIFS_FS_XATTR && BLOCK
+ depends on UBIFS_FS_XATTR && BLOCK
select FS_ENCRYPTION
default n
help
@@ -76,7 +73,7 @@ config UBIFS_FS_ENCRYPTION
config UBIFS_FS_SECURITY
bool "UBIFS Security Labels"
- depends on UBIFS_FS && UBIFS_FS_XATTR
+ depends on UBIFS_FS_XATTR
default y
help
Security labels provide an access control facility to support Linux
@@ -89,6 +86,7 @@ config UBIFS_FS_SECURITY
config UBIFS_FS_AUTHENTICATION
bool "UBIFS authentication support"
+ depends on KEYS
select CRYPTO_HMAC
help
Enable authentication support for UBIFS. This feature offers protection
@@ -96,3 +94,5 @@ config UBIFS_FS_AUTHENTICATION
If you say yes here you should also select a hashing algorithm such as
sha256, these are not selected automatically since there are many
different options.
+
+endif # UBIFS_FS
diff --git a/fs/ubifs/auth.c b/fs/ubifs/auth.c
index 124e965a28b3..5bf5fd08879e 100644
--- a/fs/ubifs/auth.c
+++ b/fs/ubifs/auth.c
@@ -269,8 +269,7 @@ int ubifs_init_authentication(struct ubifs_info *c)
goto out;
}
- c->hash_tfm = crypto_alloc_shash(c->auth_hash_name, 0,
- CRYPTO_ALG_ASYNC);
+ c->hash_tfm = crypto_alloc_shash(c->auth_hash_name, 0, 0);
if (IS_ERR(c->hash_tfm)) {
err = PTR_ERR(c->hash_tfm);
ubifs_err(c, "Can not allocate %s: %d",
@@ -286,7 +285,7 @@ int ubifs_init_authentication(struct ubifs_info *c)
goto out_free_hash;
}
- c->hmac_tfm = crypto_alloc_shash(hmac_name, 0, CRYPTO_ALG_ASYNC);
+ c->hmac_tfm = crypto_alloc_shash(hmac_name, 0, 0);
if (IS_ERR(c->hmac_tfm)) {
err = PTR_ERR(c->hmac_tfm);
ubifs_err(c, "Can not allocate %s: %d", hmac_name, err);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 1b78f2e09218..5d2ffb1a45fc 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1481,7 +1481,7 @@ static int ubifs_migrate_page(struct address_space *mapping,
{
int rc;
- rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
+ rc = migrate_page_move_mapping(mapping, newpage, page, mode, 0);
if (rc != MIGRATEPAGE_SUCCESS)
return rc;
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index d1d5e96350dd..b0c5f06128b5 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -1675,6 +1675,12 @@ int ubifs_lpt_calc_hash(struct ubifs_info *c, u8 *hash)
if (!ubifs_authenticated(c))
return 0;
+ if (!c->nroot) {
+ err = ubifs_read_nnode(c, NULL, 0);
+ if (err)
+ return err;
+ }
+
desc = ubifs_hash_get_desc(c);
if (IS_ERR(desc))
return PTR_ERR(desc);
@@ -1685,12 +1691,6 @@ int ubifs_lpt_calc_hash(struct ubifs_info *c, u8 *hash)
goto out;
}
- if (!c->nroot) {
- err = ubifs_read_nnode(c, NULL, 0);
- if (err)
- return err;
- }
-
cnode = (struct ubifs_cnode *)c->nroot;
while (cnode) {
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 75f961c4c044..0a0e65c07c6d 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -213,6 +213,38 @@ static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
}
/**
+ * inode_still_linked - check whether inode in question will be re-linked.
+ * @c: UBIFS file-system description object
+ * @rino: replay entry to test
+ *
+ * O_TMPFILE files can be re-linked, this means link count goes from 0 to 1.
+ * This case needs special care, otherwise all references to the inode will
+ * be removed upon the first replay entry of an inode with link count 0
+ * is found.
+ */
+static bool inode_still_linked(struct ubifs_info *c, struct replay_entry *rino)
+{
+ struct replay_entry *r;
+
+ ubifs_assert(c, rino->deletion);
+ ubifs_assert(c, key_type(c, &rino->key) == UBIFS_INO_KEY);
+
+ /*
+ * Find the most recent entry for the inode behind @rino and check
+ * whether it is a deletion.
+ */
+ list_for_each_entry_reverse(r, &c->replay_list, list) {
+ ubifs_assert(c, r->sqnum >= rino->sqnum);
+ if (key_inum(c, &r->key) == key_inum(c, &rino->key))
+ return r->deletion == 0;
+
+ }
+
+ ubifs_assert(c, 0);
+ return false;
+}
+
+/**
* apply_replay_entry - apply a replay entry to the TNC.
* @c: UBIFS file-system description object
* @r: replay entry to apply
@@ -239,6 +271,11 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
{
ino_t inum = key_inum(c, &r->key);
+ if (inode_still_linked(c, r)) {
+ err = 0;
+ break;
+ }
+
err = ubifs_tnc_remove_ino(c, inum);
break;
}
@@ -533,6 +570,28 @@ static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
return data == 0xFFFFFFFF;
}
+/* authenticate_sleb_hash and authenticate_sleb_hmac are split out for stack usage */
+static int authenticate_sleb_hash(struct ubifs_info *c, struct shash_desc *log_hash, u8 *hash)
+{
+ SHASH_DESC_ON_STACK(hash_desc, c->hash_tfm);
+
+ hash_desc->tfm = c->hash_tfm;
+ hash_desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ ubifs_shash_copy_state(c, log_hash, hash_desc);
+ return crypto_shash_final(hash_desc, hash);
+}
+
+static int authenticate_sleb_hmac(struct ubifs_info *c, u8 *hash, u8 *hmac)
+{
+ SHASH_DESC_ON_STACK(hmac_desc, c->hmac_tfm);
+
+ hmac_desc->tfm = c->hmac_tfm;
+ hmac_desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ return crypto_shash_digest(hmac_desc, hash, c->hash_len, hmac);
+}
+
/**
* authenticate_sleb - authenticate one scan LEB
* @c: UBIFS file-system description object
@@ -574,21 +633,12 @@ static int authenticate_sleb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
if (snod->type == UBIFS_AUTH_NODE) {
struct ubifs_auth_node *auth = snod->node;
- SHASH_DESC_ON_STACK(hash_desc, c->hash_tfm);
- SHASH_DESC_ON_STACK(hmac_desc, c->hmac_tfm);
-
- hash_desc->tfm = c->hash_tfm;
- hash_desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
- ubifs_shash_copy_state(c, log_hash, hash_desc);
- err = crypto_shash_final(hash_desc, hash);
+ err = authenticate_sleb_hash(c, log_hash, hash);
if (err)
goto out;
- hmac_desc->tfm = c->hmac_tfm;
- hmac_desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
- err = crypto_shash_digest(hmac_desc, hash, c->hash_len,
- hmac);
+ err = authenticate_sleb_hmac(c, hash, hmac);
if (err)
goto out;
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 75a69dd26d6e..3da90c951c23 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -63,6 +63,17 @@
/* Default time granularity in nanoseconds */
#define DEFAULT_TIME_GRAN 1000000000
+static int get_default_compressor(struct ubifs_info *c)
+{
+ if (ubifs_compr_present(c, UBIFS_COMPR_LZO))
+ return UBIFS_COMPR_LZO;
+
+ if (ubifs_compr_present(c, UBIFS_COMPR_ZLIB))
+ return UBIFS_COMPR_ZLIB;
+
+ return UBIFS_COMPR_NONE;
+}
+
/**
* create_default_filesystem - format empty UBI volume.
* @c: UBIFS file-system description object
@@ -207,7 +218,7 @@ static int create_default_filesystem(struct ubifs_info *c)
if (c->mount_opts.override_compr)
sup->default_compr = cpu_to_le16(c->mount_opts.compr_type);
else
- sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO);
+ sup->default_compr = cpu_to_le16(get_default_compressor(c));
generate_random_uuid(sup->uuid);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 5df554a9f9c9..ae796e10f68b 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1357,6 +1357,12 @@ reread:
iinfo->i_alloc_type = le16_to_cpu(fe->icbTag.flags) &
ICBTAG_FLAG_AD_MASK;
+ if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_SHORT &&
+ iinfo->i_alloc_type != ICBTAG_FLAG_AD_LONG &&
+ iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
+ ret = -EIO;
+ goto out;
+ }
iinfo->i_unique = 0;
iinfo->i_lenEAttr = 0;
iinfo->i_lenExtents = 0;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 7a85e609fc27..89800fc7dc9d 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -53,7 +53,7 @@ struct userfaultfd_ctx {
/* a refile sequence protected by fault_pending_wqh lock */
struct seqcount refile_seq;
/* pseudo fd refcounting */
- atomic_t refcount;
+ refcount_t refcount;
/* userfaultfd syscall flags */
unsigned int flags;
/* features requested from the userspace */
@@ -140,8 +140,7 @@ out:
*/
static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx)
{
- if (!atomic_inc_not_zero(&ctx->refcount))
- BUG();
+ refcount_inc(&ctx->refcount);
}
/**
@@ -154,7 +153,7 @@ static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx)
*/
static void userfaultfd_ctx_put(struct userfaultfd_ctx *ctx)
{
- if (atomic_dec_and_test(&ctx->refcount)) {
+ if (refcount_dec_and_test(&ctx->refcount)) {
VM_BUG_ON(spin_is_locked(&ctx->fault_pending_wqh.lock));
VM_BUG_ON(waitqueue_active(&ctx->fault_pending_wqh));
VM_BUG_ON(spin_is_locked(&ctx->fault_wqh.lock));
@@ -686,7 +685,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
return -ENOMEM;
}
- atomic_set(&ctx->refcount, 1);
+ refcount_set(&ctx->refcount, 1);
ctx->flags = octx->flags;
ctx->state = UFFD_STATE_RUNNING;
ctx->features = octx->features;
@@ -736,10 +735,18 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
struct userfaultfd_ctx *ctx;
ctx = vma->vm_userfaultfd_ctx.ctx;
- if (ctx && (ctx->features & UFFD_FEATURE_EVENT_REMAP)) {
+
+ if (!ctx)
+ return;
+
+ if (ctx->features & UFFD_FEATURE_EVENT_REMAP) {
vm_ctx->ctx = ctx;
userfaultfd_ctx_get(ctx);
WRITE_ONCE(ctx->mmap_changing, true);
+ } else {
+ /* Drop uffd context if remap feature not enabled */
+ vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
+ vma->vm_flags &= ~(VM_UFFD_WP | VM_UFFD_MISSING);
}
}
@@ -926,7 +933,7 @@ static inline struct userfaultfd_wait_queue *find_userfault_in(
wait_queue_entry_t *wq;
struct userfaultfd_wait_queue *uwq;
- VM_BUG_ON(!spin_is_locked(&wqh->lock));
+ lockdep_assert_held(&wqh->lock);
uwq = NULL;
if (!waitqueue_active(wqh))
@@ -1927,7 +1934,7 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
if (!ctx)
return -ENOMEM;
- atomic_set(&ctx->refcount, 1);
+ refcount_set(&ctx->refcount, 1);
ctx->flags = flags;
ctx->features = 0;
ctx->state = UFFD_STATE_WAIT_API;
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 9345802c99f7..999ad8d00d43 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -414,7 +414,6 @@ xfs_ag_extend_space(
struct aghdr_init_data *id,
xfs_extlen_t len)
{
- struct xfs_owner_info oinfo;
struct xfs_buf *bp;
struct xfs_agi *agi;
struct xfs_agf *agf;
@@ -448,17 +447,17 @@ xfs_ag_extend_space(
/*
* Free the new space.
*
- * XFS_RMAP_OWN_NULL is used here to tell the rmap btree that
+ * XFS_RMAP_OINFO_SKIP_UPDATE is used here to tell the rmap btree that
* this doesn't actually exist in the rmap btree.
*/
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
error = xfs_rmap_free(tp, bp, id->agno,
be32_to_cpu(agf->agf_length) - len,
- len, &oinfo);
+ len, &XFS_RMAP_OINFO_SKIP_UPDATE);
if (error)
return error;
return xfs_free_extent(tp, XFS_AGB_TO_FSB(mp, id->agno,
be32_to_cpu(agf->agf_length) - len),
- len, &oinfo, XFS_AG_RESV_NONE);
+ len, &XFS_RMAP_OINFO_SKIP_UPDATE,
+ XFS_AG_RESV_NONE);
}
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index e1c0c0d2f1b0..b715668886a4 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -1594,7 +1594,6 @@ xfs_alloc_ag_vextent_small(
xfs_extlen_t *flenp, /* result length */
int *stat) /* status: 0-freelist, 1-normal/none */
{
- struct xfs_owner_info oinfo;
int error;
xfs_agblock_t fbno;
xfs_extlen_t flen;
@@ -1648,9 +1647,8 @@ xfs_alloc_ag_vextent_small(
* doesn't live in the free space, we need to clear
* out the OWN_AG rmap.
*/
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
error = xfs_rmap_free(args->tp, args->agbp, args->agno,
- fbno, 1, &oinfo);
+ fbno, 1, &XFS_RMAP_OINFO_AG);
if (error)
goto error0;
@@ -1694,28 +1692,28 @@ error0:
*/
STATIC int
xfs_free_ag_extent(
- xfs_trans_t *tp,
- xfs_buf_t *agbp,
- xfs_agnumber_t agno,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- struct xfs_owner_info *oinfo,
- enum xfs_ag_resv_type type)
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ xfs_agnumber_t agno,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ const struct xfs_owner_info *oinfo,
+ enum xfs_ag_resv_type type)
{
- xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */
- xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */
- int error; /* error return value */
- xfs_agblock_t gtbno; /* start of right neighbor block */
- xfs_extlen_t gtlen; /* length of right neighbor block */
- int haveleft; /* have a left neighbor block */
- int haveright; /* have a right neighbor block */
- int i; /* temp, result code */
- xfs_agblock_t ltbno; /* start of left neighbor block */
- xfs_extlen_t ltlen; /* length of left neighbor block */
- xfs_mount_t *mp; /* mount point struct for filesystem */
- xfs_agblock_t nbno; /* new starting block of freespace */
- xfs_extlen_t nlen; /* new length of freespace */
- xfs_perag_t *pag; /* per allocation group data */
+ struct xfs_mount *mp;
+ struct xfs_perag *pag;
+ struct xfs_btree_cur *bno_cur;
+ struct xfs_btree_cur *cnt_cur;
+ xfs_agblock_t gtbno; /* start of right neighbor */
+ xfs_extlen_t gtlen; /* length of right neighbor */
+ xfs_agblock_t ltbno; /* start of left neighbor */
+ xfs_extlen_t ltlen; /* length of left neighbor */
+ xfs_agblock_t nbno; /* new starting block of freesp */
+ xfs_extlen_t nlen; /* new length of freespace */
+ int haveleft; /* have a left neighbor */
+ int haveright; /* have a right neighbor */
+ int i;
+ int error;
bno_cur = cnt_cur = NULL;
mp = tp->t_mountp;
@@ -2314,10 +2312,11 @@ xfs_alloc_fix_freelist(
* repair/rmap.c in xfsprogs for details.
*/
memset(&targs, 0, sizeof(targs));
+ /* struct copy below */
if (flags & XFS_ALLOC_FLAG_NORMAP)
- xfs_rmap_skip_owner_update(&targs.oinfo);
+ targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
else
- xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG);
+ targs.oinfo = XFS_RMAP_OINFO_AG;
while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
if (error)
@@ -2435,7 +2434,6 @@ xfs_alloc_get_freelist(
be32_add_cpu(&agf->agf_flcount, -1);
xfs_trans_agflist_delta(tp, -1);
pag->pagf_flcount--;
- xfs_perag_put(pag);
logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
if (btreeblk) {
@@ -2443,6 +2441,7 @@ xfs_alloc_get_freelist(
pag->pagf_btreeblks++;
logflags |= XFS_AGF_BTREEBLKS;
}
+ xfs_perag_put(pag);
xfs_alloc_log_agf(tp, agbp, logflags);
*bnop = bno;
@@ -3008,21 +3007,21 @@ out:
* Just break up the extent address and hand off to xfs_free_ag_extent
* after fixing up the freelist.
*/
-int /* error */
+int
__xfs_free_extent(
- struct xfs_trans *tp, /* transaction pointer */
- xfs_fsblock_t bno, /* starting block number of extent */
- xfs_extlen_t len, /* length of extent */
- struct xfs_owner_info *oinfo, /* extent owner */
- enum xfs_ag_resv_type type, /* block reservation type */
- bool skip_discard)
+ struct xfs_trans *tp,
+ xfs_fsblock_t bno,
+ xfs_extlen_t len,
+ const struct xfs_owner_info *oinfo,
+ enum xfs_ag_resv_type type,
+ bool skip_discard)
{
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_buf *agbp;
- xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno);
- xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno);
- int error;
- unsigned int busy_flags = 0;
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_buf *agbp;
+ xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno);
+ xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno);
+ int error;
+ unsigned int busy_flags = 0;
ASSERT(len != 0);
ASSERT(type != XFS_AG_RESV_AGFL);
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 00cd5ec4cb6b..d6ed5d2c07c2 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -182,7 +182,7 @@ __xfs_free_extent(
struct xfs_trans *tp, /* transaction pointer */
xfs_fsblock_t bno, /* starting block number of extent */
xfs_extlen_t len, /* length of extent */
- struct xfs_owner_info *oinfo, /* extent owner */
+ const struct xfs_owner_info *oinfo, /* extent owner */
enum xfs_ag_resv_type type, /* block reservation type */
bool skip_discard);
@@ -191,7 +191,7 @@ xfs_free_extent(
struct xfs_trans *tp,
xfs_fsblock_t bno,
xfs_extlen_t len,
- struct xfs_owner_info *oinfo,
+ const struct xfs_owner_info *oinfo,
enum xfs_ag_resv_type type)
{
return __xfs_free_extent(tp, bno, len, oinfo, type, false);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 19e921d1586f..332eefa2700b 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -536,7 +536,7 @@ __xfs_bmap_add_free(
struct xfs_trans *tp,
xfs_fsblock_t bno,
xfs_filblks_t len,
- struct xfs_owner_info *oinfo,
+ const struct xfs_owner_info *oinfo,
bool skip_discard)
{
struct xfs_extent_free_item *new; /* new element */
@@ -564,7 +564,7 @@ __xfs_bmap_add_free(
if (oinfo)
new->xefi_oinfo = *oinfo;
else
- xfs_rmap_skip_owner_update(&new->xefi_oinfo);
+ new->xefi_oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
new->xefi_skip_discard = skip_discard;
trace_xfs_bmap_free_defer(tp->t_mountp,
XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
@@ -3453,7 +3453,7 @@ xfs_bmap_btalloc(
args.tp = ap->tp;
args.mp = mp;
args.fsbno = ap->blkno;
- xfs_rmap_skip_owner_update(&args.oinfo);
+ args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
/* Trim the allocation back to the maximum an AG can fit. */
args.maxlen = min(ap->length, mp->m_ag_max_usable);
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 488dc8860fd7..09d3ea97cc15 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -186,7 +186,7 @@ int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
int xfs_bmap_set_attrforkoff(struct xfs_inode *ip, int size, int *version);
void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
void __xfs_bmap_add_free(struct xfs_trans *tp, xfs_fsblock_t bno,
- xfs_filblks_t len, struct xfs_owner_info *oinfo,
+ xfs_filblks_t len, const struct xfs_owner_info *oinfo,
bool skip_discard);
void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork);
int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,
@@ -234,7 +234,7 @@ xfs_bmap_add_free(
struct xfs_trans *tp,
xfs_fsblock_t bno,
xfs_filblks_t len,
- struct xfs_owner_info *oinfo)
+ const struct xfs_owner_info *oinfo)
{
__xfs_bmap_add_free(tp, bno, len, oinfo, false);
}
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index e792b167150a..94f00427de98 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -172,7 +172,13 @@
* reoccur.
*/
-static const struct xfs_defer_op_type *defer_op_types[XFS_DEFER_OPS_TYPE_MAX];
+static const struct xfs_defer_op_type *defer_op_types[] = {
+ [XFS_DEFER_OPS_TYPE_BMAP] = &xfs_bmap_update_defer_type,
+ [XFS_DEFER_OPS_TYPE_REFCOUNT] = &xfs_refcount_update_defer_type,
+ [XFS_DEFER_OPS_TYPE_RMAP] = &xfs_rmap_update_defer_type,
+ [XFS_DEFER_OPS_TYPE_FREE] = &xfs_extent_free_defer_type,
+ [XFS_DEFER_OPS_TYPE_AGFL_FREE] = &xfs_agfl_free_defer_type,
+};
/*
* For each pending item in the intake list, log its intent item and the
@@ -185,15 +191,15 @@ xfs_defer_create_intents(
{
struct list_head *li;
struct xfs_defer_pending *dfp;
+ const struct xfs_defer_op_type *ops;
list_for_each_entry(dfp, &tp->t_dfops, dfp_list) {
- dfp->dfp_intent = dfp->dfp_type->create_intent(tp,
- dfp->dfp_count);
+ ops = defer_op_types[dfp->dfp_type];
+ dfp->dfp_intent = ops->create_intent(tp, dfp->dfp_count);
trace_xfs_defer_create_intent(tp->t_mountp, dfp);
- list_sort(tp->t_mountp, &dfp->dfp_work,
- dfp->dfp_type->diff_items);
+ list_sort(tp->t_mountp, &dfp->dfp_work, ops->diff_items);
list_for_each(li, &dfp->dfp_work)
- dfp->dfp_type->log_item(tp, dfp->dfp_intent, li);
+ ops->log_item(tp, dfp->dfp_intent, li);
}
}
@@ -204,14 +210,16 @@ xfs_defer_trans_abort(
struct list_head *dop_pending)
{
struct xfs_defer_pending *dfp;
+ const struct xfs_defer_op_type *ops;
trace_xfs_defer_trans_abort(tp, _RET_IP_);
/* Abort intent items that don't have a done item. */
list_for_each_entry(dfp, dop_pending, dfp_list) {
+ ops = defer_op_types[dfp->dfp_type];
trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
if (dfp->dfp_intent && !dfp->dfp_done) {
- dfp->dfp_type->abort_intent(dfp->dfp_intent);
+ ops->abort_intent(dfp->dfp_intent);
dfp->dfp_intent = NULL;
}
}
@@ -315,18 +323,20 @@ xfs_defer_cancel_list(
struct xfs_defer_pending *pli;
struct list_head *pwi;
struct list_head *n;
+ const struct xfs_defer_op_type *ops;
/*
* Free the pending items. Caller should already have arranged
* for the intent items to be released.
*/
list_for_each_entry_safe(dfp, pli, dop_list, dfp_list) {
+ ops = defer_op_types[dfp->dfp_type];
trace_xfs_defer_cancel_list(mp, dfp);
list_del(&dfp->dfp_list);
list_for_each_safe(pwi, n, &dfp->dfp_work) {
list_del(pwi);
dfp->dfp_count--;
- dfp->dfp_type->cancel_item(pwi);
+ ops->cancel_item(pwi);
}
ASSERT(dfp->dfp_count == 0);
kmem_free(dfp);
@@ -350,7 +360,7 @@ xfs_defer_finish_noroll(
struct list_head *n;
void *state;
int error = 0;
- void (*cleanup_fn)(struct xfs_trans *, void *, int);
+ const struct xfs_defer_op_type *ops;
LIST_HEAD(dop_pending);
ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
@@ -373,18 +383,18 @@ xfs_defer_finish_noroll(
/* Log an intent-done item for the first pending item. */
dfp = list_first_entry(&dop_pending, struct xfs_defer_pending,
dfp_list);
+ ops = defer_op_types[dfp->dfp_type];
trace_xfs_defer_pending_finish((*tp)->t_mountp, dfp);
- dfp->dfp_done = dfp->dfp_type->create_done(*tp, dfp->dfp_intent,
+ dfp->dfp_done = ops->create_done(*tp, dfp->dfp_intent,
dfp->dfp_count);
- cleanup_fn = dfp->dfp_type->finish_cleanup;
/* Finish the work items. */
state = NULL;
list_for_each_safe(li, n, &dfp->dfp_work) {
list_del(li);
dfp->dfp_count--;
- error = dfp->dfp_type->finish_item(*tp, li,
- dfp->dfp_done, &state);
+ error = ops->finish_item(*tp, li, dfp->dfp_done,
+ &state);
if (error == -EAGAIN) {
/*
* Caller wants a fresh transaction;
@@ -400,8 +410,8 @@ xfs_defer_finish_noroll(
* xfs_defer_cancel will take care of freeing
* all these lists and stuff.
*/
- if (cleanup_fn)
- cleanup_fn(*tp, state, error);
+ if (ops->finish_cleanup)
+ ops->finish_cleanup(*tp, state, error);
goto out;
}
}
@@ -413,20 +423,19 @@ xfs_defer_finish_noroll(
* a Fresh Transaction while Finishing
* Deferred Work" above.
*/
- dfp->dfp_intent = dfp->dfp_type->create_intent(*tp,
+ dfp->dfp_intent = ops->create_intent(*tp,
dfp->dfp_count);
dfp->dfp_done = NULL;
list_for_each(li, &dfp->dfp_work)
- dfp->dfp_type->log_item(*tp, dfp->dfp_intent,
- li);
+ ops->log_item(*tp, dfp->dfp_intent, li);
} else {
/* Done with the dfp, free it. */
list_del(&dfp->dfp_list);
kmem_free(dfp);
}
- if (cleanup_fn)
- cleanup_fn(*tp, state, error);
+ if (ops->finish_cleanup)
+ ops->finish_cleanup(*tp, state, error);
}
out:
@@ -486,8 +495,10 @@ xfs_defer_add(
struct list_head *li)
{
struct xfs_defer_pending *dfp = NULL;
+ const struct xfs_defer_op_type *ops;
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
+ BUILD_BUG_ON(ARRAY_SIZE(defer_op_types) != XFS_DEFER_OPS_TYPE_MAX);
/*
* Add the item to a pending item at the end of the intake list.
@@ -497,15 +508,15 @@ xfs_defer_add(
if (!list_empty(&tp->t_dfops)) {
dfp = list_last_entry(&tp->t_dfops,
struct xfs_defer_pending, dfp_list);
- if (dfp->dfp_type->type != type ||
- (dfp->dfp_type->max_items &&
- dfp->dfp_count >= dfp->dfp_type->max_items))
+ ops = defer_op_types[dfp->dfp_type];
+ if (dfp->dfp_type != type ||
+ (ops->max_items && dfp->dfp_count >= ops->max_items))
dfp = NULL;
}
if (!dfp) {
dfp = kmem_alloc(sizeof(struct xfs_defer_pending),
KM_SLEEP | KM_NOFS);
- dfp->dfp_type = defer_op_types[type];
+ dfp->dfp_type = type;
dfp->dfp_intent = NULL;
dfp->dfp_done = NULL;
dfp->dfp_count = 0;
@@ -517,14 +528,6 @@ xfs_defer_add(
dfp->dfp_count++;
}
-/* Initialize a deferred operation list. */
-void
-xfs_defer_init_op_type(
- const struct xfs_defer_op_type *type)
-{
- defer_op_types[type->type] = type;
-}
-
/*
* Move deferred ops from one transaction to another and reset the source to
* initial state. This is primarily used to carry state forward across
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 2584a5b95b0d..7c28d7608ac6 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -9,20 +9,6 @@
struct xfs_defer_op_type;
/*
- * Save a log intent item and a list of extents, so that we can replay
- * whatever action had to happen to the extent list and file the log done
- * item.
- */
-struct xfs_defer_pending {
- const struct xfs_defer_op_type *dfp_type; /* function pointers */
- struct list_head dfp_list; /* pending items */
- void *dfp_intent; /* log intent item */
- void *dfp_done; /* log done item */
- struct list_head dfp_work; /* work items */
- unsigned int dfp_count; /* # extent items */
-};
-
-/*
* Header for deferred operation list.
*/
enum xfs_defer_ops_type {
@@ -34,6 +20,20 @@ enum xfs_defer_ops_type {
XFS_DEFER_OPS_TYPE_MAX,
};
+/*
+ * Save a log intent item and a list of extents, so that we can replay
+ * whatever action had to happen to the extent list and file the log done
+ * item.
+ */
+struct xfs_defer_pending {
+ struct list_head dfp_list; /* pending items */
+ struct list_head dfp_work; /* work items */
+ void *dfp_intent; /* log intent item */
+ void *dfp_done; /* log done item */
+ unsigned int dfp_count; /* # extent items */
+ enum xfs_defer_ops_type dfp_type;
+};
+
void xfs_defer_add(struct xfs_trans *tp, enum xfs_defer_ops_type type,
struct list_head *h);
int xfs_defer_finish_noroll(struct xfs_trans **tp);
@@ -43,8 +43,6 @@ void xfs_defer_move(struct xfs_trans *dtp, struct xfs_trans *stp);
/* Description of a deferred type. */
struct xfs_defer_op_type {
- enum xfs_defer_ops_type type;
- unsigned int max_items;
void (*abort_intent)(void *);
void *(*create_done)(struct xfs_trans *, void *, unsigned int);
int (*finish_item)(struct xfs_trans *, struct list_head *, void *,
@@ -54,8 +52,13 @@ struct xfs_defer_op_type {
int (*diff_items)(void *, struct list_head *, struct list_head *);
void *(*create_intent)(struct xfs_trans *, uint);
void (*log_item)(struct xfs_trans *, void *, struct list_head *);
+ unsigned int max_items;
};
-void xfs_defer_init_op_type(const struct xfs_defer_op_type *type);
+extern const struct xfs_defer_op_type xfs_bmap_update_defer_type;
+extern const struct xfs_defer_op_type xfs_refcount_update_defer_type;
+extern const struct xfs_defer_op_type xfs_rmap_update_defer_type;
+extern const struct xfs_defer_op_type xfs_extent_free_defer_type;
+extern const struct xfs_defer_op_type xfs_agfl_free_defer_type;
#endif /* __XFS_DEFER_H__ */
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 9995d5ae380b..9bb3c48843ec 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -916,6 +916,9 @@ static inline uint xfs_dinode_size(int version)
/*
* Values for di_format
+ *
+ * This enum is used in string mapping in xfs_trace.h; please keep the
+ * TRACE_DEFINE_ENUMs for it up to date.
*/
typedef enum xfs_dinode_fmt {
XFS_DINODE_FMT_DEV, /* xfs_dev_t */
@@ -925,6 +928,13 @@ typedef enum xfs_dinode_fmt {
XFS_DINODE_FMT_UUID /* added long ago, but never used */
} xfs_dinode_fmt_t;
+#define XFS_INODE_FORMAT_STR \
+ { XFS_DINODE_FMT_DEV, "dev" }, \
+ { XFS_DINODE_FMT_LOCAL, "local" }, \
+ { XFS_DINODE_FMT_EXTENTS, "extent" }, \
+ { XFS_DINODE_FMT_BTREE, "btree" }, \
+ { XFS_DINODE_FMT_UUID, "uuid" }
+
/*
* Inode minimum and maximum sizes.
*/
@@ -1083,6 +1093,8 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
((i) & XFS_INO_MASK(XFS_INO_OFFSET_BITS(mp)))
#define XFS_OFFBNO_TO_AGINO(mp,b,o) \
((xfs_agino_t)(((b) << XFS_INO_OFFSET_BITS(mp)) | (o)))
+#define XFS_FSB_TO_INO(mp, b) ((xfs_ino_t)((b) << XFS_INO_OFFSET_BITS(mp)))
+#define XFS_AGB_TO_AGINO(mp, b) ((xfs_agino_t)((b) << XFS_INO_OFFSET_BITS(mp)))
#define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 56) - 1ULL))
#define XFS_MAXINUMBER_32 ((xfs_ino_t)((1ULL << 32) - 1ULL))
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index a8f6db735d5d..d32152fc8a6c 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -288,7 +288,7 @@ xfs_ialloc_inode_init(
{
struct xfs_buf *fbuf;
struct xfs_dinode *free;
- int nbufs, blks_per_cluster, inodes_per_cluster;
+ int nbufs;
int version;
int i, j;
xfs_daddr_t d;
@@ -299,9 +299,7 @@ xfs_ialloc_inode_init(
* sizes, manipulate the inodes in buffers which are multiples of the
* blocks size.
*/
- blks_per_cluster = xfs_icluster_size_fsb(mp);
- inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
- nbufs = length / blks_per_cluster;
+ nbufs = length / mp->m_blocks_per_cluster;
/*
* Figure out what version number to use in the inodes we create. If
@@ -312,7 +310,7 @@ xfs_ialloc_inode_init(
*
* For v3 inodes, we also need to write the inode number into the inode,
* so calculate the first inode number of the chunk here as
- * XFS_OFFBNO_TO_AGINO() only works within a filesystem block, not
+ * XFS_AGB_TO_AGINO() only works within a filesystem block, not
* across multiple filesystem blocks (such as a cluster) and so cannot
* be used in the cluster buffer loop below.
*
@@ -324,8 +322,7 @@ xfs_ialloc_inode_init(
*/
if (xfs_sb_version_hascrc(&mp->m_sb)) {
version = 3;
- ino = XFS_AGINO_TO_INO(mp, agno,
- XFS_OFFBNO_TO_AGINO(mp, agbno, 0));
+ ino = XFS_AGINO_TO_INO(mp, agno, XFS_AGB_TO_AGINO(mp, agbno));
/*
* log the initialisation that is about to take place as an
@@ -345,9 +342,10 @@ xfs_ialloc_inode_init(
/*
* Get the block.
*/
- d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster));
+ d = XFS_AGB_TO_DADDR(mp, agno, agbno +
+ (j * mp->m_blocks_per_cluster));
fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
- mp->m_bsize * blks_per_cluster,
+ mp->m_bsize * mp->m_blocks_per_cluster,
XBF_UNMAPPED);
if (!fbuf)
return -ENOMEM;
@@ -355,7 +353,7 @@ xfs_ialloc_inode_init(
/* Initialize the inode buffers and log them appropriately. */
fbuf->b_ops = &xfs_inode_buf_ops;
xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
- for (i = 0; i < inodes_per_cluster; i++) {
+ for (i = 0; i < mp->m_inodes_per_cluster; i++) {
int ioffset = i << mp->m_sb.sb_inodelog;
uint isize = xfs_dinode_size(version);
@@ -445,7 +443,7 @@ xfs_align_sparse_ino(
return;
/* calculate the inode offset and align startino */
- offset = mod << mp->m_sb.sb_inopblog;
+ offset = XFS_AGB_TO_AGINO(mp, mod);
*startino -= offset;
/*
@@ -641,7 +639,7 @@ xfs_ialloc_ag_alloc(
args.tp = tp;
args.mp = tp->t_mountp;
args.fsbno = NULLFSBLOCK;
- xfs_rmap_ag_owner(&args.oinfo, XFS_RMAP_OWN_INODES);
+ args.oinfo = XFS_RMAP_OINFO_INODES;
#ifdef DEBUG
/* randomly do sparse inode allocations */
@@ -692,7 +690,7 @@ xfs_ialloc_ag_alloc(
* but not to use them in the actual exact allocation.
*/
args.alignment = 1;
- args.minalignslop = xfs_ialloc_cluster_alignment(args.mp) - 1;
+ args.minalignslop = args.mp->m_cluster_align - 1;
/* Allow space for the inode btree to split. */
args.minleft = args.mp->m_in_maxlevels - 1;
@@ -727,7 +725,7 @@ xfs_ialloc_ag_alloc(
args.alignment = args.mp->m_dalign;
isaligned = 1;
} else
- args.alignment = xfs_ialloc_cluster_alignment(args.mp);
+ args.alignment = args.mp->m_cluster_align;
/*
* Need to figure out where to allocate the inode blocks.
* Ideally they should be spaced out through the a.g.
@@ -756,7 +754,7 @@ xfs_ialloc_ag_alloc(
args.type = XFS_ALLOCTYPE_NEAR_BNO;
args.agbno = be32_to_cpu(agi->agi_root);
args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
- args.alignment = xfs_ialloc_cluster_alignment(args.mp);
+ args.alignment = args.mp->m_cluster_align;
if ((error = xfs_alloc_vextent(&args)))
return error;
}
@@ -797,7 +795,7 @@ sparse_alloc:
if (error)
return error;
- newlen = args.len << args.mp->m_sb.sb_inopblog;
+ newlen = XFS_AGB_TO_AGINO(args.mp, args.len);
ASSERT(newlen <= XFS_INODES_PER_CHUNK);
allocmask = (1 << (newlen / XFS_INODES_PER_HOLEMASK_BIT)) - 1;
}
@@ -825,7 +823,7 @@ sparse_alloc:
/*
* Convert the results.
*/
- newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
+ newino = XFS_AGB_TO_AGINO(args.mp, args.agbno);
if (xfs_inobt_issparse(~allocmask)) {
/*
@@ -1019,7 +1017,7 @@ xfs_ialloc_ag_select(
*/
ineed = mp->m_ialloc_min_blks;
if (flags && ineed > 1)
- ineed += xfs_ialloc_cluster_alignment(mp);
+ ineed += mp->m_cluster_align;
longest = pag->pagf_longest;
if (!longest)
longest = pag->pagf_flcount > 0;
@@ -1849,14 +1847,12 @@ xfs_difree_inode_chunk(
int nextbit;
xfs_agblock_t agbno;
int contigblk;
- struct xfs_owner_info oinfo;
DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS);
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
if (!xfs_inobt_issparse(rec->ir_holemask)) {
/* not sparse, calculate extent info directly */
xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, sagbno),
- mp->m_ialloc_blks, &oinfo);
+ mp->m_ialloc_blks, &XFS_RMAP_OINFO_INODES);
return;
}
@@ -1900,7 +1896,7 @@ xfs_difree_inode_chunk(
ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, agbno),
- contigblk, &oinfo);
+ contigblk, &XFS_RMAP_OINFO_INODES);
/* reset range to current bit and carry on... */
startidx = endidx = nextbit;
@@ -2292,7 +2288,6 @@ xfs_imap(
xfs_agblock_t agbno; /* block number of inode in the alloc group */
xfs_agino_t agino; /* inode number within alloc group */
xfs_agnumber_t agno; /* allocation group number */
- int blks_per_cluster; /* num blocks per inode cluster */
xfs_agblock_t chunk_agbno; /* first block in inode chunk */
xfs_agblock_t cluster_agbno; /* first block in inode cluster */
int error; /* error code */
@@ -2338,8 +2333,6 @@ xfs_imap(
return -EINVAL;
}
- blks_per_cluster = xfs_icluster_size_fsb(mp);
-
/*
* For bulkstat and handle lookups, we have an untrusted inode number
* that we have to verify is valid. We cannot do this just by reading
@@ -2359,7 +2352,7 @@ xfs_imap(
* If the inode cluster size is the same as the blocksize or
* smaller we get to the buffer by simple arithmetics.
*/
- if (blks_per_cluster == 1) {
+ if (mp->m_blocks_per_cluster == 1) {
offset = XFS_INO_TO_OFFSET(mp, ino);
ASSERT(offset < mp->m_sb.sb_inopblock);
@@ -2388,12 +2381,13 @@ xfs_imap(
out_map:
ASSERT(agbno >= chunk_agbno);
cluster_agbno = chunk_agbno +
- ((offset_agbno / blks_per_cluster) * blks_per_cluster);
+ ((offset_agbno / mp->m_blocks_per_cluster) *
+ mp->m_blocks_per_cluster);
offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
XFS_INO_TO_OFFSET(mp, ino);
imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno);
- imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
+ imap->im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster);
imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog);
/*
@@ -2726,8 +2720,8 @@ xfs_ialloc_has_inodes_at_extent(
xfs_agino_t low;
xfs_agino_t high;
- low = XFS_OFFBNO_TO_AGINO(cur->bc_mp, bno, 0);
- high = XFS_OFFBNO_TO_AGINO(cur->bc_mp, bno + len, 0) - 1;
+ low = XFS_AGB_TO_AGINO(cur->bc_mp, bno);
+ high = XFS_AGB_TO_AGINO(cur->bc_mp, bno + len) - 1;
return xfs_ialloc_has_inode_record(cur, low, high, exists);
}
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 7fbf8af0b159..9b25e7a0df47 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -84,7 +84,7 @@ __xfs_inobt_alloc_block(
memset(&args, 0, sizeof(args));
args.tp = cur->bc_tp;
args.mp = cur->bc_mp;
- xfs_rmap_ag_owner(&args.oinfo, XFS_RMAP_OWN_INOBT);
+ args.oinfo = XFS_RMAP_OINFO_INOBT;
args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno);
args.minlen = 1;
args.maxlen = 1;
@@ -136,12 +136,9 @@ __xfs_inobt_free_block(
struct xfs_buf *bp,
enum xfs_ag_resv_type resv)
{
- struct xfs_owner_info oinfo;
-
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
return xfs_free_extent(cur->bc_tp,
XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1,
- &oinfo, resv);
+ &XFS_RMAP_OINFO_INOBT, resv);
}
STATIC int
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 1aaa01c97517..d9eab657b63e 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -70,7 +70,7 @@ xfs_refcountbt_alloc_block(
args.type = XFS_ALLOCTYPE_NEAR_BNO;
args.fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno,
xfs_refc_block(args.mp));
- xfs_rmap_ag_owner(&args.oinfo, XFS_RMAP_OWN_REFC);
+ args.oinfo = XFS_RMAP_OINFO_REFC;
args.minlen = args.maxlen = args.prod = 1;
args.resv = XFS_AG_RESV_METADATA;
@@ -106,15 +106,13 @@ xfs_refcountbt_free_block(
struct xfs_buf *agbp = cur->bc_private.a.agbp;
struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
- struct xfs_owner_info oinfo;
int error;
trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_private.a.agno,
XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1);
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC);
be32_add_cpu(&agf->agf_refcount_blocks, -1);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
- error = xfs_free_extent(cur->bc_tp, fsbno, 1, &oinfo,
+ error = xfs_free_extent(cur->bc_tp, fsbno, 1, &XFS_RMAP_OINFO_REFC,
XFS_AG_RESV_METADATA);
if (error)
return error;
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 245af452840e..8ed885507dd8 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -458,21 +458,21 @@ out:
*/
STATIC int
xfs_rmap_unmap(
- struct xfs_btree_cur *cur,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- bool unwritten,
- struct xfs_owner_info *oinfo)
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ bool unwritten,
+ const struct xfs_owner_info *oinfo)
{
- struct xfs_mount *mp = cur->bc_mp;
- struct xfs_rmap_irec ltrec;
- uint64_t ltoff;
- int error = 0;
- int i;
- uint64_t owner;
- uint64_t offset;
- unsigned int flags;
- bool ignore_off;
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_rmap_irec ltrec;
+ uint64_t ltoff;
+ int error = 0;
+ int i;
+ uint64_t owner;
+ uint64_t offset;
+ unsigned int flags;
+ bool ignore_off;
xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
ignore_off = XFS_RMAP_NON_INODE_OWNER(owner) ||
@@ -653,16 +653,16 @@ out_error:
*/
int
xfs_rmap_free(
- struct xfs_trans *tp,
- struct xfs_buf *agbp,
- xfs_agnumber_t agno,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- struct xfs_owner_info *oinfo)
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ xfs_agnumber_t agno,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ const struct xfs_owner_info *oinfo)
{
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_btree_cur *cur;
- int error;
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_btree_cur *cur;
+ int error;
if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
return 0;
@@ -710,23 +710,23 @@ xfs_rmap_is_mergeable(
*/
STATIC int
xfs_rmap_map(
- struct xfs_btree_cur *cur,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- bool unwritten,
- struct xfs_owner_info *oinfo)
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ bool unwritten,
+ const struct xfs_owner_info *oinfo)
{
- struct xfs_mount *mp = cur->bc_mp;
- struct xfs_rmap_irec ltrec;
- struct xfs_rmap_irec gtrec;
- int have_gt;
- int have_lt;
- int error = 0;
- int i;
- uint64_t owner;
- uint64_t offset;
- unsigned int flags = 0;
- bool ignore_off;
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_rmap_irec ltrec;
+ struct xfs_rmap_irec gtrec;
+ int have_gt;
+ int have_lt;
+ int error = 0;
+ int i;
+ uint64_t owner;
+ uint64_t offset;
+ unsigned int flags = 0;
+ bool ignore_off;
xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
ASSERT(owner != 0);
@@ -890,16 +890,16 @@ out_error:
*/
int
xfs_rmap_alloc(
- struct xfs_trans *tp,
- struct xfs_buf *agbp,
- xfs_agnumber_t agno,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- struct xfs_owner_info *oinfo)
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ xfs_agnumber_t agno,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ const struct xfs_owner_info *oinfo)
{
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_btree_cur *cur;
- int error;
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_btree_cur *cur;
+ int error;
if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
return 0;
@@ -929,16 +929,16 @@ xfs_rmap_alloc(
*/
STATIC int
xfs_rmap_convert(
- struct xfs_btree_cur *cur,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- bool unwritten,
- struct xfs_owner_info *oinfo)
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ bool unwritten,
+ const struct xfs_owner_info *oinfo)
{
- struct xfs_mount *mp = cur->bc_mp;
- struct xfs_rmap_irec r[4]; /* neighbor extent entries */
- /* left is 0, right is 1, prev is 2 */
- /* new is 3 */
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_rmap_irec r[4]; /* neighbor extent entries */
+ /* left is 0, right is 1, */
+ /* prev is 2, new is 3 */
uint64_t owner;
uint64_t offset;
uint64_t new_endoff;
@@ -1354,16 +1354,16 @@ done:
*/
STATIC int
xfs_rmap_convert_shared(
- struct xfs_btree_cur *cur,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- bool unwritten,
- struct xfs_owner_info *oinfo)
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ bool unwritten,
+ const struct xfs_owner_info *oinfo)
{
- struct xfs_mount *mp = cur->bc_mp;
- struct xfs_rmap_irec r[4]; /* neighbor extent entries */
- /* left is 0, right is 1, prev is 2 */
- /* new is 3 */
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_rmap_irec r[4]; /* neighbor extent entries */
+ /* left is 0, right is 1, */
+ /* prev is 2, new is 3 */
uint64_t owner;
uint64_t offset;
uint64_t new_endoff;
@@ -1743,20 +1743,20 @@ done:
*/
STATIC int
xfs_rmap_unmap_shared(
- struct xfs_btree_cur *cur,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- bool unwritten,
- struct xfs_owner_info *oinfo)
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ bool unwritten,
+ const struct xfs_owner_info *oinfo)
{
- struct xfs_mount *mp = cur->bc_mp;
- struct xfs_rmap_irec ltrec;
- uint64_t ltoff;
- int error = 0;
- int i;
- uint64_t owner;
- uint64_t offset;
- unsigned int flags;
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_rmap_irec ltrec;
+ uint64_t ltoff;
+ int error = 0;
+ int i;
+ uint64_t owner;
+ uint64_t offset;
+ unsigned int flags;
xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
if (unwritten)
@@ -1905,22 +1905,22 @@ out_error:
*/
STATIC int
xfs_rmap_map_shared(
- struct xfs_btree_cur *cur,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- bool unwritten,
- struct xfs_owner_info *oinfo)
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ bool unwritten,
+ const struct xfs_owner_info *oinfo)
{
- struct xfs_mount *mp = cur->bc_mp;
- struct xfs_rmap_irec ltrec;
- struct xfs_rmap_irec gtrec;
- int have_gt;
- int have_lt;
- int error = 0;
- int i;
- uint64_t owner;
- uint64_t offset;
- unsigned int flags = 0;
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_rmap_irec ltrec;
+ struct xfs_rmap_irec gtrec;
+ int have_gt;
+ int have_lt;
+ int error = 0;
+ int i;
+ uint64_t owner;
+ uint64_t offset;
+ unsigned int flags = 0;
xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
if (unwritten)
@@ -2459,18 +2459,18 @@ xfs_rmap_has_record(
*/
int
xfs_rmap_record_exists(
- struct xfs_btree_cur *cur,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- struct xfs_owner_info *oinfo,
- bool *has_rmap)
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ const struct xfs_owner_info *oinfo,
+ bool *has_rmap)
{
- uint64_t owner;
- uint64_t offset;
- unsigned int flags;
- int has_record;
- struct xfs_rmap_irec irec;
- int error;
+ uint64_t owner;
+ uint64_t offset;
+ unsigned int flags;
+ int has_record;
+ struct xfs_rmap_irec irec;
+ int error;
xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
ASSERT(XFS_RMAP_NON_INODE_OWNER(owner) ||
@@ -2530,7 +2530,7 @@ xfs_rmap_has_other_keys(
struct xfs_btree_cur *cur,
xfs_agblock_t bno,
xfs_extlen_t len,
- struct xfs_owner_info *oinfo,
+ const struct xfs_owner_info *oinfo,
bool *has_rmap)
{
struct xfs_rmap_irec low = {0};
@@ -2550,3 +2550,31 @@ xfs_rmap_has_other_keys(
*has_rmap = rks.has_rmap;
return error;
}
+
+const struct xfs_owner_info XFS_RMAP_OINFO_SKIP_UPDATE = {
+ .oi_owner = XFS_RMAP_OWN_NULL,
+};
+const struct xfs_owner_info XFS_RMAP_OINFO_ANY_OWNER = {
+ .oi_owner = XFS_RMAP_OWN_UNKNOWN,
+};
+const struct xfs_owner_info XFS_RMAP_OINFO_FS = {
+ .oi_owner = XFS_RMAP_OWN_FS,
+};
+const struct xfs_owner_info XFS_RMAP_OINFO_LOG = {
+ .oi_owner = XFS_RMAP_OWN_LOG,
+};
+const struct xfs_owner_info XFS_RMAP_OINFO_AG = {
+ .oi_owner = XFS_RMAP_OWN_AG,
+};
+const struct xfs_owner_info XFS_RMAP_OINFO_INOBT = {
+ .oi_owner = XFS_RMAP_OWN_INOBT,
+};
+const struct xfs_owner_info XFS_RMAP_OINFO_INODES = {
+ .oi_owner = XFS_RMAP_OWN_INODES,
+};
+const struct xfs_owner_info XFS_RMAP_OINFO_REFC = {
+ .oi_owner = XFS_RMAP_OWN_REFC,
+};
+const struct xfs_owner_info XFS_RMAP_OINFO_COW = {
+ .oi_owner = XFS_RMAP_OWN_COW,
+};
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 157dc722ad35..e21ed0294e5c 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -7,16 +7,6 @@
#define __XFS_RMAP_H__
static inline void
-xfs_rmap_ag_owner(
- struct xfs_owner_info *oi,
- uint64_t owner)
-{
- oi->oi_owner = owner;
- oi->oi_offset = 0;
- oi->oi_flags = 0;
-}
-
-static inline void
xfs_rmap_ino_bmbt_owner(
struct xfs_owner_info *oi,
xfs_ino_t ino,
@@ -43,27 +33,13 @@ xfs_rmap_ino_owner(
oi->oi_flags |= XFS_OWNER_INFO_ATTR_FORK;
}
-static inline void
-xfs_rmap_skip_owner_update(
- struct xfs_owner_info *oi)
-{
- xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_NULL);
-}
-
static inline bool
xfs_rmap_should_skip_owner_update(
- struct xfs_owner_info *oi)
+ const struct xfs_owner_info *oi)
{
return oi->oi_owner == XFS_RMAP_OWN_NULL;
}
-static inline void
-xfs_rmap_any_owner_update(
- struct xfs_owner_info *oi)
-{
- xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_UNKNOWN);
-}
-
/* Reverse mapping functions. */
struct xfs_buf;
@@ -103,12 +79,12 @@ xfs_rmap_irec_offset_unpack(
static inline void
xfs_owner_info_unpack(
- struct xfs_owner_info *oinfo,
- uint64_t *owner,
- uint64_t *offset,
- unsigned int *flags)
+ const struct xfs_owner_info *oinfo,
+ uint64_t *owner,
+ uint64_t *offset,
+ unsigned int *flags)
{
- unsigned int r = 0;
+ unsigned int r = 0;
*owner = oinfo->oi_owner;
*offset = oinfo->oi_offset;
@@ -137,10 +113,10 @@ xfs_owner_info_pack(
int xfs_rmap_alloc(struct xfs_trans *tp, struct xfs_buf *agbp,
xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
- struct xfs_owner_info *oinfo);
+ const struct xfs_owner_info *oinfo);
int xfs_rmap_free(struct xfs_trans *tp, struct xfs_buf *agbp,
xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
- struct xfs_owner_info *oinfo);
+ const struct xfs_owner_info *oinfo);
int xfs_rmap_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno,
xfs_extlen_t len, uint64_t owner, uint64_t offset,
@@ -218,11 +194,21 @@ int xfs_rmap_btrec_to_irec(union xfs_btree_rec *rec,
int xfs_rmap_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno,
xfs_extlen_t len, bool *exists);
int xfs_rmap_record_exists(struct xfs_btree_cur *cur, xfs_agblock_t bno,
- xfs_extlen_t len, struct xfs_owner_info *oinfo,
+ xfs_extlen_t len, const struct xfs_owner_info *oinfo,
bool *has_rmap);
int xfs_rmap_has_other_keys(struct xfs_btree_cur *cur, xfs_agblock_t bno,
- xfs_extlen_t len, struct xfs_owner_info *oinfo,
+ xfs_extlen_t len, const struct xfs_owner_info *oinfo,
bool *has_rmap);
int xfs_rmap_map_raw(struct xfs_btree_cur *cur, struct xfs_rmap_irec *rmap);
+extern const struct xfs_owner_info XFS_RMAP_OINFO_SKIP_UPDATE;
+extern const struct xfs_owner_info XFS_RMAP_OINFO_ANY_OWNER;
+extern const struct xfs_owner_info XFS_RMAP_OINFO_FS;
+extern const struct xfs_owner_info XFS_RMAP_OINFO_LOG;
+extern const struct xfs_owner_info XFS_RMAP_OINFO_AG;
+extern const struct xfs_owner_info XFS_RMAP_OINFO_INOBT;
+extern const struct xfs_owner_info XFS_RMAP_OINFO_INODES;
+extern const struct xfs_owner_info XFS_RMAP_OINFO_REFC;
+extern const struct xfs_owner_info XFS_RMAP_OINFO_COW;
+
#endif /* __XFS_RMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index b228c821bae6..eaaff67e9626 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -505,6 +505,12 @@ xfs_rtmodify_summary_int(
uint first = (uint)((char *)sp - (char *)bp->b_addr);
*sp += delta;
+ if (mp->m_rsum_cache) {
+ if (*sp == 0 && log == mp->m_rsum_cache[bbno])
+ mp->m_rsum_cache[bbno]++;
+ if (*sp != 0 && log < mp->m_rsum_cache[bbno])
+ mp->m_rsum_cache[bbno] = log;
+ }
xfs_trans_log_buf(tp, bp, first, first + sizeof(*sp) - 1);
}
if (sum)
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index 95374ab2dee7..77d80106f989 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -199,7 +199,10 @@ xfs_symlink_local_to_remote(
ifp->if_bytes - 1);
}
-/* Verify the consistency of an inline symlink. */
+/*
+ * Verify the in-memory consistency of an inline symlink data fork. This
+ * does not do on-disk format checks.
+ */
xfs_failaddr_t
xfs_symlink_shortform_verify(
struct xfs_inode *ip)
@@ -215,9 +218,12 @@ xfs_symlink_shortform_verify(
size = ifp->if_bytes;
endp = sfp + size;
- /* Zero length symlinks can exist while we're deleting a remote one. */
- if (size == 0)
- return NULL;
+ /*
+ * Zero length symlinks should never occur in memory as they are
+ * never alllowed to exist on disk.
+ */
+ if (!size)
+ return __this_address;
/* No negative sizes or overly long symlink targets. */
if (size < 0 || size > XFS_SYMLINK_MAXLEN)
diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c
index 33a5ca346baf..3306fc42cfad 100644
--- a/fs/xfs/libxfs/xfs_types.c
+++ b/fs/xfs/libxfs/xfs_types.c
@@ -87,16 +87,15 @@ xfs_agino_range(
* Calculate the first inode, which will be in the first
* cluster-aligned block after the AGFL.
*/
- bno = round_up(XFS_AGFL_BLOCK(mp) + 1,
- xfs_ialloc_cluster_alignment(mp));
- *first = XFS_OFFBNO_TO_AGINO(mp, bno, 0);
+ bno = round_up(XFS_AGFL_BLOCK(mp) + 1, mp->m_cluster_align);
+ *first = XFS_AGB_TO_AGINO(mp, bno);
/*
* Calculate the last inode, which will be at the end of the
* last (aligned) cluster that can be allocated in the AG.
*/
- bno = round_down(eoag, xfs_ialloc_cluster_alignment(mp));
- *last = XFS_OFFBNO_TO_AGINO(mp, bno, 0) - 1;
+ bno = round_down(eoag, mp->m_cluster_align);
+ *last = XFS_AGB_TO_AGINO(mp, bno) - 1;
}
/*
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index b9e6c89284c3..8f02855a019a 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -100,15 +100,37 @@ typedef void * xfs_failaddr_t;
*/
#define MAXNAMELEN 256
+/*
+ * This enum is used in string mapping in xfs_trace.h; please keep the
+ * TRACE_DEFINE_ENUMs for it up to date.
+ */
typedef enum {
XFS_LOOKUP_EQi, XFS_LOOKUP_LEi, XFS_LOOKUP_GEi
} xfs_lookup_t;
+#define XFS_AG_BTREE_CMP_FORMAT_STR \
+ { XFS_LOOKUP_EQi, "eq" }, \
+ { XFS_LOOKUP_LEi, "le" }, \
+ { XFS_LOOKUP_GEi, "ge" }
+
+/*
+ * This enum is used in string mapping in xfs_trace.h and scrub/trace.h;
+ * please keep the TRACE_DEFINE_ENUMs for it up to date.
+ */
typedef enum {
XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_RMAPi, XFS_BTNUM_BMAPi,
XFS_BTNUM_INOi, XFS_BTNUM_FINOi, XFS_BTNUM_REFCi, XFS_BTNUM_MAX
} xfs_btnum_t;
+#define XFS_BTNUM_STRINGS \
+ { XFS_BTNUM_BNOi, "bnobt" }, \
+ { XFS_BTNUM_CNTi, "cntbt" }, \
+ { XFS_BTNUM_RMAPi, "rmapbt" }, \
+ { XFS_BTNUM_BMAPi, "bmbt" }, \
+ { XFS_BTNUM_INOi, "inobt" }, \
+ { XFS_BTNUM_FINOi, "finobt" }, \
+ { XFS_BTNUM_REFCi, "refcbt" }
+
struct xfs_name {
const unsigned char *name;
int len;
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index 3068a9382feb..90955ab1e895 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -32,7 +32,6 @@ xchk_superblock_xref(
struct xfs_scrub *sc,
struct xfs_buf *bp)
{
- struct xfs_owner_info oinfo;
struct xfs_mount *mp = sc->mp;
xfs_agnumber_t agno = sc->sm->sm_agno;
xfs_agblock_t agbno;
@@ -49,8 +48,7 @@ xchk_superblock_xref(
xchk_xref_is_used_space(sc, agbno, 1);
xchk_xref_is_not_inode_chunk(sc, agbno, 1);
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
- xchk_xref_is_owned_by(sc, agbno, 1, &oinfo);
+ xchk_xref_is_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_FS);
xchk_xref_is_not_shared(sc, agbno, 1);
/* scrub teardown will take care of sc->sa for us */
@@ -484,7 +482,6 @@ STATIC void
xchk_agf_xref(
struct xfs_scrub *sc)
{
- struct xfs_owner_info oinfo;
struct xfs_mount *mp = sc->mp;
xfs_agblock_t agbno;
int error;
@@ -502,8 +499,7 @@ xchk_agf_xref(
xchk_agf_xref_freeblks(sc);
xchk_agf_xref_cntbt(sc);
xchk_xref_is_not_inode_chunk(sc, agbno, 1);
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
- xchk_xref_is_owned_by(sc, agbno, 1, &oinfo);
+ xchk_xref_is_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_FS);
xchk_agf_xref_btreeblks(sc);
xchk_xref_is_not_shared(sc, agbno, 1);
xchk_agf_xref_refcblks(sc);
@@ -598,7 +594,6 @@ out:
/* AGFL */
struct xchk_agfl_info {
- struct xfs_owner_info oinfo;
unsigned int sz_entries;
unsigned int nr_entries;
xfs_agblock_t *entries;
@@ -609,15 +604,14 @@ struct xchk_agfl_info {
STATIC void
xchk_agfl_block_xref(
struct xfs_scrub *sc,
- xfs_agblock_t agbno,
- struct xfs_owner_info *oinfo)
+ xfs_agblock_t agbno)
{
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
return;
xchk_xref_is_used_space(sc, agbno, 1);
xchk_xref_is_not_inode_chunk(sc, agbno, 1);
- xchk_xref_is_owned_by(sc, agbno, 1, oinfo);
+ xchk_xref_is_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_AG);
xchk_xref_is_not_shared(sc, agbno, 1);
}
@@ -638,7 +632,7 @@ xchk_agfl_block(
else
xchk_block_set_corrupt(sc, sc->sa.agfl_bp);
- xchk_agfl_block_xref(sc, agbno, priv);
+ xchk_agfl_block_xref(sc, agbno);
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
return XFS_BTREE_QUERY_RANGE_ABORT;
@@ -662,7 +656,6 @@ STATIC void
xchk_agfl_xref(
struct xfs_scrub *sc)
{
- struct xfs_owner_info oinfo;
struct xfs_mount *mp = sc->mp;
xfs_agblock_t agbno;
int error;
@@ -678,8 +671,7 @@ xchk_agfl_xref(
xchk_xref_is_used_space(sc, agbno, 1);
xchk_xref_is_not_inode_chunk(sc, agbno, 1);
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
- xchk_xref_is_owned_by(sc, agbno, 1, &oinfo);
+ xchk_xref_is_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_FS);
xchk_xref_is_not_shared(sc, agbno, 1);
/*
@@ -732,7 +724,6 @@ xchk_agfl(
}
/* Check the blocks in the AGFL. */
- xfs_rmap_ag_owner(&sai.oinfo, XFS_RMAP_OWN_AG);
error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(sc->sa.agf_bp),
sc->sa.agfl_bp, xchk_agfl_block, &sai);
if (error == XFS_BTREE_QUERY_RANGE_ABORT) {
@@ -791,7 +782,6 @@ STATIC void
xchk_agi_xref(
struct xfs_scrub *sc)
{
- struct xfs_owner_info oinfo;
struct xfs_mount *mp = sc->mp;
xfs_agblock_t agbno;
int error;
@@ -808,8 +798,7 @@ xchk_agi_xref(
xchk_xref_is_used_space(sc, agbno, 1);
xchk_xref_is_not_inode_chunk(sc, agbno, 1);
xchk_agi_xref_icounts(sc);
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
- xchk_xref_is_owned_by(sc, agbno, 1, &oinfo);
+ xchk_xref_is_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_FS);
xchk_xref_is_not_shared(sc, agbno, 1);
/* scrub teardown will take care of sc->sa for us */
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index f7568a4b5fe5..03d1e15cceba 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -646,7 +646,6 @@ int
xrep_agfl(
struct xfs_scrub *sc)
{
- struct xfs_owner_info oinfo;
struct xfs_bitmap agfl_extents;
struct xfs_mount *mp = sc->mp;
struct xfs_buf *agf_bp;
@@ -708,8 +707,8 @@ xrep_agfl(
goto err;
/* Dump any AGFL overflow. */
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
- return xrep_reap_extents(sc, &agfl_extents, &oinfo, XFS_AG_RESV_AGFL);
+ return xrep_reap_extents(sc, &agfl_extents, &XFS_RMAP_OINFO_AG,
+ XFS_AG_RESV_AGFL);
err:
xfs_bitmap_destroy(&agfl_extents);
return error;
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
index 376bcb585ae6..44883e9112ad 100644
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -125,12 +125,10 @@ xchk_allocbt(
struct xfs_scrub *sc,
xfs_btnum_t which)
{
- struct xfs_owner_info oinfo;
struct xfs_btree_cur *cur;
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
cur = which == XFS_BTNUM_BNO ? sc->sa.bno_cur : sc->sa.cnt_cur;
- return xchk_btree(sc, cur, xchk_allocbt_rec, &oinfo, NULL);
+ return xchk_btree(sc, cur, xchk_allocbt_rec, &XFS_RMAP_OINFO_AG, NULL);
}
int
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index 4ae959f7ad2c..6f94d1f7322d 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -583,31 +583,32 @@ xchk_btree_block_keys(
*/
int
xchk_btree(
- struct xfs_scrub *sc,
- struct xfs_btree_cur *cur,
- xchk_btree_rec_fn scrub_fn,
- struct xfs_owner_info *oinfo,
- void *private)
+ struct xfs_scrub *sc,
+ struct xfs_btree_cur *cur,
+ xchk_btree_rec_fn scrub_fn,
+ const struct xfs_owner_info *oinfo,
+ void *private)
{
- struct xchk_btree bs = { NULL };
- union xfs_btree_ptr ptr;
- union xfs_btree_ptr *pp;
- union xfs_btree_rec *recp;
- struct xfs_btree_block *block;
- int level;
- struct xfs_buf *bp;
- struct check_owner *co;
- struct check_owner *n;
- int i;
- int error = 0;
+ struct xchk_btree bs = {
+ .cur = cur,
+ .scrub_rec = scrub_fn,
+ .oinfo = oinfo,
+ .firstrec = true,
+ .private = private,
+ .sc = sc,
+ };
+ union xfs_btree_ptr ptr;
+ union xfs_btree_ptr *pp;
+ union xfs_btree_rec *recp;
+ struct xfs_btree_block *block;
+ int level;
+ struct xfs_buf *bp;
+ struct check_owner *co;
+ struct check_owner *n;
+ int i;
+ int error = 0;
/* Initialize scrub state */
- bs.cur = cur;
- bs.scrub_rec = scrub_fn;
- bs.oinfo = oinfo;
- bs.firstrec = true;
- bs.private = private;
- bs.sc = sc;
for (i = 0; i < XFS_BTREE_MAXLEVELS; i++)
bs.firstkey[i] = true;
INIT_LIST_HEAD(&bs.to_check);
diff --git a/fs/xfs/scrub/btree.h b/fs/xfs/scrub/btree.h
index aada763cd006..5572e475f8ed 100644
--- a/fs/xfs/scrub/btree.h
+++ b/fs/xfs/scrub/btree.h
@@ -31,21 +31,21 @@ typedef int (*xchk_btree_rec_fn)(
struct xchk_btree {
/* caller-provided scrub state */
- struct xfs_scrub *sc;
- struct xfs_btree_cur *cur;
- xchk_btree_rec_fn scrub_rec;
- struct xfs_owner_info *oinfo;
- void *private;
+ struct xfs_scrub *sc;
+ struct xfs_btree_cur *cur;
+ xchk_btree_rec_fn scrub_rec;
+ const struct xfs_owner_info *oinfo;
+ void *private;
/* internal scrub state */
- union xfs_btree_rec lastrec;
- bool firstrec;
- union xfs_btree_key lastkey[XFS_BTREE_MAXLEVELS];
- bool firstkey[XFS_BTREE_MAXLEVELS];
- struct list_head to_check;
+ union xfs_btree_rec lastrec;
+ bool firstrec;
+ union xfs_btree_key lastkey[XFS_BTREE_MAXLEVELS];
+ bool firstkey[XFS_BTREE_MAXLEVELS];
+ struct list_head to_check;
};
int xchk_btree(struct xfs_scrub *sc, struct xfs_btree_cur *cur,
- xchk_btree_rec_fn scrub_fn, struct xfs_owner_info *oinfo,
+ xchk_btree_rec_fn scrub_fn, const struct xfs_owner_info *oinfo,
void *private);
#endif /* __XFS_SCRUB_BTREE_H__ */
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 346b02abccf7..0c54ff55b901 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -313,8 +313,8 @@ xchk_set_incomplete(
*/
struct xchk_rmap_ownedby_info {
- struct xfs_owner_info *oinfo;
- xfs_filblks_t *blocks;
+ const struct xfs_owner_info *oinfo;
+ xfs_filblks_t *blocks;
};
STATIC int
@@ -347,15 +347,15 @@ int
xchk_count_rmap_ownedby_ag(
struct xfs_scrub *sc,
struct xfs_btree_cur *cur,
- struct xfs_owner_info *oinfo,
+ const struct xfs_owner_info *oinfo,
xfs_filblks_t *blocks)
{
- struct xchk_rmap_ownedby_info sroi;
+ struct xchk_rmap_ownedby_info sroi = {
+ .oinfo = oinfo,
+ .blocks = blocks,
+ };
- sroi.oinfo = oinfo;
*blocks = 0;
- sroi.blocks = blocks;
-
return xfs_rmap_query_all(cur, xchk_count_rmap_ownedby_irec,
&sroi);
}
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 2d4324d12f9a..e26a430bd466 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -116,7 +116,7 @@ int xchk_ag_read_headers(struct xfs_scrub *sc, xfs_agnumber_t agno,
void xchk_ag_btcur_free(struct xchk_ag *sa);
int xchk_ag_btcur_init(struct xfs_scrub *sc, struct xchk_ag *sa);
int xchk_count_rmap_ownedby_ag(struct xfs_scrub *sc, struct xfs_btree_cur *cur,
- struct xfs_owner_info *oinfo, xfs_filblks_t *blocks);
+ const struct xfs_owner_info *oinfo, xfs_filblks_t *blocks);
int xchk_setup_ag_btree(struct xfs_scrub *sc, struct xfs_inode *ip,
bool force_log);
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index 224dba937492..882dc56c5c21 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -44,6 +44,11 @@ xchk_setup_ag_iallocbt(
/* Inode btree scrubber. */
+struct xchk_iallocbt {
+ /* Number of inodes we see while scanning inobt. */
+ unsigned long long inodes;
+};
+
/*
* If we're checking the finobt, cross-reference with the inobt.
* Otherwise we're checking the inobt; if there is an finobt, make sure
@@ -82,15 +87,12 @@ xchk_iallocbt_chunk_xref(
xfs_agblock_t agbno,
xfs_extlen_t len)
{
- struct xfs_owner_info oinfo;
-
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
return;
xchk_xref_is_used_space(sc, agbno, len);
xchk_iallocbt_chunk_xref_other(sc, irec, agino);
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
- xchk_xref_is_owned_by(sc, agbno, len, &oinfo);
+ xchk_xref_is_owned_by(sc, agbno, len, &XFS_RMAP_OINFO_INODES);
xchk_xref_is_not_shared(sc, agbno, len);
}
@@ -186,7 +188,6 @@ xchk_iallocbt_check_freemask(
struct xchk_btree *bs,
struct xfs_inobt_rec_incore *irec)
{
- struct xfs_owner_info oinfo;
struct xfs_imap imap;
struct xfs_mount *mp = bs->cur->bc_mp;
struct xfs_dinode *dip;
@@ -197,19 +198,16 @@ xchk_iallocbt_check_freemask(
xfs_agino_t chunkino;
xfs_agino_t clusterino;
xfs_agblock_t agbno;
- int blks_per_cluster;
uint16_t holemask;
uint16_t ir_holemask;
int error = 0;
/* Make sure the freemask matches the inode records. */
- blks_per_cluster = xfs_icluster_size_fsb(mp);
- nr_inodes = XFS_OFFBNO_TO_AGINO(mp, blks_per_cluster, 0);
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
+ nr_inodes = mp->m_inodes_per_cluster;
for (agino = irec->ir_startino;
agino < irec->ir_startino + XFS_INODES_PER_CHUNK;
- agino += blks_per_cluster * mp->m_sb.sb_inopblock) {
+ agino += mp->m_inodes_per_cluster) {
fsino = XFS_AGINO_TO_INO(mp, bs->cur->bc_private.a.agno, agino);
chunkino = agino - irec->ir_startino;
agbno = XFS_AGINO_TO_AGBNO(mp, agino);
@@ -230,17 +228,18 @@ xchk_iallocbt_check_freemask(
/* If any part of this is a hole, skip it. */
if (ir_holemask) {
xchk_xref_is_not_owned_by(bs->sc, agbno,
- blks_per_cluster, &oinfo);
+ mp->m_blocks_per_cluster,
+ &XFS_RMAP_OINFO_INODES);
continue;
}
- xchk_xref_is_owned_by(bs->sc, agbno, blks_per_cluster,
- &oinfo);
+ xchk_xref_is_owned_by(bs->sc, agbno, mp->m_blocks_per_cluster,
+ &XFS_RMAP_OINFO_INODES);
/* Grab the inode cluster buffer. */
imap.im_blkno = XFS_AGB_TO_DADDR(mp, bs->cur->bc_private.a.agno,
agbno);
- imap.im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
+ imap.im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster);
imap.im_boffset = 0;
error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap,
@@ -272,7 +271,7 @@ xchk_iallocbt_rec(
union xfs_btree_rec *rec)
{
struct xfs_mount *mp = bs->cur->bc_mp;
- xfs_filblks_t *inode_blocks = bs->private;
+ struct xchk_iallocbt *iabt = bs->private;
struct xfs_inobt_rec_incore irec;
uint64_t holes;
xfs_agnumber_t agno = bs->cur->bc_private.a.agno;
@@ -306,12 +305,11 @@ xchk_iallocbt_rec(
/* Make sure this record is aligned to cluster and inoalignmnt size. */
agbno = XFS_AGINO_TO_AGBNO(mp, irec.ir_startino);
- if ((agbno & (xfs_ialloc_cluster_alignment(mp) - 1)) ||
- (agbno & (xfs_icluster_size_fsb(mp) - 1)))
+ if ((agbno & (mp->m_cluster_align - 1)) ||
+ (agbno & (mp->m_blocks_per_cluster - 1)))
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
- *inode_blocks += XFS_B_TO_FSB(mp,
- irec.ir_count * mp->m_sb.sb_inodesize);
+ iabt->inodes += irec.ir_count;
/* Handle non-sparse inodes */
if (!xfs_inobt_issparse(irec.ir_holemask)) {
@@ -366,7 +364,6 @@ xchk_iallocbt_xref_rmap_btreeblks(
struct xfs_scrub *sc,
int which)
{
- struct xfs_owner_info oinfo;
xfs_filblks_t blocks;
xfs_extlen_t inobt_blocks = 0;
xfs_extlen_t finobt_blocks = 0;
@@ -388,9 +385,8 @@ xchk_iallocbt_xref_rmap_btreeblks(
return;
}
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
- error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo,
- &blocks);
+ error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
+ &XFS_RMAP_OINFO_INOBT, &blocks);
if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
return;
if (blocks != inobt_blocks + finobt_blocks)
@@ -405,21 +401,21 @@ STATIC void
xchk_iallocbt_xref_rmap_inodes(
struct xfs_scrub *sc,
int which,
- xfs_filblks_t inode_blocks)
+ unsigned long long inodes)
{
- struct xfs_owner_info oinfo;
xfs_filblks_t blocks;
+ xfs_filblks_t inode_blocks;
int error;
if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
return;
/* Check that we saw as many inode blocks as the rmap knows about. */
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
- error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo,
- &blocks);
+ error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
+ &XFS_RMAP_OINFO_INODES, &blocks);
if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
return;
+ inode_blocks = XFS_B_TO_FSB(sc->mp, inodes * sc->mp->m_sb.sb_inodesize);
if (blocks != inode_blocks)
xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
}
@@ -431,14 +427,14 @@ xchk_iallocbt(
xfs_btnum_t which)
{
struct xfs_btree_cur *cur;
- struct xfs_owner_info oinfo;
- xfs_filblks_t inode_blocks = 0;
+ struct xchk_iallocbt iabt = {
+ .inodes = 0,
+ };
int error;
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
cur = which == XFS_BTNUM_INO ? sc->sa.ino_cur : sc->sa.fino_cur;
- error = xchk_btree(sc, cur, xchk_iallocbt_rec, &oinfo,
- &inode_blocks);
+ error = xchk_btree(sc, cur, xchk_iallocbt_rec, &XFS_RMAP_OINFO_INOBT,
+ &iabt);
if (error)
return error;
@@ -452,7 +448,7 @@ xchk_iallocbt(
* to inode chunks with free inodes.
*/
if (which == XFS_BTNUM_INO)
- xchk_iallocbt_xref_rmap_inodes(sc, which, inode_blocks);
+ xchk_iallocbt_xref_rmap_inodes(sc, which, iabt.inodes);
return error;
}
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index e386c9b0b4ab..e213efc194a1 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -509,7 +509,6 @@ xchk_inode_xref(
xfs_ino_t ino,
struct xfs_dinode *dip)
{
- struct xfs_owner_info oinfo;
xfs_agnumber_t agno;
xfs_agblock_t agbno;
int error;
@@ -526,8 +525,7 @@ xchk_inode_xref(
xchk_xref_is_used_space(sc, agbno, 1);
xchk_inode_xref_finobt(sc, ino);
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
- xchk_xref_is_owned_by(sc, agbno, 1, &oinfo);
+ xchk_xref_is_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_INODES);
xchk_xref_is_not_shared(sc, agbno, 1);
xchk_inode_xref_bmap(sc, dip);
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index e8c82b026083..708b4158eb90 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -383,7 +383,6 @@ xchk_refcountbt_rec(
STATIC void
xchk_refcount_xref_rmap(
struct xfs_scrub *sc,
- struct xfs_owner_info *oinfo,
xfs_filblks_t cow_blocks)
{
xfs_extlen_t refcbt_blocks = 0;
@@ -397,17 +396,16 @@ xchk_refcount_xref_rmap(
error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks);
if (!xchk_btree_process_error(sc, sc->sa.refc_cur, 0, &error))
return;
- error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
- &blocks);
+ error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
+ &XFS_RMAP_OINFO_REFC, &blocks);
if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
return;
if (blocks != refcbt_blocks)
xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
/* Check that we saw as many cow blocks as the rmap knows about. */
- xfs_rmap_ag_owner(oinfo, XFS_RMAP_OWN_COW);
- error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
- &blocks);
+ error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
+ &XFS_RMAP_OINFO_COW, &blocks);
if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
return;
if (blocks != cow_blocks)
@@ -419,17 +417,15 @@ int
xchk_refcountbt(
struct xfs_scrub *sc)
{
- struct xfs_owner_info oinfo;
xfs_agblock_t cow_blocks = 0;
int error;
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC);
error = xchk_btree(sc, sc->sa.refc_cur, xchk_refcountbt_rec,
- &oinfo, &cow_blocks);
+ &XFS_RMAP_OINFO_REFC, &cow_blocks);
if (error)
return error;
- xchk_refcount_xref_rmap(sc, &oinfo, cow_blocks);
+ xchk_refcount_xref_rmap(sc, cow_blocks);
return 0;
}
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 4fc0a5ea7673..1c8eecfe52b8 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -299,14 +299,14 @@ xrep_calc_ag_resblks(
/* Allocate a block in an AG. */
int
xrep_alloc_ag_block(
- struct xfs_scrub *sc,
- struct xfs_owner_info *oinfo,
- xfs_fsblock_t *fsbno,
- enum xfs_ag_resv_type resv)
+ struct xfs_scrub *sc,
+ const struct xfs_owner_info *oinfo,
+ xfs_fsblock_t *fsbno,
+ enum xfs_ag_resv_type resv)
{
- struct xfs_alloc_arg args = {0};
- xfs_agblock_t bno;
- int error;
+ struct xfs_alloc_arg args = {0};
+ xfs_agblock_t bno;
+ int error;
switch (resv) {
case XFS_AG_RESV_AGFL:
@@ -505,7 +505,6 @@ xrep_put_freelist(
struct xfs_scrub *sc,
xfs_agblock_t agbno)
{
- struct xfs_owner_info oinfo;
int error;
/* Make sure there's space on the freelist. */
@@ -518,9 +517,8 @@ xrep_put_freelist(
* create an rmap for the block prior to merging it or else other
* parts will break.
*/
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.agno, agbno, 1,
- &oinfo);
+ &XFS_RMAP_OINFO_AG);
if (error)
return error;
@@ -538,17 +536,17 @@ xrep_put_freelist(
/* Dispose of a single block. */
STATIC int
xrep_reap_block(
- struct xfs_scrub *sc,
- xfs_fsblock_t fsbno,
- struct xfs_owner_info *oinfo,
- enum xfs_ag_resv_type resv)
+ struct xfs_scrub *sc,
+ xfs_fsblock_t fsbno,
+ const struct xfs_owner_info *oinfo,
+ enum xfs_ag_resv_type resv)
{
- struct xfs_btree_cur *cur;
- struct xfs_buf *agf_bp = NULL;
- xfs_agnumber_t agno;
- xfs_agblock_t agbno;
- bool has_other_rmap;
- int error;
+ struct xfs_btree_cur *cur;
+ struct xfs_buf *agf_bp = NULL;
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+ bool has_other_rmap;
+ int error;
agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
@@ -612,15 +610,15 @@ out_free:
/* Dispose of every block of every extent in the bitmap. */
int
xrep_reap_extents(
- struct xfs_scrub *sc,
- struct xfs_bitmap *bitmap,
- struct xfs_owner_info *oinfo,
- enum xfs_ag_resv_type type)
+ struct xfs_scrub *sc,
+ struct xfs_bitmap *bitmap,
+ const struct xfs_owner_info *oinfo,
+ enum xfs_ag_resv_type type)
{
- struct xfs_bitmap_range *bmr;
- struct xfs_bitmap_range *n;
- xfs_fsblock_t fsbno;
- int error = 0;
+ struct xfs_bitmap_range *bmr;
+ struct xfs_bitmap_range *n;
+ xfs_fsblock_t fsbno;
+ int error = 0;
ASSERT(xfs_sb_version_hasrmapbt(&sc->mp->m_sb));
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 9de321eee4ab..f2fc18bb7605 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -21,8 +21,9 @@ int xrep_roll_ag_trans(struct xfs_scrub *sc);
bool xrep_ag_has_space(struct xfs_perag *pag, xfs_extlen_t nr_blocks,
enum xfs_ag_resv_type type);
xfs_extlen_t xrep_calc_ag_resblks(struct xfs_scrub *sc);
-int xrep_alloc_ag_block(struct xfs_scrub *sc, struct xfs_owner_info *oinfo,
- xfs_fsblock_t *fsbno, enum xfs_ag_resv_type resv);
+int xrep_alloc_ag_block(struct xfs_scrub *sc,
+ const struct xfs_owner_info *oinfo, xfs_fsblock_t *fsbno,
+ enum xfs_ag_resv_type resv);
int xrep_init_btblock(struct xfs_scrub *sc, xfs_fsblock_t fsb,
struct xfs_buf **bpp, xfs_btnum_t btnum,
const struct xfs_buf_ops *ops);
@@ -32,7 +33,7 @@ struct xfs_bitmap;
int xrep_fix_freelist(struct xfs_scrub *sc, bool can_shrink);
int xrep_invalidate_blocks(struct xfs_scrub *sc, struct xfs_bitmap *btlist);
int xrep_reap_extents(struct xfs_scrub *sc, struct xfs_bitmap *exlist,
- struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type);
+ const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type);
struct xrep_find_ag_btree {
/* in: rmap owner of the btree we're looking for */
diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c
index 5e293c129813..92a140c5b55e 100644
--- a/fs/xfs/scrub/rmap.c
+++ b/fs/xfs/scrub/rmap.c
@@ -174,24 +174,21 @@ int
xchk_rmapbt(
struct xfs_scrub *sc)
{
- struct xfs_owner_info oinfo;
-
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
return xchk_btree(sc, sc->sa.rmap_cur, xchk_rmapbt_rec,
- &oinfo, NULL);
+ &XFS_RMAP_OINFO_AG, NULL);
}
/* xref check that the extent is owned by a given owner */
static inline void
xchk_xref_check_owner(
- struct xfs_scrub *sc,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- struct xfs_owner_info *oinfo,
- bool should_have_rmap)
+ struct xfs_scrub *sc,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ const struct xfs_owner_info *oinfo,
+ bool should_have_rmap)
{
- bool has_rmap;
- int error;
+ bool has_rmap;
+ int error;
if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
return;
@@ -207,10 +204,10 @@ xchk_xref_check_owner(
/* xref check that the extent is owned by a given owner */
void
xchk_xref_is_owned_by(
- struct xfs_scrub *sc,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- struct xfs_owner_info *oinfo)
+ struct xfs_scrub *sc,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ const struct xfs_owner_info *oinfo)
{
xchk_xref_check_owner(sc, bno, len, oinfo, true);
}
@@ -218,10 +215,10 @@ xchk_xref_is_owned_by(
/* xref check that the extent is not owned by a given owner */
void
xchk_xref_is_not_owned_by(
- struct xfs_scrub *sc,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- struct xfs_owner_info *oinfo)
+ struct xfs_scrub *sc,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ const struct xfs_owner_info *oinfo)
{
xchk_xref_check_owner(sc, bno, len, oinfo, false);
}
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index af323b229c4b..22f754fba8e5 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -122,9 +122,9 @@ void xchk_xref_is_not_inode_chunk(struct xfs_scrub *sc, xfs_agblock_t agbno,
void xchk_xref_is_inode_chunk(struct xfs_scrub *sc, xfs_agblock_t agbno,
xfs_extlen_t len);
void xchk_xref_is_owned_by(struct xfs_scrub *sc, xfs_agblock_t agbno,
- xfs_extlen_t len, struct xfs_owner_info *oinfo);
+ xfs_extlen_t len, const struct xfs_owner_info *oinfo);
void xchk_xref_is_not_owned_by(struct xfs_scrub *sc, xfs_agblock_t agbno,
- xfs_extlen_t len, struct xfs_owner_info *oinfo);
+ xfs_extlen_t len, const struct xfs_owner_info *oinfo);
void xchk_xref_has_no_owner(struct xfs_scrub *sc, xfs_agblock_t agbno,
xfs_extlen_t len);
void xchk_xref_is_cow_staging(struct xfs_scrub *sc, xfs_agblock_t bno,
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 4e20f0e48232..8344b14031ef 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -12,6 +12,71 @@
#include <linux/tracepoint.h>
#include "xfs_bit.h"
+/*
+ * ftrace's __print_symbolic requires that all enum values be wrapped in the
+ * TRACE_DEFINE_ENUM macro so that the enum value can be encoded in the ftrace
+ * ring buffer. Somehow this was only worth mentioning in the ftrace sample
+ * code.
+ */
+TRACE_DEFINE_ENUM(XFS_BTNUM_BNOi);
+TRACE_DEFINE_ENUM(XFS_BTNUM_CNTi);
+TRACE_DEFINE_ENUM(XFS_BTNUM_BMAPi);
+TRACE_DEFINE_ENUM(XFS_BTNUM_INOi);
+TRACE_DEFINE_ENUM(XFS_BTNUM_FINOi);
+TRACE_DEFINE_ENUM(XFS_BTNUM_RMAPi);
+TRACE_DEFINE_ENUM(XFS_BTNUM_REFCi);
+
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_PROBE);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_SB);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_AGF);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_AGFL);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_AGI);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BNOBT);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_CNTBT);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_INOBT);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FINOBT);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_RMAPBT);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_REFCNTBT);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_INODE);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BMBTD);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BMBTA);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BMBTC);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_DIR);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_XATTR);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_SYMLINK);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_PARENT);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_RTBITMAP);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_RTSUM);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_UQUOTA);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_GQUOTA);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_PQUOTA);
+
+#define XFS_SCRUB_TYPE_STRINGS \
+ { XFS_SCRUB_TYPE_PROBE, "probe" }, \
+ { XFS_SCRUB_TYPE_SB, "sb" }, \
+ { XFS_SCRUB_TYPE_AGF, "agf" }, \
+ { XFS_SCRUB_TYPE_AGFL, "agfl" }, \
+ { XFS_SCRUB_TYPE_AGI, "agi" }, \
+ { XFS_SCRUB_TYPE_BNOBT, "bnobt" }, \
+ { XFS_SCRUB_TYPE_CNTBT, "cntbt" }, \
+ { XFS_SCRUB_TYPE_INOBT, "inobt" }, \
+ { XFS_SCRUB_TYPE_FINOBT, "finobt" }, \
+ { XFS_SCRUB_TYPE_RMAPBT, "rmapbt" }, \
+ { XFS_SCRUB_TYPE_REFCNTBT, "refcountbt" }, \
+ { XFS_SCRUB_TYPE_INODE, "inode" }, \
+ { XFS_SCRUB_TYPE_BMBTD, "bmapbtd" }, \
+ { XFS_SCRUB_TYPE_BMBTA, "bmapbta" }, \
+ { XFS_SCRUB_TYPE_BMBTC, "bmapbtc" }, \
+ { XFS_SCRUB_TYPE_DIR, "directory" }, \
+ { XFS_SCRUB_TYPE_XATTR, "xattr" }, \
+ { XFS_SCRUB_TYPE_SYMLINK, "symlink" }, \
+ { XFS_SCRUB_TYPE_PARENT, "parent" }, \
+ { XFS_SCRUB_TYPE_RTBITMAP, "rtbitmap" }, \
+ { XFS_SCRUB_TYPE_RTSUM, "rtsummary" }, \
+ { XFS_SCRUB_TYPE_UQUOTA, "usrquota" }, \
+ { XFS_SCRUB_TYPE_GQUOTA, "grpquota" }, \
+ { XFS_SCRUB_TYPE_PQUOTA, "prjquota" }
+
DECLARE_EVENT_CLASS(xchk_class,
TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm,
int error),
@@ -36,10 +101,10 @@ DECLARE_EVENT_CLASS(xchk_class,
__entry->flags = sm->sm_flags;
__entry->error = error;
),
- TP_printk("dev %d:%d ino 0x%llx type %u agno %u inum %llu gen %u flags 0x%x error %d",
+ TP_printk("dev %d:%d ino 0x%llx type %s agno %u inum %llu gen %u flags 0x%x error %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
- __entry->type,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
__entry->agno,
__entry->inum,
__entry->gen,
@@ -78,9 +143,9 @@ TRACE_EVENT(xchk_op_error,
__entry->error = error;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d type %u agno %u agbno %u error %d ret_ip %pS",
+ TP_printk("dev %d:%d type %s agno %u agbno %u error %d ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->type,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
__entry->agno,
__entry->bno,
__entry->error,
@@ -109,11 +174,11 @@ TRACE_EVENT(xchk_file_op_error,
__entry->error = error;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d ino 0x%llx fork %d type %u offset %llu error %d ret_ip %pS",
+ TP_printk("dev %d:%d ino 0x%llx fork %d type %s offset %llu error %d ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->whichfork,
- __entry->type,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
__entry->offset,
__entry->error,
__entry->ret_ip)
@@ -144,9 +209,9 @@ DECLARE_EVENT_CLASS(xchk_block_error_class,
__entry->bno = bno;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d type %u agno %u agbno %u ret_ip %pS",
+ TP_printk("dev %d:%d type %s agno %u agbno %u ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->type,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
__entry->agno,
__entry->bno,
__entry->ret_ip)
@@ -176,10 +241,10 @@ DECLARE_EVENT_CLASS(xchk_ino_error_class,
__entry->type = sc->sm->sm_type;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d ino 0x%llx type %u ret_ip %pS",
+ TP_printk("dev %d:%d ino 0x%llx type %s ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
- __entry->type,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
__entry->ret_ip)
)
@@ -213,11 +278,11 @@ DECLARE_EVENT_CLASS(xchk_fblock_error_class,
__entry->offset = offset;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d ino 0x%llx fork %d type %u offset %llu ret_ip %pS",
+ TP_printk("dev %d:%d ino 0x%llx fork %d type %s offset %llu ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->whichfork,
- __entry->type,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
__entry->offset,
__entry->ret_ip)
);
@@ -244,9 +309,9 @@ TRACE_EVENT(xchk_incomplete,
__entry->type = sc->sm->sm_type;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d type %u ret_ip %pS",
+ TP_printk("dev %d:%d type %s ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->type,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
__entry->ret_ip)
);
@@ -278,10 +343,10 @@ TRACE_EVENT(xchk_btree_op_error,
__entry->error = error;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS",
+ TP_printk("dev %d:%d type %s btree %s level %d ptr %d agno %u agbno %u error %d ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->type,
- __entry->btnum,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
+ __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
__entry->level,
__entry->ptr,
__entry->agno,
@@ -321,12 +386,12 @@ TRACE_EVENT(xchk_ifork_btree_op_error,
__entry->error = error;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d ino 0x%llx fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS",
+ TP_printk("dev %d:%d ino 0x%llx fork %d type %s btree %s level %d ptr %d agno %u agbno %u error %d ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->whichfork,
- __entry->type,
- __entry->btnum,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
+ __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
__entry->level,
__entry->ptr,
__entry->agno,
@@ -360,10 +425,10 @@ TRACE_EVENT(xchk_btree_error,
__entry->ptr = cur->bc_ptrs[level];
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS",
+ TP_printk("dev %d:%d type %s btree %s level %d ptr %d agno %u agbno %u ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->type,
- __entry->btnum,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
+ __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
__entry->level,
__entry->ptr,
__entry->agno,
@@ -400,12 +465,12 @@ TRACE_EVENT(xchk_ifork_btree_error,
__entry->ptr = cur->bc_ptrs[level];
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d ino 0x%llx fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS",
+ TP_printk("dev %d:%d ino 0x%llx fork %d type %s btree %s level %d ptr %d agno %u agbno %u ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->whichfork,
- __entry->type,
- __entry->btnum,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
+ __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
__entry->level,
__entry->ptr,
__entry->agno,
@@ -439,10 +504,10 @@ DECLARE_EVENT_CLASS(xchk_sbtree_class,
__entry->nlevels = cur->bc_nlevels;
__entry->ptr = cur->bc_ptrs[level];
),
- TP_printk("dev %d:%d type %u btnum %d agno %u agbno %u level %d nlevels %d ptr %d",
+ TP_printk("dev %d:%d type %s btree %s agno %u agbno %u level %d nlevels %d ptr %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->type,
- __entry->btnum,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
+ __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
__entry->agno,
__entry->bno,
__entry->level,
@@ -473,9 +538,9 @@ TRACE_EVENT(xchk_xref_error,
__entry->error = error;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d type %u xref error %d ret_ip %pF",
+ TP_printk("dev %d:%d type %s xref error %d ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->type,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
__entry->error,
__entry->ret_ip)
);
@@ -598,11 +663,11 @@ TRACE_EVENT(xrep_init_btblock,
__entry->agbno = agbno;
__entry->btnum = btnum;
),
- TP_printk("dev %d:%d agno %u agbno %u btnum %d",
+ TP_printk("dev %d:%d agno %u agbno %u btree %s",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->agbno,
- __entry->btnum)
+ __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS))
)
TRACE_EVENT(xrep_findroot_block,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 494b4338446e..e5c23948a8ab 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -10,6 +10,9 @@ extern struct bio_set xfs_ioend_bioset;
/*
* Types of I/O for bmap clustering and I/O completion tracking.
+ *
+ * This enum is used in string mapping in xfs_trace.h; please keep the
+ * TRACE_DEFINE_ENUMs for it up to date.
*/
enum {
XFS_IO_HOLE, /* covers region without any block allocation */
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index b21ea2ba768d..eedc5e0156ff 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1992,7 +1992,6 @@ xfs_buf_delwri_submit_buffers(
struct list_head *wait_list)
{
struct xfs_buf *bp, *n;
- LIST_HEAD (submit_list);
int pinned = 0;
struct blk_plug plug;
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index d9da66c718bb..74ddf66f4cfe 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -494,7 +494,6 @@ xfs_efi_recover(
int error = 0;
xfs_extent_t *extp;
xfs_fsblock_t startblock_fsb;
- struct xfs_owner_info oinfo;
ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags));
@@ -526,11 +525,11 @@ xfs_efi_recover(
return error;
efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
- xfs_rmap_any_owner_update(&oinfo);
for (i = 0; i < efip->efi_format.efi_nextents; i++) {
extp = &efip->efi_format.efi_extents[i];
error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
- extp->ext_len, &oinfo, false);
+ extp->ext_len,
+ &XFS_RMAP_OINFO_ANY_OWNER, false);
if (error)
goto abort_error;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 093c2b8d7e20..f3ef70c542e1 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -40,7 +40,6 @@ xfs_growfs_data_private(
xfs_rfsblock_t new;
xfs_agnumber_t oagcount;
xfs_trans_t *tp;
- LIST_HEAD (buffer_list);
struct aghdr_init_data id = {};
nb = in->newblocks;
@@ -252,7 +251,7 @@ xfs_growfs_data(
if (mp->m_sb.sb_imax_pct) {
uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
do_div(icount, 100);
- mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
+ mp->m_maxicount = XFS_FSB_TO_INO(mp, icount);
} else
mp->m_maxicount = 0;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 05db9540e459..ae667ba74a1c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2184,8 +2184,6 @@ xfs_ifree_cluster(
struct xfs_icluster *xic)
{
xfs_mount_t *mp = free_ip->i_mount;
- int blks_per_cluster;
- int inodes_per_cluster;
int nbufs;
int i, j;
int ioffset;
@@ -2199,11 +2197,9 @@ xfs_ifree_cluster(
inum = xic->first_ino;
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
- blks_per_cluster = xfs_icluster_size_fsb(mp);
- inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
- nbufs = mp->m_ialloc_blks / blks_per_cluster;
+ nbufs = mp->m_ialloc_blks / mp->m_blocks_per_cluster;
- for (j = 0; j < nbufs; j++, inum += inodes_per_cluster) {
+ for (j = 0; j < nbufs; j++, inum += mp->m_inodes_per_cluster) {
/*
* The allocation bitmap tells us which inodes of the chunk were
* physically allocated. Skip the cluster if an inode falls into
@@ -2211,7 +2207,7 @@ xfs_ifree_cluster(
*/
ioffset = inum - xic->first_ino;
if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) {
- ASSERT(ioffset % inodes_per_cluster == 0);
+ ASSERT(ioffset % mp->m_inodes_per_cluster == 0);
continue;
}
@@ -2227,7 +2223,7 @@ xfs_ifree_cluster(
* to mark all the active inodes on the buffer stale.
*/
bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
- mp->m_bsize * blks_per_cluster,
+ mp->m_bsize * mp->m_blocks_per_cluster,
XBF_UNMAPPED);
if (!bp)
@@ -2242,7 +2238,7 @@ xfs_ifree_cluster(
* want it to fail. We can acheive this by adding a write
* verifier to the buffer.
*/
- bp->b_ops = &xfs_inode_buf_ops;
+ bp->b_ops = &xfs_inode_buf_ops;
/*
* Walk the inodes already attached to the buffer and mark them
@@ -2274,7 +2270,7 @@ xfs_ifree_cluster(
* transaction stale above, which means there is no point in
* even trying to lock them.
*/
- for (i = 0; i < inodes_per_cluster; i++) {
+ for (i = 0; i < mp->m_inodes_per_cluster; i++) {
retry:
rcu_read_lock();
ip = radix_tree_lookup(&pag->pag_ici_root,
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index fba115f4103a..5001dca361e9 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -241,6 +241,32 @@ xfs_compat_ioc_bulkstat(
int done;
int error;
+ /*
+ * Output structure handling functions. Depending on the command,
+ * either the xfs_bstat and xfs_inogrp structures are written out
+ * to userpace memory via bulkreq.ubuffer. Normally the compat
+ * functions and structure size are the correct ones to use ...
+ */
+ inumbers_fmt_pf inumbers_func = xfs_inumbers_fmt_compat;
+ bulkstat_one_pf bs_one_func = xfs_bulkstat_one_compat;
+ size_t bs_one_size = sizeof(struct compat_xfs_bstat);
+
+#ifdef CONFIG_X86_X32
+ if (in_x32_syscall()) {
+ /*
+ * ... but on x32 the input xfs_fsop_bulkreq has pointers
+ * which must be handled in the "compat" (32-bit) way, while
+ * the xfs_bstat and xfs_inogrp structures follow native 64-
+ * bit layout convention. So adjust accordingly, otherwise
+ * the data written out in compat layout will not match what
+ * x32 userspace expects.
+ */
+ inumbers_func = xfs_inumbers_fmt;
+ bs_one_func = xfs_bulkstat_one;
+ bs_one_size = sizeof(struct xfs_bstat);
+ }
+#endif
+
/* done = 1 if there are more stats to get and if bulkstat */
/* should be called again (unused here, but used in dmapi) */
@@ -272,15 +298,15 @@ xfs_compat_ioc_bulkstat(
if (cmd == XFS_IOC_FSINUMBERS_32) {
error = xfs_inumbers(mp, &inlast, &count,
- bulkreq.ubuffer, xfs_inumbers_fmt_compat);
+ bulkreq.ubuffer, inumbers_func);
} else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) {
int res;
- error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
- sizeof(compat_xfs_bstat_t), NULL, &res);
+ error = bs_one_func(mp, inlast, bulkreq.ubuffer,
+ bs_one_size, NULL, &res);
} else if (cmd == XFS_IOC_FSBULKSTAT_32) {
error = xfs_bulkstat(mp, &inlast, &count,
- xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
+ bs_one_func, bs_one_size,
bulkreq.ubuffer, &done);
} else
error = -EINVAL;
@@ -336,6 +362,7 @@ xfs_compat_attrlist_by_handle(
{
int error;
attrlist_cursor_kern_t *cursor;
+ compat_xfs_fsop_attrlist_handlereq_t __user *p = arg;
compat_xfs_fsop_attrlist_handlereq_t al_hreq;
struct dentry *dentry;
char *kbuf;
@@ -370,6 +397,11 @@ xfs_compat_attrlist_by_handle(
if (error)
goto out_kfree;
+ if (copy_to_user(&p->pos, cursor, sizeof(attrlist_cursor_kern_t))) {
+ error = -EFAULT;
+ goto out_kfree;
+ }
+
if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen))
error = -EFAULT;
@@ -547,8 +579,12 @@ xfs_file_compat_ioctl(
case FS_IOC_GETFSMAP:
case XFS_IOC_SCRUB_METADATA:
return xfs_file_ioctl(filp, cmd, p);
-#ifndef BROKEN_X86_ALIGNMENT
- /* These are handled fine if no alignment issues */
+#if !defined(BROKEN_X86_ALIGNMENT) || defined(CONFIG_X86_X32)
+ /*
+ * These are handled fine if no alignment issues. To support x32
+ * which uses native 64-bit alignment we must emit these cases in
+ * addition to the ia-32 compat set below.
+ */
case XFS_IOC_ALLOCSP:
case XFS_IOC_FREESP:
case XFS_IOC_RESVSP:
@@ -561,8 +597,16 @@ xfs_file_compat_ioctl(
case XFS_IOC_FSGROWFSDATA:
case XFS_IOC_FSGROWFSRT:
case XFS_IOC_ZERO_RANGE:
+#ifdef CONFIG_X86_X32
+ /*
+ * x32 special: this gets a different cmd number from the ia-32 compat
+ * case below; the associated data will match native 64-bit alignment.
+ */
+ case XFS_IOC_SWAPEXT:
+#endif
return xfs_file_ioctl(filp, cmd, p);
-#else
+#endif
+#if defined(BROKEN_X86_ALIGNMENT)
case XFS_IOC_ALLOCSP_32:
case XFS_IOC_FREESP_32:
case XFS_IOC_ALLOCSP64_32:
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index e9508ba01ed1..942e4aa5e729 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -167,20 +167,18 @@ xfs_bulkstat_ichunk_ra(
{
xfs_agblock_t agbno;
struct blk_plug plug;
- int blks_per_cluster;
- int inodes_per_cluster;
int i; /* inode chunk index */
agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino);
- blks_per_cluster = xfs_icluster_size_fsb(mp);
- inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
blk_start_plug(&plug);
for (i = 0; i < XFS_INODES_PER_CHUNK;
- i += inodes_per_cluster, agbno += blks_per_cluster) {
- if (xfs_inobt_maskn(i, inodes_per_cluster) & ~irec->ir_free) {
- xfs_btree_reada_bufs(mp, agno, agbno, blks_per_cluster,
- &xfs_inode_buf_ops);
+ i += mp->m_inodes_per_cluster, agbno += mp->m_blocks_per_cluster) {
+ if (xfs_inobt_maskn(i, mp->m_inodes_per_cluster) &
+ ~irec->ir_free) {
+ xfs_btree_reada_bufs(mp, agno, agbno,
+ mp->m_blocks_per_cluster,
+ &xfs_inode_buf_ops);
}
}
blk_finish_plug(&plug);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 1fc9e9042e0e..9fe88d125f0a 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3850,7 +3850,6 @@ xlog_recover_do_icreate_pass2(
unsigned int count;
unsigned int isize;
xfs_agblock_t length;
- int blks_per_cluster;
int bb_per_cluster;
int cancel_count;
int nbufs;
@@ -3918,14 +3917,13 @@ xlog_recover_do_icreate_pass2(
* buffers for cancellation so we don't overwrite anything written after
* a cancellation.
*/
- blks_per_cluster = xfs_icluster_size_fsb(mp);
- bb_per_cluster = XFS_FSB_TO_BB(mp, blks_per_cluster);
- nbufs = length / blks_per_cluster;
+ bb_per_cluster = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster);
+ nbufs = length / mp->m_blocks_per_cluster;
for (i = 0, cancel_count = 0; i < nbufs; i++) {
xfs_daddr_t daddr;
daddr = XFS_AGB_TO_DADDR(mp, agno,
- agbno + i * blks_per_cluster);
+ agbno + i * mp->m_blocks_per_cluster);
if (xlog_check_buffer_cancelled(log, daddr, bb_per_cluster, 0))
cancel_count++;
}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 02d15098dbee..b4d8c318be3c 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -798,6 +798,10 @@ xfs_mountfs(
if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
mp->m_inode_cluster_size = new_size;
}
+ mp->m_blocks_per_cluster = xfs_icluster_size_fsb(mp);
+ mp->m_inodes_per_cluster = XFS_FSB_TO_INO(mp, mp->m_blocks_per_cluster);
+ mp->m_cluster_align = xfs_ialloc_cluster_alignment(mp);
+ mp->m_cluster_align_inodes = XFS_FSB_TO_INO(mp, mp->m_cluster_align);
/*
* If enabled, sparse inode chunk alignment is expected to match the
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 7964513c3128..7daafe064af8 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -89,6 +89,13 @@ typedef struct xfs_mount {
int m_logbsize; /* size of each log buffer */
uint m_rsumlevels; /* rt summary levels */
uint m_rsumsize; /* size of rt summary, bytes */
+ /*
+ * Optional cache of rt summary level per bitmap block with the
+ * invariant that m_rsum_cache[bbno] <= the minimum i for which
+ * rsum[i][bbno] != 0. Reads and writes are serialized by the rsumip
+ * inode lock.
+ */
+ uint8_t *m_rsum_cache;
struct xfs_inode *m_rbmip; /* pointer to bitmap inode */
struct xfs_inode *m_rsumip; /* pointer to summary inode */
struct xfs_inode *m_rootip; /* pointer to root directory */
@@ -101,6 +108,10 @@ typedef struct xfs_mount {
uint8_t m_agno_log; /* log #ag's */
uint8_t m_agino_log; /* #bits for agino in inum */
uint m_inode_cluster_size;/* min inode buf size */
+ unsigned int m_inodes_per_cluster;
+ unsigned int m_blocks_per_cluster;
+ unsigned int m_cluster_align;
+ unsigned int m_cluster_align_inodes;
uint m_blockmask; /* sb_blocksize-1 */
uint m_blockwsize; /* sb_blocksize in words */
uint m_blockwmask; /* blockwsize-1 */
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 322a852ce284..c5b4fa004ca4 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -623,54 +623,47 @@ out:
}
/*
- * Remap parts of a file's data fork after a successful CoW.
+ * Remap part of the CoW fork into the data fork.
+ *
+ * We aim to remap the range starting at @offset_fsb and ending at @end_fsb
+ * into the data fork; this function will remap what it can (at the end of the
+ * range) and update @end_fsb appropriately. Each remap gets its own
+ * transaction because we can end up merging and splitting bmbt blocks for
+ * every remap operation and we'd like to keep the block reservation
+ * requirements as low as possible.
*/
-int
-xfs_reflink_end_cow(
- struct xfs_inode *ip,
- xfs_off_t offset,
- xfs_off_t count)
+STATIC int
+xfs_reflink_end_cow_extent(
+ struct xfs_inode *ip,
+ xfs_fileoff_t offset_fsb,
+ xfs_fileoff_t *end_fsb)
{
- struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
- struct xfs_bmbt_irec got, del;
- struct xfs_trans *tp;
- xfs_fileoff_t offset_fsb;
- xfs_fileoff_t end_fsb;
- int error;
- unsigned int resblks;
- xfs_filblks_t rlen;
- struct xfs_iext_cursor icur;
-
- trace_xfs_reflink_end_cow(ip, offset, count);
+ struct xfs_bmbt_irec got, del;
+ struct xfs_iext_cursor icur;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+ xfs_filblks_t rlen;
+ unsigned int resblks;
+ int error;
/* No COW extents? That's easy! */
- if (ifp->if_bytes == 0)
+ if (ifp->if_bytes == 0) {
+ *end_fsb = offset_fsb;
return 0;
+ }
- offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
- end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
+ resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
+ XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp);
+ if (error)
+ return error;
/*
- * Start a rolling transaction to switch the mappings. We're
- * unlikely ever to have to remap 16T worth of single-block
- * extents, so just cap the worst case extent count to 2^32-1.
- * Stick a warning in just in case, and avoid 64-bit division.
+ * Lock the inode. We have to ijoin without automatic unlock because
+ * the lead transaction is the refcountbt record deletion; the data
+ * fork update follows as a deferred log item.
*/
- BUILD_BUG_ON(MAX_RW_COUNT > UINT_MAX);
- if (end_fsb - offset_fsb > UINT_MAX) {
- error = -EFSCORRUPTED;
- xfs_force_shutdown(ip->i_mount, SHUTDOWN_CORRUPT_INCORE);
- ASSERT(0);
- goto out;
- }
- resblks = XFS_NEXTENTADD_SPACE_RES(ip->i_mount,
- (unsigned int)(end_fsb - offset_fsb),
- XFS_DATA_FORK);
- error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
- resblks, 0, XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp);
- if (error)
- goto out;
-
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
@@ -679,80 +672,131 @@ xfs_reflink_end_cow(
* left by the time I/O completes for the loser of the race. In that
* case we are done.
*/
- if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
+ if (!xfs_iext_lookup_extent_before(ip, ifp, end_fsb, &icur, &got) ||
+ got.br_startoff + got.br_blockcount <= offset_fsb) {
+ *end_fsb = offset_fsb;
goto out_cancel;
+ }
- /* Walk backwards until we're out of the I/O range... */
- while (got.br_startoff + got.br_blockcount > offset_fsb) {
- del = got;
- xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
-
- /* Extent delete may have bumped ext forward */
- if (!del.br_blockcount)
- goto prev_extent;
+ /*
+ * Structure copy @got into @del, then trim @del to the range that we
+ * were asked to remap. We preserve @got for the eventual CoW fork
+ * deletion; from now on @del represents the mapping that we're
+ * actually remapping.
+ */
+ del = got;
+ xfs_trim_extent(&del, offset_fsb, *end_fsb - offset_fsb);
- /*
- * Only remap real extent that contain data. With AIO
- * speculatively preallocations can leak into the range we
- * are called upon, and we need to skip them.
- */
- if (!xfs_bmap_is_real_extent(&got))
- goto prev_extent;
+ ASSERT(del.br_blockcount > 0);
- /* Unmap the old blocks in the data fork. */
- ASSERT(tp->t_firstblock == NULLFSBLOCK);
- rlen = del.br_blockcount;
- error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1);
- if (error)
- goto out_cancel;
+ /*
+ * Only remap real extents that contain data. With AIO, speculative
+ * preallocations can leak into the range we are called upon, and we
+ * need to skip them.
+ */
+ if (!xfs_bmap_is_real_extent(&got)) {
+ *end_fsb = del.br_startoff;
+ goto out_cancel;
+ }
- /* Trim the extent to whatever got unmapped. */
- if (rlen) {
- xfs_trim_extent(&del, del.br_startoff + rlen,
- del.br_blockcount - rlen);
- }
- trace_xfs_reflink_cow_remap(ip, &del);
+ /* Unmap the old blocks in the data fork. */
+ rlen = del.br_blockcount;
+ error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1);
+ if (error)
+ goto out_cancel;
- /* Free the CoW orphan record. */
- error = xfs_refcount_free_cow_extent(tp, del.br_startblock,
- del.br_blockcount);
- if (error)
- goto out_cancel;
+ /* Trim the extent to whatever got unmapped. */
+ xfs_trim_extent(&del, del.br_startoff + rlen, del.br_blockcount - rlen);
+ trace_xfs_reflink_cow_remap(ip, &del);
- /* Map the new blocks into the data fork. */
- error = xfs_bmap_map_extent(tp, ip, &del);
- if (error)
- goto out_cancel;
+ /* Free the CoW orphan record. */
+ error = xfs_refcount_free_cow_extent(tp, del.br_startblock,
+ del.br_blockcount);
+ if (error)
+ goto out_cancel;
- /* Charge this new data fork mapping to the on-disk quota. */
- xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_DELBCOUNT,
- (long)del.br_blockcount);
+ /* Map the new blocks into the data fork. */
+ error = xfs_bmap_map_extent(tp, ip, &del);
+ if (error)
+ goto out_cancel;
- /* Remove the mapping from the CoW fork. */
- xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
+ /* Charge this new data fork mapping to the on-disk quota. */
+ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_DELBCOUNT,
+ (long)del.br_blockcount);
- error = xfs_defer_finish(&tp);
- if (error)
- goto out_cancel;
- if (!xfs_iext_get_extent(ifp, &icur, &got))
- break;
- continue;
-prev_extent:
- if (!xfs_iext_prev_extent(ifp, &icur, &got))
- break;
- }
+ /* Remove the mapping from the CoW fork. */
+ xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
if (error)
- goto out;
+ return error;
+
+ /* Update the caller about how much progress we made. */
+ *end_fsb = del.br_startoff;
return 0;
out_cancel:
xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
-out:
- trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
+ return error;
+}
+
+/*
+ * Remap parts of a file's data fork after a successful CoW.
+ */
+int
+xfs_reflink_end_cow(
+ struct xfs_inode *ip,
+ xfs_off_t offset,
+ xfs_off_t count)
+{
+ xfs_fileoff_t offset_fsb;
+ xfs_fileoff_t end_fsb;
+ int error = 0;
+
+ trace_xfs_reflink_end_cow(ip, offset, count);
+
+ offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
+ end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
+
+ /*
+ * Walk backwards until we're out of the I/O range. The loop function
+ * repeatedly cycles the ILOCK to allocate one transaction per remapped
+ * extent.
+ *
+ * If we're being called by writeback then the the pages will still
+ * have PageWriteback set, which prevents races with reflink remapping
+ * and truncate. Reflink remapping prevents races with writeback by
+ * taking the iolock and mmaplock before flushing the pages and
+ * remapping, which means there won't be any further writeback or page
+ * cache dirtying until the reflink completes.
+ *
+ * We should never have two threads issuing writeback for the same file
+ * region. There are also have post-eof checks in the writeback
+ * preparation code so that we don't bother writing out pages that are
+ * about to be truncated.
+ *
+ * If we're being called as part of directio write completion, the dio
+ * count is still elevated, which reflink and truncate will wait for.
+ * Reflink remapping takes the iolock and mmaplock and waits for
+ * pending dio to finish, which should prevent any directio until the
+ * remap completes. Multiple concurrent directio writes to the same
+ * region are handled by end_cow processing only occurring for the
+ * threads which succeed; the outcome of multiple overlapping direct
+ * writes is not well defined anyway.
+ *
+ * It's possible that a buffered write and a direct write could collide
+ * here (the buffered write stumbles in after the dio flushes and
+ * invalidates the page cache and immediately queues writeback), but we
+ * have never supported this 100%. If either disk write succeeds the
+ * blocks will be remapped.
+ */
+ while (end_fsb > offset_fsb && !error)
+ error = xfs_reflink_end_cow_extent(ip, offset_fsb, &end_fsb);
+
+ if (error)
+ trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
return error;
}
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 926ed314ffba..ac0fcdad0c4e 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -64,8 +64,12 @@ xfs_rtany_summary(
int log; /* loop counter, log2 of ext. size */
xfs_suminfo_t sum; /* summary data */
+ /* There are no extents at levels < m_rsum_cache[bbno]. */
+ if (mp->m_rsum_cache && low < mp->m_rsum_cache[bbno])
+ low = mp->m_rsum_cache[bbno];
+
/*
- * Loop over logs of extent sizes. Order is irrelevant.
+ * Loop over logs of extent sizes.
*/
for (log = low; log <= high; log++) {
/*
@@ -80,13 +84,17 @@ xfs_rtany_summary(
*/
if (sum) {
*stat = 1;
- return 0;
+ goto out;
}
}
/*
* Found nothing, return failure.
*/
*stat = 0;
+out:
+ /* There were no extents at levels < log. */
+ if (mp->m_rsum_cache && log > mp->m_rsum_cache[bbno])
+ mp->m_rsum_cache[bbno] = log;
return 0;
}
@@ -853,6 +861,21 @@ out_trans_cancel:
return error;
}
+static void
+xfs_alloc_rsum_cache(
+ xfs_mount_t *mp, /* file system mount structure */
+ xfs_extlen_t rbmblocks) /* number of rt bitmap blocks */
+{
+ /*
+ * The rsum cache is initialized to all zeroes, which is trivially a
+ * lower bound on the minimum level with any free extents. We can
+ * continue without the cache if it couldn't be allocated.
+ */
+ mp->m_rsum_cache = kmem_zalloc_large(rbmblocks, KM_SLEEP);
+ if (!mp->m_rsum_cache)
+ xfs_warn(mp, "could not allocate realtime summary cache");
+}
+
/*
* Visible (exported) functions.
*/
@@ -881,6 +904,7 @@ xfs_growfs_rt(
xfs_extlen_t rsumblocks; /* current number of rt summary blks */
xfs_sb_t *sbp; /* old superblock */
xfs_fsblock_t sumbno; /* summary block number */
+ uint8_t *rsum_cache; /* old summary cache */
sbp = &mp->m_sb;
/*
@@ -937,6 +961,11 @@ xfs_growfs_rt(
error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, mp->m_rsumip);
if (error)
return error;
+
+ rsum_cache = mp->m_rsum_cache;
+ if (nrbmblocks != sbp->sb_rbmblocks)
+ xfs_alloc_rsum_cache(mp, nrbmblocks);
+
/*
* Allocate a new (fake) mount/sb.
*/
@@ -1062,6 +1091,20 @@ error_cancel:
*/
kmem_free(nmp);
+ /*
+ * If we had to allocate a new rsum_cache, we either need to free the
+ * old one (if we succeeded) or free the new one and restore the old one
+ * (if there was an error).
+ */
+ if (rsum_cache != mp->m_rsum_cache) {
+ if (error) {
+ kmem_free(mp->m_rsum_cache);
+ mp->m_rsum_cache = rsum_cache;
+ } else {
+ kmem_free(rsum_cache);
+ }
+ }
+
return error;
}
@@ -1187,8 +1230,8 @@ xfs_rtmount_init(
}
/*
- * Get the bitmap and summary inodes into the mount structure
- * at mount time.
+ * Get the bitmap and summary inodes and the summary cache into the mount
+ * structure at mount time.
*/
int /* error */
xfs_rtmount_inodes(
@@ -1198,19 +1241,18 @@ xfs_rtmount_inodes(
xfs_sb_t *sbp;
sbp = &mp->m_sb;
- if (sbp->sb_rbmino == NULLFSINO)
- return 0;
error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip);
if (error)
return error;
ASSERT(mp->m_rbmip != NULL);
- ASSERT(sbp->sb_rsumino != NULLFSINO);
+
error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip);
if (error) {
xfs_irele(mp->m_rbmip);
return error;
}
ASSERT(mp->m_rsumip != NULL);
+ xfs_alloc_rsum_cache(mp, sbp->sb_rbmblocks);
return 0;
}
@@ -1218,6 +1260,7 @@ void
xfs_rtunmount_inodes(
struct xfs_mount *mp)
{
+ kmem_free(mp->m_rsum_cache);
if (mp->m_rbmip)
xfs_irele(mp->m_rbmip);
if (mp->m_rsumip)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index d3e6cd063688..c9097cb0b955 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -38,6 +38,7 @@
#include "xfs_refcount_item.h"
#include "xfs_bmap_item.h"
#include "xfs_reflink.h"
+#include "xfs_defer.h"
#include <linux/namei.h>
#include <linux/dax.h>
@@ -607,7 +608,7 @@ xfs_set_inode_alloc(
}
/* Get the last possible inode in the filesystem */
- agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
+ agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1);
ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
/*
@@ -1149,7 +1150,7 @@ xfs_fs_statfs(
statp->f_bfree = fdblocks - mp->m_alloc_set_aside;
statp->f_bavail = statp->f_bfree;
- fakeinos = statp->f_bfree << sbp->sb_inopblog;
+ fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
if (mp->m_maxicount)
statp->f_files = min_t(typeof(statp->f_files),
@@ -2085,11 +2086,6 @@ init_xfs_fs(void)
printk(KERN_INFO XFS_VERSION_STRING " with "
XFS_BUILD_OPTIONS " enabled\n");
- xfs_extent_free_init_defer_op();
- xfs_rmap_update_init_defer_op();
- xfs_refcount_update_init_defer_op();
- xfs_bmap_update_init_defer_op();
-
xfs_dir_startup();
error = xfs_init_zones();
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index a3e98c64b6e3..b2c1177c717f 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -192,6 +192,7 @@ xfs_symlink(
pathlen = strlen(target_path);
if (pathlen >= XFS_SYMLINK_MAXLEN) /* total string too long */
return -ENAMETOOLONG;
+ ASSERT(pathlen > 0);
udqp = gdqp = NULL;
prid = xfs_get_initial_prid(dp);
@@ -378,6 +379,12 @@ out_release_inode:
/*
* Free a symlink that has blocks associated with it.
+ *
+ * Note: zero length symlinks are not allowed to exist. When we set the size to
+ * zero, also change it to a regular file so that it does not get written to
+ * disk as a zero length symlink. The inode is on the unlinked list already, so
+ * userspace cannot find this inode anymore, so this change is not user visible
+ * but allows us to catch corrupt zero-length symlinks in the verifiers.
*/
STATIC int
xfs_inactive_symlink_rmt(
@@ -412,13 +419,14 @@ xfs_inactive_symlink_rmt(
xfs_trans_ijoin(tp, ip, 0);
/*
- * Lock the inode, fix the size, and join it to the transaction.
- * Hold it so in the normal path, we still have it locked for
- * the second transaction. In the error paths we need it
+ * Lock the inode, fix the size, turn it into a regular file and join it
+ * to the transaction. Hold it so in the normal path, we still have it
+ * locked for the second transaction. In the error paths we need it
* held so the cancel won't rele it, see below.
*/
size = (int)ip->i_d.di_size;
ip->i_d.di_size = 0;
+ VFS_I(ip)->i_mode = (VFS_I(ip)->i_mode & ~S_IFMT) | S_IFREG;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
/*
* Find the block(s) so we can inval and unmap them.
@@ -494,17 +502,10 @@ xfs_inactive_symlink(
return -EIO;
xfs_ilock(ip, XFS_ILOCK_EXCL);
-
- /*
- * Zero length symlinks _can_ exist.
- */
pathlen = (int)ip->i_d.di_size;
- if (!pathlen) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return 0;
- }
+ ASSERT(pathlen);
- if (pathlen < 0 || pathlen > XFS_SYMLINK_MAXLEN) {
+ if (pathlen <= 0 || pathlen > XFS_SYMLINK_MAXLEN) {
xfs_alert(mp, "%s: inode (0x%llx) bad symlink length (%d)",
__func__, (unsigned long long)ip->i_ino, pathlen);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -512,12 +513,12 @@ xfs_inactive_symlink(
return -EFSCORRUPTED;
}
+ /*
+ * Inline fork state gets removed by xfs_difree() so we have nothing to
+ * do here in that case.
+ */
if (ip->i_df.if_flags & XFS_IFINLINE) {
- if (ip->i_df.if_bytes > 0)
- xfs_idata_realloc(ip, -(ip->i_df.if_bytes),
- XFS_DATA_FORK);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- ASSERT(ip->i_df.if_bytes == 0);
return 0;
}
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 8a6532aae779..6fcc893dfc91 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -640,6 +640,16 @@ DEFINE_INODE_EVENT(xfs_inode_set_cowblocks_tag);
DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag);
DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid);
+/*
+ * ftrace's __print_symbolic requires that all enum values be wrapped in the
+ * TRACE_DEFINE_ENUM macro so that the enum value can be encoded in the ftrace
+ * ring buffer. Somehow this was only worth mentioning in the ftrace sample
+ * code.
+ */
+TRACE_DEFINE_ENUM(PE_SIZE_PTE);
+TRACE_DEFINE_ENUM(PE_SIZE_PMD);
+TRACE_DEFINE_ENUM(PE_SIZE_PUD);
+
TRACE_EVENT(xfs_filemap_fault,
TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size,
bool write_fault),
@@ -1208,6 +1218,12 @@ DEFINE_EVENT(xfs_readpage_class, name, \
DEFINE_READPAGE_EVENT(xfs_vm_readpage);
DEFINE_READPAGE_EVENT(xfs_vm_readpages);
+TRACE_DEFINE_ENUM(XFS_IO_HOLE);
+TRACE_DEFINE_ENUM(XFS_IO_DELALLOC);
+TRACE_DEFINE_ENUM(XFS_IO_UNWRITTEN);
+TRACE_DEFINE_ENUM(XFS_IO_OVERWRITE);
+TRACE_DEFINE_ENUM(XFS_IO_COW);
+
DECLARE_EVENT_CLASS(xfs_imap_class,
TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
int type, struct xfs_bmbt_irec *irec),
@@ -1885,11 +1901,11 @@ TRACE_EVENT(xfs_dir2_leafn_moveents,
{ 0, "target" }, \
{ 1, "temp" }
-#define XFS_INODE_FORMAT_STR \
- { 0, "invalid" }, \
- { 1, "local" }, \
- { 2, "extent" }, \
- { 3, "btree" }
+TRACE_DEFINE_ENUM(XFS_DINODE_FMT_DEV);
+TRACE_DEFINE_ENUM(XFS_DINODE_FMT_LOCAL);
+TRACE_DEFINE_ENUM(XFS_DINODE_FMT_EXTENTS);
+TRACE_DEFINE_ENUM(XFS_DINODE_FMT_BTREE);
+TRACE_DEFINE_ENUM(XFS_DINODE_FMT_UUID);
DECLARE_EVENT_CLASS(xfs_swap_extent_class,
TP_PROTO(struct xfs_inode *ip, int which),
@@ -2178,6 +2194,14 @@ DEFINE_DISCARD_EVENT(xfs_discard_exclude);
DEFINE_DISCARD_EVENT(xfs_discard_busy);
/* btree cursor events */
+TRACE_DEFINE_ENUM(XFS_BTNUM_BNOi);
+TRACE_DEFINE_ENUM(XFS_BTNUM_CNTi);
+TRACE_DEFINE_ENUM(XFS_BTNUM_BMAPi);
+TRACE_DEFINE_ENUM(XFS_BTNUM_INOi);
+TRACE_DEFINE_ENUM(XFS_BTNUM_FINOi);
+TRACE_DEFINE_ENUM(XFS_BTNUM_RMAPi);
+TRACE_DEFINE_ENUM(XFS_BTNUM_REFCi);
+
DECLARE_EVENT_CLASS(xfs_btree_cur_class,
TP_PROTO(struct xfs_btree_cur *cur, int level, struct xfs_buf *bp),
TP_ARGS(cur, level, bp),
@@ -2197,9 +2221,9 @@ DECLARE_EVENT_CLASS(xfs_btree_cur_class,
__entry->ptr = cur->bc_ptrs[level];
__entry->daddr = bp ? bp->b_bn : -1;
),
- TP_printk("dev %d:%d btnum %d level %d/%d ptr %d daddr 0x%llx",
+ TP_printk("dev %d:%d btree %s level %d/%d ptr %d daddr 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->btnum,
+ __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
__entry->level,
__entry->nlevels,
__entry->ptr,
@@ -2276,7 +2300,7 @@ DECLARE_EVENT_CLASS(xfs_defer_pending_class,
),
TP_fast_assign(
__entry->dev = mp ? mp->m_super->s_dev : 0;
- __entry->type = dfp->dfp_type->type;
+ __entry->type = dfp->dfp_type;
__entry->intent = dfp->dfp_intent;
__entry->committed = dfp->dfp_done != NULL;
__entry->nr = dfp->dfp_count;
@@ -2405,7 +2429,7 @@ DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_agfl_free_deferred);
DECLARE_EVENT_CLASS(xfs_rmap_class,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
xfs_agblock_t agbno, xfs_extlen_t len, bool unwritten,
- struct xfs_owner_info *oinfo),
+ const struct xfs_owner_info *oinfo),
TP_ARGS(mp, agno, agbno, len, unwritten, oinfo),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -2440,7 +2464,7 @@ DECLARE_EVENT_CLASS(xfs_rmap_class,
DEFINE_EVENT(xfs_rmap_class, name, \
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
xfs_agblock_t agbno, xfs_extlen_t len, bool unwritten, \
- struct xfs_owner_info *oinfo), \
+ const struct xfs_owner_info *oinfo), \
TP_ARGS(mp, agno, agbno, len, unwritten, oinfo))
/* simple AG-based error/%ip tracepoint class */
@@ -2610,10 +2634,9 @@ DEFINE_AG_ERROR_EVENT(xfs_ag_resv_init_error);
#define DEFINE_AG_EXTENT_EVENT(name) DEFINE_DISCARD_EVENT(name)
/* ag btree lookup tracepoint class */
-#define XFS_AG_BTREE_CMP_FORMAT_STR \
- { XFS_LOOKUP_EQ, "eq" }, \
- { XFS_LOOKUP_LE, "le" }, \
- { XFS_LOOKUP_GE, "ge" }
+TRACE_DEFINE_ENUM(XFS_LOOKUP_EQi);
+TRACE_DEFINE_ENUM(XFS_LOOKUP_LEi);
+TRACE_DEFINE_ENUM(XFS_LOOKUP_GEi);
DECLARE_EVENT_CLASS(xfs_ag_btree_lookup_class,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
xfs_agblock_t agbno, xfs_lookup_t dir),
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index a0c5dbda18aa..c6e1c5704a8c 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -223,13 +223,13 @@ void xfs_trans_dirty_buf(struct xfs_trans *, struct xfs_buf *);
bool xfs_trans_buf_is_dirty(struct xfs_buf *bp);
void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
-void xfs_extent_free_init_defer_op(void);
struct xfs_efd_log_item *xfs_trans_get_efd(struct xfs_trans *,
struct xfs_efi_log_item *,
uint);
int xfs_trans_free_extent(struct xfs_trans *,
struct xfs_efd_log_item *, xfs_fsblock_t,
- xfs_extlen_t, struct xfs_owner_info *,
+ xfs_extlen_t,
+ const struct xfs_owner_info *,
bool);
int xfs_trans_commit(struct xfs_trans *);
int xfs_trans_roll(struct xfs_trans **);
@@ -248,7 +248,6 @@ extern kmem_zone_t *xfs_trans_zone;
/* rmap updates */
enum xfs_rmap_intent_type;
-void xfs_rmap_update_init_defer_op(void);
struct xfs_rud_log_item *xfs_trans_get_rud(struct xfs_trans *tp,
struct xfs_rui_log_item *ruip);
int xfs_trans_log_finish_rmap_update(struct xfs_trans *tp,
@@ -260,7 +259,6 @@ int xfs_trans_log_finish_rmap_update(struct xfs_trans *tp,
/* refcount updates */
enum xfs_refcount_intent_type;
-void xfs_refcount_update_init_defer_op(void);
struct xfs_cud_log_item *xfs_trans_get_cud(struct xfs_trans *tp,
struct xfs_cui_log_item *cuip);
int xfs_trans_log_finish_refcount_update(struct xfs_trans *tp,
@@ -272,7 +270,6 @@ int xfs_trans_log_finish_refcount_update(struct xfs_trans *tp,
/* mapping updates */
enum xfs_bmap_intent_type;
-void xfs_bmap_update_init_defer_op(void);
struct xfs_bud_log_item *xfs_trans_get_bud(struct xfs_trans *tp,
struct xfs_bui_log_item *buip);
int xfs_trans_log_finish_bmap_update(struct xfs_trans *tp,
diff --git a/fs/xfs/xfs_trans_bmap.c b/fs/xfs/xfs_trans_bmap.c
index 741c558b2179..11cff449d055 100644
--- a/fs/xfs/xfs_trans_bmap.c
+++ b/fs/xfs/xfs_trans_bmap.c
@@ -17,6 +17,7 @@
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_inode.h"
+#include "xfs_defer.h"
/*
* This routine is called to allocate a "bmap update done"
@@ -220,8 +221,7 @@ xfs_bmap_update_cancel_item(
kmem_free(bmap);
}
-static const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
- .type = XFS_DEFER_OPS_TYPE_BMAP,
+const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
.max_items = XFS_BUI_MAX_FAST_EXTENTS,
.diff_items = xfs_bmap_update_diff_items,
.create_intent = xfs_bmap_update_create_intent,
@@ -231,10 +231,3 @@ static const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
.finish_item = xfs_bmap_update_finish_item,
.cancel_item = xfs_bmap_update_cancel_item,
};
-
-/* Register the deferred op type. */
-void
-xfs_bmap_update_init_defer_op(void)
-{
- xfs_defer_init_op_type(&xfs_bmap_update_defer_type);
-}
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
index 855c0b651fd4..0710434eb240 100644
--- a/fs/xfs/xfs_trans_extfree.c
+++ b/fs/xfs/xfs_trans_extfree.c
@@ -18,6 +18,7 @@
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_trace.h"
+#include "xfs_defer.h"
/*
* This routine is called to allocate an "extent free done"
@@ -52,19 +53,20 @@ xfs_trans_get_efd(struct xfs_trans *tp,
*/
int
xfs_trans_free_extent(
- struct xfs_trans *tp,
- struct xfs_efd_log_item *efdp,
- xfs_fsblock_t start_block,
- xfs_extlen_t ext_len,
- struct xfs_owner_info *oinfo,
- bool skip_discard)
+ struct xfs_trans *tp,
+ struct xfs_efd_log_item *efdp,
+ xfs_fsblock_t start_block,
+ xfs_extlen_t ext_len,
+ const struct xfs_owner_info *oinfo,
+ bool skip_discard)
{
- struct xfs_mount *mp = tp->t_mountp;
- uint next_extent;
- xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block);
- xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, start_block);
- struct xfs_extent *extp;
- int error;
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_extent *extp;
+ uint next_extent;
+ xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block);
+ xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp,
+ start_block);
+ int error;
trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len);
@@ -206,8 +208,7 @@ xfs_extent_free_cancel_item(
kmem_free(free);
}
-static const struct xfs_defer_op_type xfs_extent_free_defer_type = {
- .type = XFS_DEFER_OPS_TYPE_FREE,
+const struct xfs_defer_op_type xfs_extent_free_defer_type = {
.max_items = XFS_EFI_MAX_FAST_EXTENTS,
.diff_items = xfs_extent_free_diff_items,
.create_intent = xfs_extent_free_create_intent,
@@ -274,8 +275,7 @@ xfs_agfl_free_finish_item(
/* sub-type with special handling for AGFL deferred frees */
-static const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
- .type = XFS_DEFER_OPS_TYPE_AGFL_FREE,
+const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
.max_items = XFS_EFI_MAX_FAST_EXTENTS,
.diff_items = xfs_extent_free_diff_items,
.create_intent = xfs_extent_free_create_intent,
@@ -285,11 +285,3 @@ static const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
.finish_item = xfs_agfl_free_finish_item,
.cancel_item = xfs_extent_free_cancel_item,
};
-
-/* Register the deferred op type. */
-void
-xfs_extent_free_init_defer_op(void)
-{
- xfs_defer_init_op_type(&xfs_extent_free_defer_type);
- xfs_defer_init_op_type(&xfs_agfl_free_defer_type);
-}
diff --git a/fs/xfs/xfs_trans_refcount.c b/fs/xfs/xfs_trans_refcount.c
index 523c55663954..6c947ff4faf6 100644
--- a/fs/xfs/xfs_trans_refcount.c
+++ b/fs/xfs/xfs_trans_refcount.c
@@ -16,6 +16,7 @@
#include "xfs_refcount_item.h"
#include "xfs_alloc.h"
#include "xfs_refcount.h"
+#include "xfs_defer.h"
/*
* This routine is called to allocate a "refcount update done"
@@ -227,8 +228,7 @@ xfs_refcount_update_cancel_item(
kmem_free(refc);
}
-static const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
- .type = XFS_DEFER_OPS_TYPE_REFCOUNT,
+const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
.max_items = XFS_CUI_MAX_FAST_EXTENTS,
.diff_items = xfs_refcount_update_diff_items,
.create_intent = xfs_refcount_update_create_intent,
@@ -239,10 +239,3 @@ static const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
.finish_cleanup = xfs_refcount_update_finish_cleanup,
.cancel_item = xfs_refcount_update_cancel_item,
};
-
-/* Register the deferred op type. */
-void
-xfs_refcount_update_init_defer_op(void)
-{
- xfs_defer_init_op_type(&xfs_refcount_update_defer_type);
-}
diff --git a/fs/xfs/xfs_trans_rmap.c b/fs/xfs/xfs_trans_rmap.c
index 05b00e40251f..a42890931ecd 100644
--- a/fs/xfs/xfs_trans_rmap.c
+++ b/fs/xfs/xfs_trans_rmap.c
@@ -16,6 +16,7 @@
#include "xfs_rmap_item.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
+#include "xfs_defer.h"
/* Set the map extent flags for this reverse mapping. */
static void
@@ -244,8 +245,7 @@ xfs_rmap_update_cancel_item(
kmem_free(rmap);
}
-static const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
- .type = XFS_DEFER_OPS_TYPE_RMAP,
+const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
.max_items = XFS_RUI_MAX_FAST_EXTENTS,
.diff_items = xfs_rmap_update_diff_items,
.create_intent = xfs_rmap_update_create_intent,
@@ -256,10 +256,3 @@ static const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
.finish_cleanup = xfs_rmap_update_finish_cleanup,
.cancel_item = xfs_rmap_update_cancel_item,
};
-
-/* Register the deferred op type. */
-void
-xfs_rmap_update_init_defer_op(void)
-{
- xfs_defer_init_op_type(&xfs_rmap_update_defer_type);
-}