aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig1
-rw-r--r--fs/Kconfig.binfmt35
-rw-r--r--fs/Makefile1
-rw-r--r--fs/affs/super.c2
-rw-r--r--fs/afs/dir.c23
-rw-r--r--fs/aio.c9
-rw-r--r--fs/anon_inodes.c2
-rw-r--r--fs/bad_inode.c2
-rw-r--r--fs/binfmt_aout.c342
-rw-r--r--fs/binfmt_elf.c3
-rw-r--r--fs/btrfs/compression.c45
-rw-r--r--fs/btrfs/extent_io.c33
-rw-r--r--fs/btrfs/inode.c8
-rw-r--r--fs/btrfs/subpage.c2
-rw-r--r--fs/btrfs/tests/extent-io-tests.c32
-rw-r--r--fs/buffer.c195
-rw-r--r--fs/cachefiles/namei.c122
-rw-r--r--fs/ceph/caps.c14
-rw-r--r--fs/ceph/export.c3
-rw-r--r--fs/ceph/inode.c31
-rw-r--r--fs/ceph/mds_client.c11
-rw-r--r--fs/ceph/mds_client.h6
-rw-r--r--fs/ceph/mdsmap.c2
-rw-r--r--fs/cifs/cached_dir.c490
-rw-r--r--fs/cifs/cached_dir.h30
-rw-r--r--fs/cifs/cifs_debug.c4
-rw-r--r--fs/cifs/cifs_debug.h6
-rw-r--r--fs/cifs/cifs_ioctl.h8
-rw-r--r--fs/cifs/cifs_swn.c12
-rw-r--r--fs/cifs/cifsencrypt.c100
-rw-r--r--fs/cifs/cifsfs.c38
-rw-r--r--fs/cifs/cifsfs.h4
-rw-r--r--fs/cifs/cifsglob.h78
-rw-r--r--fs/cifs/cifspdu.h7
-rw-r--r--fs/cifs/cifsproto.h20
-rw-r--r--fs/cifs/cifssmb.c4
-rw-r--r--fs/cifs/connect.c44
-rw-r--r--fs/cifs/dfs_cache.c2
-rw-r--r--fs/cifs/dir.c46
-rw-r--r--fs/cifs/file.c78
-rw-r--r--fs/cifs/fs_context.c12
-rw-r--r--fs/cifs/fscache.c2
-rw-r--r--fs/cifs/inode.c179
-rw-r--r--fs/cifs/ioctl.c25
-rw-r--r--fs/cifs/link.c120
-rw-r--r--fs/cifs/misc.c69
-rw-r--r--fs/cifs/netlink.c1
-rw-r--r--fs/cifs/readdir.c31
-rw-r--r--fs/cifs/sess.c49
-rw-r--r--fs/cifs/smb1ops.c56
-rw-r--r--fs/cifs/smb2file.c127
-rw-r--r--fs/cifs/smb2inode.c174
-rw-r--r--fs/cifs/smb2misc.c96
-rw-r--r--fs/cifs/smb2ops.c218
-rw-r--r--fs/cifs/smb2pdu.c119
-rw-r--r--fs/cifs/smb2pdu.h3
-rw-r--r--fs/cifs/smb2proto.h25
-rw-r--r--fs/cifs/smb2transport.c117
-rw-r--r--fs/cifs/smbdirect.c227
-rw-r--r--fs/cifs/smbdirect.h14
-rw-r--r--fs/cifs/trace.h3
-rw-r--r--fs/cifs/transport.c5
-rw-r--r--fs/coredump.c82
-rw-r--r--fs/crypto/bio.c16
-rw-r--r--fs/crypto/fscrypt_private.h82
-rw-r--r--fs/crypto/hooks.c10
-rw-r--r--fs/crypto/inline_crypt.c196
-rw-r--r--fs/crypto/keyring.c500
-rw-r--r--fs/crypto/keysetup.c89
-rw-r--r--fs/crypto/keysetup_v1.c4
-rw-r--r--fs/crypto/policy.c21
-rw-r--r--fs/d_path.c5
-rw-r--r--fs/dcache.c17
-rw-r--r--fs/debugfs/file.c16
-rw-r--r--fs/debugfs/inode.c37
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/dlm/ast.c15
-rw-r--r--fs/dlm/ast.h1
-rw-r--r--fs/dlm/dlm_internal.h2
-rw-r--r--fs/dlm/lock.c167
-rw-r--r--fs/dlm/lock.h2
-rw-r--r--fs/dlm/lockspace.c32
-rw-r--r--fs/dlm/lockspace.h13
-rw-r--r--fs/dlm/lowcomms.c4
-rw-r--r--fs/dlm/netlink.c1
-rw-r--r--fs/dlm/user.c17
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h2
-rw-r--r--fs/ecryptfs/file.c40
-rw-r--r--fs/ecryptfs/inode.c2
-rw-r--r--fs/ecryptfs/main.c2
-rw-r--r--fs/efivarfs/vars.c16
-rw-r--r--fs/erofs/decompressor.c47
-rw-r--r--fs/erofs/decompressor_lzma.c3
-rw-r--r--fs/erofs/erofs_fs.h40
-rw-r--r--fs/erofs/fscache.c480
-rw-r--r--fs/erofs/inode.c26
-rw-r--r--fs/erofs/internal.h57
-rw-r--r--fs/erofs/namei.c13
-rw-r--r--fs/erofs/super.c96
-rw-r--r--fs/erofs/sysfs.c23
-rw-r--r--fs/erofs/xattr.h2
-rw-r--r--fs/erofs/zdata.c79
-rw-r--r--fs/erofs/zdata.h6
-rw-r--r--fs/erofs/zmap.c122
-rw-r--r--fs/eventfd.c10
-rw-r--r--fs/eventpoll.c2
-rw-r--r--fs/exec.c26
-rw-r--r--fs/exfat/dir.c6
-rw-r--r--fs/exfat/inode.c2
-rw-r--r--fs/exportfs/expfs.c7
-rw-r--r--fs/ext2/balloc.c7
-rw-r--r--fs/ext2/ialloc.c3
-rw-r--r--fs/ext2/namei.c6
-rw-r--r--fs/ext2/super.c22
-rw-r--r--fs/ext4/ext4.h6
-rw-r--r--fs/ext4/extents.c107
-rw-r--r--fs/ext4/extents_status.c3
-rw-r--r--fs/ext4/fast_commit.c215
-rw-r--r--fs/ext4/fast_commit.h3
-rw-r--r--fs/ext4/file.c43
-rw-r--r--fs/ext4/ialloc.c7
-rw-r--r--fs/ext4/inode.c54
-rw-r--r--fs/ext4/ioctl.c14
-rw-r--r--fs/ext4/mballoc.c3
-rw-r--r--fs/ext4/migrate.c6
-rw-r--r--fs/ext4/mmp.c2
-rw-r--r--fs/ext4/move_extent.c26
-rw-r--r--fs/ext4/namei.c33
-rw-r--r--fs/ext4/readpage.c10
-rw-r--r--fs/ext4/resize.c7
-rw-r--r--fs/ext4/super.c1244
-rw-r--r--fs/ext4/verity.c9
-rw-r--r--fs/ext4/xattr.c1
-rw-r--r--fs/f2fs/acl.c2
-rw-r--r--fs/f2fs/checkpoint.c65
-rw-r--r--fs/f2fs/compress.c32
-rw-r--r--fs/f2fs/data.c71
-rw-r--r--fs/f2fs/debug.c9
-rw-r--r--fs/f2fs/dir.c1
-rw-r--r--fs/f2fs/extent_cache.c9
-rw-r--r--fs/f2fs/f2fs.h88
-rw-r--r--fs/f2fs/file.c98
-rw-r--r--fs/f2fs/gc.c42
-rw-r--r--fs/f2fs/inline.c17
-rw-r--r--fs/f2fs/inode.c51
-rw-r--r--fs/f2fs/iostat.c74
-rw-r--r--fs/f2fs/iostat.h4
-rw-r--r--fs/f2fs/namei.c15
-rw-r--r--fs/f2fs/node.c9
-rw-r--r--fs/f2fs/recovery.c29
-rw-r--r--fs/f2fs/segment.c45
-rw-r--r--fs/f2fs/segment.h2
-rw-r--r--fs/f2fs/super.c120
-rw-r--r--fs/f2fs/sysfs.c9
-rw-r--r--fs/f2fs/verity.c15
-rw-r--r--fs/f2fs/xattr.c8
-rw-r--r--fs/fat/dir.c8
-rw-r--r--fs/fat/file.c5
-rw-r--r--fs/fat/inode.c2
-rw-r--r--fs/fhandle.c2
-rw-r--r--fs/file.c1
-rw-r--r--fs/file_table.c7
-rw-r--r--fs/fs-writeback.c37
-rw-r--r--fs/fuse/dev.c3
-rw-r--r--fs/fuse/dir.c24
-rw-r--r--fs/fuse/file.c4
-rw-r--r--fs/fuse/fuse_i.h3
-rw-r--r--fs/fuse/readdir.c10
-rw-r--r--fs/gfs2/export.c6
-rw-r--r--fs/gfs2/file.c29
-rw-r--r--fs/gfs2/glock.c257
-rw-r--r--fs/gfs2/glock.h2
-rw-r--r--fs/gfs2/inode.c13
-rw-r--r--fs/gfs2/lock_dlm.c2
-rw-r--r--fs/gfs2/main.c24
-rw-r--r--fs/gfs2/meta_io.c7
-rw-r--r--fs/gfs2/ops_fstype.c31
-rw-r--r--fs/gfs2/quota.c8
-rw-r--r--fs/gfs2/super.c3
-rw-r--r--fs/gfs2/util.c12
-rw-r--r--fs/hfs/bnode.c32
-rw-r--r--fs/hfs/btree.c29
-rw-r--r--fs/hfsplus/bitmap.c20
-rw-r--r--fs/hfsplus/bnode.c105
-rw-r--r--fs/hfsplus/btree.c27
-rw-r--r--fs/hostfs/hostfs_kern.c2
-rw-r--r--fs/hugetlbfs/inode.c342
-rw-r--r--fs/inode.c7
-rw-r--r--fs/internal.h15
-rw-r--r--fs/iomap/buffered-io.c3
-rw-r--r--fs/iomap/trace.h1
-rw-r--r--fs/isofs/compress.c22
-rw-r--r--fs/isofs/inode.c9
-rw-r--r--fs/jbd2/commit.c12
-rw-r--r--fs/jbd2/journal.c34
-rw-r--r--fs/jbd2/recovery.c17
-rw-r--r--fs/jbd2/transaction.c6
-rw-r--r--fs/jffs2/wbuf.c6
-rw-r--r--fs/kernfs/dir.c107
-rw-r--r--fs/kernfs/file.c151
-rw-r--r--fs/kernfs/kernfs-internal.h1
-rw-r--r--fs/ksmbd/auth.c15
-rw-r--r--fs/ksmbd/auth.h3
-rw-r--r--fs/ksmbd/connection.c8
-rw-r--r--fs/ksmbd/connection.h2
-rw-r--r--fs/ksmbd/ksmbd_netlink.h3
-rw-r--r--fs/ksmbd/mgmt/share_config.c36
-rw-r--r--fs/ksmbd/mgmt/share_config.h4
-rw-r--r--fs/ksmbd/mgmt/tree_connect.c6
-rw-r--r--fs/ksmbd/mgmt/tree_connect.h2
-rw-r--r--fs/ksmbd/misc.c46
-rw-r--r--fs/ksmbd/misc.h5
-rw-r--r--fs/ksmbd/ndr.c8
-rw-r--r--fs/ksmbd/oplock.c27
-rw-r--r--fs/ksmbd/server.c4
-rw-r--r--fs/ksmbd/smb2pdu.c142
-rw-r--r--fs/ksmbd/smb2pdu.h7
-rw-r--r--fs/ksmbd/smb_common.c6
-rw-r--r--fs/ksmbd/smbacl.c12
-rw-r--r--fs/ksmbd/smbacl.h18
-rw-r--r--fs/ksmbd/transport_ipc.c1
-rw-r--r--fs/ksmbd/transport_rdma.c8
-rw-r--r--fs/ksmbd/transport_tcp.c3
-rw-r--r--fs/ksmbd/unicode.h3
-rw-r--r--fs/ksmbd/vfs.c50
-rw-r--r--fs/ksmbd/vfs.h4
-rw-r--r--fs/libfs.c46
-rw-r--r--fs/lockd/host.c2
-rw-r--r--fs/lockd/svc4proc.c24
-rw-r--r--fs/lockd/svcproc.c24
-rw-r--r--fs/mbcache.c17
-rw-r--r--fs/minix/namei.c6
-rw-r--r--fs/namei.c94
-rw-r--r--fs/nfs/callback_xdr.c1
-rw-r--r--fs/nfs/client.c4
-rw-r--r--fs/nfs/delegation.c36
-rw-r--r--fs/nfs/dir.c7
-rw-r--r--fs/nfs/dns_resolve.c7
-rw-r--r--fs/nfs/dns_resolve.h2
-rw-r--r--fs/nfs/file.c9
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c109
-rw-r--r--fs/nfs/fs_context.c14
-rw-r--r--fs/nfs/inode.c18
-rw-r--r--fs/nfs/internal.h16
-rw-r--r--fs/nfs/mount_clnt.c4
-rw-r--r--fs/nfs/namespace.c2
-rw-r--r--fs/nfs/nfs3client.c4
-rw-r--r--fs/nfs/nfs3proc.c3
-rw-r--r--fs/nfs/nfs42proc.c9
-rw-r--r--fs/nfs/nfs42xattr.c2
-rw-r--r--fs/nfs/nfs42xdr.c8
-rw-r--r--fs/nfs/nfs4_fs.h3
-rw-r--r--fs/nfs/nfs4client.c21
-rw-r--r--fs/nfs/nfs4idmap.c2
-rw-r--r--fs/nfs/nfs4namespace.c16
-rw-r--r--fs/nfs/nfs4proc.c28
-rw-r--r--fs/nfs/nfs4state.c15
-rw-r--r--fs/nfs/nfs4trace.h50
-rw-r--r--fs/nfs/nfsroot.c2
-rw-r--r--fs/nfs/pnfs.c13
-rw-r--r--fs/nfs/pnfs.h9
-rw-r--r--fs/nfs/pnfs_nfs.c10
-rw-r--r--fs/nfs/super.c5
-rw-r--r--fs/nfsd/cache.h2
-rw-r--r--fs/nfsd/filecache.c98
-rw-r--r--fs/nfsd/filecache.h2
-rw-r--r--fs/nfsd/netns.h4
-rw-r--r--fs/nfsd/nfs2acl.c5
-rw-r--r--fs/nfsd/nfs3acl.c3
-rw-r--r--fs/nfsd/nfs3proc.c43
-rw-r--r--fs/nfsd/nfs3xdr.c18
-rw-r--r--fs/nfsd/nfs4callback.c14
-rw-r--r--fs/nfsd/nfs4idmap.c8
-rw-r--r--fs/nfsd/nfs4layouts.c2
-rw-r--r--fs/nfsd/nfs4proc.c226
-rw-r--r--fs/nfsd/nfs4recover.c22
-rw-r--r--fs/nfsd/nfs4state.c222
-rw-r--r--fs/nfsd/nfs4xdr.c102
-rw-r--r--fs/nfsd/nfscache.c13
-rw-r--r--fs/nfsd/nfsctl.c58
-rw-r--r--fs/nfsd/nfsd.h13
-rw-r--r--fs/nfsd/nfsfh.c8
-rw-r--r--fs/nfsd/nfsproc.c39
-rw-r--r--fs/nfsd/nfssvc.c2
-rw-r--r--fs/nfsd/nfsxdr.c4
-rw-r--r--fs/nfsd/state.h11
-rw-r--r--fs/nfsd/stats.c14
-rw-r--r--fs/nfsd/trace.h131
-rw-r--r--fs/nfsd/vfs.c134
-rw-r--r--fs/nfsd/vfs.h4
-rw-r--r--fs/nfsd/xdr4.h9
-rw-r--r--fs/nilfs2/btree.c6
-rw-r--r--fs/nilfs2/inode.c19
-rw-r--r--fs/nilfs2/page.c45
-rw-r--r--fs/nilfs2/segment.c25
-rw-r--r--fs/notify/fanotify/fanotify.c2
-rw-r--r--fs/notify/fanotify/fanotify.h8
-rw-r--r--fs/notify/fanotify/fanotify_user.c6
-rw-r--r--fs/notify/fsnotify.h4
-rw-r--r--fs/nsfs.c2
-rw-r--r--fs/ntfs/attrib.c28
-rw-r--r--fs/ntfs/file.c4
-rw-r--r--fs/ntfs/inode.c7
-rw-r--r--fs/ntfs/super.c3
-rw-r--r--fs/ntfs3/bitmap.c4
-rw-r--r--fs/ntfs3/fslog.c6
-rw-r--r--fs/ntfs3/inode.c9
-rw-r--r--fs/ntfs3/xattr.c102
-rw-r--r--fs/ocfs2/aops.c2
-rw-r--r--fs/ocfs2/dir.c10
-rw-r--r--fs/ocfs2/journal.c14
-rw-r--r--fs/ocfs2/namei.c23
-rw-r--r--fs/ocfs2/ocfs2_fs.h8
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/ocfs2/stack_user.c2
-rw-r--r--fs/ocfs2/stackglue.c4
-rw-r--r--fs/ocfs2/suballoc.h2
-rw-r--r--fs/ocfs2/super.c6
-rw-r--r--fs/open.c11
-rw-r--r--fs/orangefs/dir.c2
-rw-r--r--fs/orangefs/file.c4
-rw-r--r--fs/overlayfs/copy_up.c116
-rw-r--r--fs/overlayfs/file.c2
-rw-r--r--fs/overlayfs/inode.c6
-rw-r--r--fs/overlayfs/namei.c4
-rw-r--r--fs/overlayfs/overlayfs.h36
-rw-r--r--fs/overlayfs/readdir.c44
-rw-r--r--fs/overlayfs/super.c33
-rw-r--r--fs/overlayfs/util.c10
-rw-r--r--fs/pipe.c2
-rw-r--r--fs/posix_acl.c291
-rw-r--r--fs/proc/Kconfig1
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/base.c24
-rw-r--r--fs/proc/devices.c6
-rw-r--r--fs/proc/internal.h7
-rw-r--r--fs/proc/kmsg.c2
-rw-r--r--fs/proc/loadavg.c6
-rw-r--r--fs/proc/meminfo.c7
-rw-r--r--fs/proc/page.c3
-rw-r--r--fs/proc/proc_sysctl.c9
-rw-r--r--fs/proc/softirqs.c6
-rw-r--r--fs/proc/task_mmu.c96
-rw-r--r--fs/proc/task_nommu.c45
-rw-r--r--fs/proc/uptime.c6
-rw-r--r--fs/proc/version.c6
-rw-r--r--fs/pstore/platform.c63
-rw-r--r--fs/qnx6/inode.c6
-rw-r--r--fs/quota/quota_tree.c73
-rw-r--r--fs/ramfs/file-nommu.c50
-rw-r--r--fs/ramfs/inode.c6
-rw-r--r--fs/read_write.c22
-rw-r--r--fs/readdir.c68
-rw-r--r--fs/reiserfs/journal.c11
-rw-r--r--fs/reiserfs/prints.c2
-rw-r--r--fs/reiserfs/procfs.c4
-rw-r--r--fs/reiserfs/resize.c2
-rw-r--r--fs/reiserfs/stree.c4
-rw-r--r--fs/reiserfs/super.c11
-rw-r--r--fs/reiserfs/xattr.c20
-rw-r--r--fs/smbfs_common/smb2pdu.h6
-rw-r--r--fs/squashfs/file.c23
-rw-r--r--fs/squashfs/page_actor.c3
-rw-r--r--fs/squashfs/page_actor.h6
-rw-r--r--fs/stat.c14
-rw-r--r--fs/super.c3
-rw-r--r--fs/ubifs/crypto.c11
-rw-r--r--fs/ubifs/debug.c10
-rw-r--r--fs/ubifs/dir.c34
-rw-r--r--fs/ubifs/journal.c30
-rw-r--r--fs/ubifs/lpt_commit.c14
-rw-r--r--fs/ubifs/tnc_commit.c2
-rw-r--r--fs/ubifs/ubifs.h2
-rw-r--r--fs/ubifs/xattr.c2
-rw-r--r--fs/udf/dir.c2
-rw-r--r--fs/udf/directory.c2
-rw-r--r--fs/udf/file.c1
-rw-r--r--fs/udf/inode.c8
-rw-r--r--fs/udf/namei.c6
-rw-r--r--fs/ufs/balloc.c12
-rw-r--r--fs/userfaultfd.c140
-rw-r--r--fs/verity/read_metadata.c6
-rw-r--r--fs/verity/verify.c14
-rw-r--r--fs/xattr.c15
-rw-r--r--fs/xfs/libxfs/xfs_ag.h15
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c8
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c2
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c50
-rw-r--r--fs/xfs/libxfs/xfs_dir2.h4
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c9
-rw-r--r--fs/xfs/libxfs/xfs_dir2_sf.c4
-rw-r--r--fs/xfs/libxfs/xfs_format.h22
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c4
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c4
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h60
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c286
-rw-r--r--fs/xfs/libxfs/xfs_refcount.h40
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c15
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c9
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c4
-rw-r--r--fs/xfs/libxfs/xfs_types.h30
-rw-r--r--fs/xfs/scrub/alloc.c4
-rw-r--r--fs/xfs/scrub/dir.c10
-rw-r--r--fs/xfs/scrub/ialloc.c5
-rw-r--r--fs/xfs/scrub/parent.c4
-rw-r--r--fs/xfs/scrub/refcount.c72
-rw-r--r--fs/xfs/xfs_attr_item.c73
-rw-r--r--fs/xfs/xfs_bmap_item.c54
-rw-r--r--fs/xfs/xfs_dir2_readdir.c2
-rw-r--r--fs/xfs/xfs_error.c11
-rw-r--r--fs/xfs/xfs_extfree_item.c94
-rw-r--r--fs/xfs/xfs_extfree_item.h16
-rw-r--r--fs/xfs/xfs_file.c7
-rw-r--r--fs/xfs/xfs_icache.c2
-rw-r--r--fs/xfs/xfs_inode.c15
-rw-r--r--fs/xfs/xfs_inode_item.c2
-rw-r--r--fs/xfs/xfs_inode_item_recover.c4
-rw-r--r--fs/xfs/xfs_iops.c32
-rw-r--r--fs/xfs/xfs_iops.h1
-rw-r--r--fs/xfs/xfs_itable.c8
-rw-r--r--fs/xfs/xfs_log.c12
-rw-r--r--fs/xfs/xfs_log_recover.c10
-rw-r--r--fs/xfs/xfs_mount.c38
-rw-r--r--fs/xfs/xfs_notify_failure.c32
-rw-r--r--fs/xfs/xfs_ondisk.h23
-rw-r--r--fs/xfs/xfs_refcount_item.c57
-rw-r--r--fs/xfs/xfs_reflink.c22
-rw-r--r--fs/xfs/xfs_rmap_item.c70
-rw-r--r--fs/xfs/xfs_stats.c4
-rw-r--r--fs/xfs/xfs_super.c22
-rw-r--r--fs/xfs/xfs_sysfs.h7
-rw-r--r--fs/xfs/xfs_trace.h52
-rw-r--r--fs/xfs/xfs_trans_ail.c11
434 files changed, 9134 insertions, 6435 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index a547307c1ae8..2685a4d0d353 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -235,6 +235,7 @@ config ARCH_SUPPORTS_HUGETLBFS
config HUGETLBFS
bool "HugeTLB file system support"
depends on X86 || IA64 || SPARC64 || ARCH_SUPPORTS_HUGETLBFS || BROKEN
+ depends on (SYSFS || SYSCTL)
help
hugetlbfs is a filesystem backing for HugeTLB pages, based on
ramfs. For architectures that support it, say Y here and read
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 21e154516bf2..93539aac0e5b 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -58,7 +58,7 @@ config ARCH_USE_GNU_PROPERTY
config BINFMT_ELF_FDPIC
bool "Kernel support for FDPIC ELF binaries"
default y if !BINFMT_ELF
- depends on ARM || ((M68K || SUPERH) && !MMU)
+ depends on ARM || ((M68K || SUPERH || XTENSA) && !MMU)
select ELFCORE
help
ELF FDPIC binaries are based on ELF, but allow the individual load
@@ -142,39 +142,6 @@ config BINFMT_ZFLAT
help
Support FLAT format compressed binaries
-config HAVE_AOUT
- def_bool n
-
-config BINFMT_AOUT
- tristate "Kernel support for a.out and ECOFF binaries"
- depends on HAVE_AOUT
- help
- A.out (Assembler.OUTput) is a set of formats for libraries and
- executables used in the earliest versions of UNIX. Linux used
- the a.out formats QMAGIC and ZMAGIC until they were replaced
- with the ELF format.
-
- The conversion to ELF started in 1995. This option is primarily
- provided for historical interest and for the benefit of those
- who need to run binaries from that era.
-
- Most people should answer N here. If you think you may have
- occasional use for this format, enable module support above
- and answer M here to compile this support as a module called
- binfmt_aout.
-
- If any crucial components of your system (such as /sbin/init
- or /lib/ld.so) are still in a.out format, you will have to
- say Y here.
-
-config OSF4_COMPAT
- bool "OSF/1 v4 readv/writev compatibility"
- depends on ALPHA && BINFMT_AOUT
- help
- Say Y if you are using OSF/1 binaries (like Netscape and Acrobat)
- with v4 shared libraries freely available from Compaq. If you're
- going to use shared libraries from Tru64 version 5.0 or later, say N.
-
config BINFMT_MISC
tristate "Kernel support for MISC binaries"
help
diff --git a/fs/Makefile b/fs/Makefile
index 93b80529f8e8..4dea17840761 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -38,7 +38,6 @@ obj-$(CONFIG_FS_DAX) += dax.o
obj-$(CONFIG_FS_ENCRYPTION) += crypto/
obj-$(CONFIG_FS_VERITY) += verity/
obj-$(CONFIG_FILE_LOCKING) += locks.o
-obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o
obj-$(CONFIG_BINFMT_SCRIPT) += binfmt_script.o
obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 4c5f30a83336..58b391446ae1 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -276,7 +276,7 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
char *vol = match_strdup(&args[0]);
if (!vol)
return 0;
- strlcpy(volume, vol, 32);
+ strscpy(volume, vol, 32);
kfree(vol);
break;
}
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 56ae5cd5184f..230c2d19116d 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -24,9 +24,9 @@ static int afs_readdir(struct file *file, struct dir_context *ctx);
static int afs_d_revalidate(struct dentry *dentry, unsigned int flags);
static int afs_d_delete(const struct dentry *dentry);
static void afs_d_iput(struct dentry *dentry, struct inode *inode);
-static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen,
+static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen,
loff_t fpos, u64 ino, unsigned dtype);
-static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen,
+static bool afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen,
loff_t fpos, u64 ino, unsigned dtype);
static int afs_create(struct user_namespace *mnt_userns, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl);
@@ -568,7 +568,7 @@ static int afs_readdir(struct file *file, struct dir_context *ctx)
* - if afs_dir_iterate_block() spots this function, it'll pass the FID
* uniquifier through dtype
*/
-static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name,
+static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name,
int nlen, loff_t fpos, u64 ino, unsigned dtype)
{
struct afs_lookup_one_cookie *cookie =
@@ -584,16 +584,16 @@ static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name,
if (cookie->name.len != nlen ||
memcmp(cookie->name.name, name, nlen) != 0) {
- _leave(" = 0 [no]");
- return 0;
+ _leave(" = true [keep looking]");
+ return true;
}
cookie->fid.vnode = ino;
cookie->fid.unique = dtype;
cookie->found = 1;
- _leave(" = -1 [found]");
- return -1;
+ _leave(" = false [found]");
+ return false;
}
/*
@@ -636,12 +636,11 @@ static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry,
* - if afs_dir_iterate_block() spots this function, it'll pass the FID
* uniquifier through dtype
*/
-static int afs_lookup_filldir(struct dir_context *ctx, const char *name,
+static bool afs_lookup_filldir(struct dir_context *ctx, const char *name,
int nlen, loff_t fpos, u64 ino, unsigned dtype)
{
struct afs_lookup_cookie *cookie =
container_of(ctx, struct afs_lookup_cookie, ctx);
- int ret;
_enter("{%s,%u},%s,%u,,%llu,%u",
cookie->name.name, cookie->name.len, name, nlen,
@@ -663,12 +662,10 @@ static int afs_lookup_filldir(struct dir_context *ctx, const char *name,
cookie->fids[1].unique = dtype;
cookie->found = 1;
if (cookie->one_only)
- return -1;
+ return false;
}
- ret = cookie->nr_fids >= 50 ? -1 : 0;
- _leave(" = %d", ret);
- return ret;
+ return cookie->nr_fids < 50;
}
/*
diff --git a/fs/aio.c b/fs/aio.c
index 606613e9d1f4..5b2ff20ad322 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -951,16 +951,13 @@ static bool __get_reqs_available(struct kioctx *ctx)
local_irq_save(flags);
kcpu = this_cpu_ptr(ctx->cpu);
if (!kcpu->reqs_available) {
- int old, avail = atomic_read(&ctx->reqs_available);
+ int avail = atomic_read(&ctx->reqs_available);
do {
if (avail < ctx->req_batch)
goto out;
-
- old = avail;
- avail = atomic_cmpxchg(&ctx->reqs_available,
- avail, avail - ctx->req_batch);
- } while (avail != old);
+ } while (!atomic_try_cmpxchg(&ctx->reqs_available,
+ &avail, avail - ctx->req_batch));
kcpu->reqs_available += ctx->req_batch;
}
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index e0c3e33c4177..24192a7667ed 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -32,7 +32,7 @@ static struct inode *anon_inode_inode;
*/
static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
{
- return dynamic_dname(dentry, buffer, buflen, "anon_inode:%s",
+ return dynamic_dname(buffer, buflen, "anon_inode:%s",
dentry->d_name.name);
}
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 12b8fdcc445b..9d1cde8066cf 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -147,7 +147,7 @@ static int bad_inode_atomic_open(struct inode *inode, struct dentry *dentry,
}
static int bad_inode_tmpfile(struct user_namespace *mnt_userns,
- struct inode *inode, struct dentry *dentry,
+ struct inode *inode, struct file *file,
umode_t mode)
{
return -EIO;
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
deleted file mode 100644
index 0dcfc691e7e2..000000000000
--- a/fs/binfmt_aout.c
+++ /dev/null
@@ -1,342 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/fs/binfmt_aout.c
- *
- * Copyright (C) 1991, 1992, 1996 Linus Torvalds
- */
-
-#include <linux/module.h>
-
-#include <linux/time.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/mman.h>
-#include <linux/a.out.h>
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include <linux/ptrace.h>
-#include <linux/user.h>
-#include <linux/binfmts.h>
-#include <linux/personality.h>
-#include <linux/init.h>
-#include <linux/coredump.h>
-#include <linux/slab.h>
-#include <linux/sched/task_stack.h>
-
-#include <linux/uaccess.h>
-#include <asm/cacheflush.h>
-
-static int load_aout_binary(struct linux_binprm *);
-static int load_aout_library(struct file*);
-
-static struct linux_binfmt aout_format = {
- .module = THIS_MODULE,
- .load_binary = load_aout_binary,
- .load_shlib = load_aout_library,
-};
-
-#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
-
-static int set_brk(unsigned long start, unsigned long end)
-{
- start = PAGE_ALIGN(start);
- end = PAGE_ALIGN(end);
- if (end > start)
- return vm_brk(start, end - start);
- return 0;
-}
-
-/*
- * create_aout_tables() parses the env- and arg-strings in new user
- * memory and creates the pointer tables from them, and puts their
- * addresses on the "stack", returning the new stack pointer value.
- */
-static unsigned long __user *create_aout_tables(char __user *p, struct linux_binprm * bprm)
-{
- char __user * __user *argv;
- char __user * __user *envp;
- unsigned long __user *sp;
- int argc = bprm->argc;
- int envc = bprm->envc;
-
- sp = (void __user *)((-(unsigned long)sizeof(char *)) & (unsigned long) p);
-#ifdef __alpha__
-/* whee.. test-programs are so much fun. */
- put_user(0, --sp);
- put_user(0, --sp);
- if (bprm->loader) {
- put_user(0, --sp);
- put_user(1003, --sp);
- put_user(bprm->loader, --sp);
- put_user(1002, --sp);
- }
- put_user(bprm->exec, --sp);
- put_user(1001, --sp);
-#endif
- sp -= envc+1;
- envp = (char __user * __user *) sp;
- sp -= argc+1;
- argv = (char __user * __user *) sp;
-#ifndef __alpha__
- put_user((unsigned long) envp,--sp);
- put_user((unsigned long) argv,--sp);
-#endif
- put_user(argc,--sp);
- current->mm->arg_start = (unsigned long) p;
- while (argc-->0) {
- char c;
- put_user(p,argv++);
- do {
- get_user(c,p++);
- } while (c);
- }
- put_user(NULL,argv);
- current->mm->arg_end = current->mm->env_start = (unsigned long) p;
- while (envc-->0) {
- char c;
- put_user(p,envp++);
- do {
- get_user(c,p++);
- } while (c);
- }
- put_user(NULL,envp);
- current->mm->env_end = (unsigned long) p;
- return sp;
-}
-
-/*
- * These are the functions used to load a.out style executables and shared
- * libraries. There is no binary dependent code anywhere else.
- */
-
-static int load_aout_binary(struct linux_binprm * bprm)
-{
- struct pt_regs *regs = current_pt_regs();
- struct exec ex;
- unsigned long error;
- unsigned long fd_offset;
- unsigned long rlim;
- int retval;
-
- ex = *((struct exec *) bprm->buf); /* exec-header */
- if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
- N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
- N_TRSIZE(ex) || N_DRSIZE(ex) ||
- i_size_read(file_inode(bprm->file)) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
- return -ENOEXEC;
- }
-
- /*
- * Requires a mmap handler. This prevents people from using a.out
- * as part of an exploit attack against /proc-related vulnerabilities.
- */
- if (!bprm->file->f_op->mmap)
- return -ENOEXEC;
-
- fd_offset = N_TXTOFF(ex);
-
- /* Check initial limits. This avoids letting people circumvent
- * size limits imposed on them by creating programs with large
- * arrays in the data or bss.
- */
- rlim = rlimit(RLIMIT_DATA);
- if (rlim >= RLIM_INFINITY)
- rlim = ~0;
- if (ex.a_data + ex.a_bss > rlim)
- return -ENOMEM;
-
- /* Flush all traces of the currently running executable */
- retval = begin_new_exec(bprm);
- if (retval)
- return retval;
-
- /* OK, This is the point of no return */
-#ifdef __alpha__
- SET_AOUT_PERSONALITY(bprm, ex);
-#else
- set_personality(PER_LINUX);
-#endif
- setup_new_exec(bprm);
-
- current->mm->end_code = ex.a_text +
- (current->mm->start_code = N_TXTADDR(ex));
- current->mm->end_data = ex.a_data +
- (current->mm->start_data = N_DATADDR(ex));
- current->mm->brk = ex.a_bss +
- (current->mm->start_brk = N_BSSADDR(ex));
-
- retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
- if (retval < 0)
- return retval;
-
-
- if (N_MAGIC(ex) == OMAGIC) {
- unsigned long text_addr, map_size;
- loff_t pos;
-
- text_addr = N_TXTADDR(ex);
-
-#ifdef __alpha__
- pos = fd_offset;
- map_size = ex.a_text+ex.a_data + PAGE_SIZE - 1;
-#else
- pos = 32;
- map_size = ex.a_text+ex.a_data;
-#endif
- error = vm_brk(text_addr & PAGE_MASK, map_size);
- if (error)
- return error;
-
- error = read_code(bprm->file, text_addr, pos,
- ex.a_text+ex.a_data);
- if ((signed long)error < 0)
- return error;
- } else {
- if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
- (N_MAGIC(ex) != NMAGIC) && printk_ratelimit())
- {
- printk(KERN_NOTICE "executable not page aligned\n");
- }
-
- if ((fd_offset & ~PAGE_MASK) != 0 && printk_ratelimit())
- {
- printk(KERN_WARNING
- "fd_offset is not page aligned. Please convert program: %pD\n",
- bprm->file);
- }
-
- if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
- error = vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
- if (error)
- return error;
-
- read_code(bprm->file, N_TXTADDR(ex), fd_offset,
- ex.a_text + ex.a_data);
- goto beyond_if;
- }
-
- error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
- PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE,
- fd_offset);
-
- if (error != N_TXTADDR(ex))
- return error;
-
- error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED | MAP_PRIVATE,
- fd_offset + ex.a_text);
- if (error != N_DATADDR(ex))
- return error;
- }
-beyond_if:
- set_binfmt(&aout_format);
-
- retval = set_brk(current->mm->start_brk, current->mm->brk);
- if (retval < 0)
- return retval;
-
- current->mm->start_stack =
- (unsigned long) create_aout_tables((char __user *) bprm->p, bprm);
-#ifdef __alpha__
- regs->gp = ex.a_gpvalue;
-#endif
- finalize_exec(bprm);
- start_thread(regs, ex.a_entry, current->mm->start_stack);
- return 0;
-}
-
-static int load_aout_library(struct file *file)
-{
- struct inode * inode;
- unsigned long bss, start_addr, len;
- unsigned long error;
- int retval;
- struct exec ex;
- loff_t pos = 0;
-
- inode = file_inode(file);
-
- retval = -ENOEXEC;
- error = kernel_read(file, &ex, sizeof(ex), &pos);
- if (error != sizeof(ex))
- goto out;
-
- /* We come in here for the regular a.out style of shared libraries */
- if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) ||
- N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) ||
- i_size_read(inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
- goto out;
- }
-
- /*
- * Requires a mmap handler. This prevents people from using a.out
- * as part of an exploit attack against /proc-related vulnerabilities.
- */
- if (!file->f_op->mmap)
- goto out;
-
- if (N_FLAGS(ex))
- goto out;
-
- /* For QMAGIC, the starting address is 0x20 into the page. We mask
- this off to get the starting address for the page */
-
- start_addr = ex.a_entry & 0xfffff000;
-
- if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
- if (printk_ratelimit())
- {
- printk(KERN_WARNING
- "N_TXTOFF is not page aligned. Please convert library: %pD\n",
- file);
- }
- retval = vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
- if (retval)
- goto out;
-
- read_code(file, start_addr, N_TXTOFF(ex),
- ex.a_text + ex.a_data);
- retval = 0;
- goto out;
- }
- /* Now use mmap to map the library into memory. */
- error = vm_mmap(file, start_addr, ex.a_text + ex.a_data,
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED | MAP_PRIVATE,
- N_TXTOFF(ex));
- retval = error;
- if (error != start_addr)
- goto out;
-
- len = PAGE_ALIGN(ex.a_text + ex.a_data);
- bss = ex.a_text + ex.a_data + ex.a_bss;
- if (bss > len) {
- retval = vm_brk(start_addr + len, bss - len);
- if (retval)
- goto out;
- }
- retval = 0;
-out:
- return retval;
-}
-
-static int __init init_aout_binfmt(void)
-{
- register_binfmt(&aout_format);
- return 0;
-}
-
-static void __exit exit_aout_binfmt(void)
-{
- unregister_binfmt(&aout_format);
-}
-
-core_initcall(init_aout_binfmt);
-module_exit(exit_aout_binfmt);
-MODULE_LICENSE("GPL");
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 63c7ebb0da89..6a11025e5850 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -911,7 +911,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
if (!interp_elf_ex) {
retval = -ENOMEM;
- goto out_free_ph;
+ goto out_free_file;
}
/* Get the exec headers */
@@ -1354,6 +1354,7 @@ out:
out_free_dentry:
kfree(interp_elf_ex);
kfree(interp_elf_phdata);
+out_free_file:
allow_write_access(interpreter);
if (interpreter)
fput(interpreter);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 54caa00a2245..f1f051ad3147 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -8,6 +8,7 @@
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
+#include <linux/pagevec.h>
#include <linux/highmem.h>
#include <linux/kthread.h>
#include <linux/time.h>
@@ -15,6 +16,7 @@
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/writeback.h>
+#include <linux/psi.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/log2.h>
@@ -218,8 +220,7 @@ static noinline void end_compressed_writeback(struct inode *inode,
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
unsigned long index = cb->start >> PAGE_SHIFT;
unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
- struct page *pages[16];
- unsigned long nr_pages = end_index - index + 1;
+ struct folio_batch fbatch;
const int errno = blk_status_to_errno(cb->status);
int i;
int ret;
@@ -227,24 +228,23 @@ static noinline void end_compressed_writeback(struct inode *inode,
if (errno)
mapping_set_error(inode->i_mapping, errno);
- while (nr_pages > 0) {
- ret = find_get_pages_contig(inode->i_mapping, index,
- min_t(unsigned long,
- nr_pages, ARRAY_SIZE(pages)), pages);
- if (ret == 0) {
- nr_pages -= 1;
- index += 1;
- continue;
- }
+ folio_batch_init(&fbatch);
+ while (index <= end_index) {
+ ret = filemap_get_folios(inode->i_mapping, &index, end_index,
+ &fbatch);
+
+ if (ret == 0)
+ return;
+
for (i = 0; i < ret; i++) {
+ struct folio *folio = fbatch.folios[i];
+
if (errno)
- SetPageError(pages[i]);
- btrfs_page_clamp_clear_writeback(fs_info, pages[i],
+ folio_set_error(folio);
+ btrfs_page_clamp_clear_writeback(fs_info, &folio->page,
cb->start, cb->len);
- put_page(pages[i]);
}
- nr_pages -= ret;
- index += ret;
+ folio_batch_release(&fbatch);
}
/* the inode may be gone now */
}
@@ -511,7 +511,8 @@ static u64 bio_end_offset(struct bio *bio)
*/
static noinline int add_ra_bio_pages(struct inode *inode,
u64 compressed_end,
- struct compressed_bio *cb)
+ struct compressed_bio *cb,
+ unsigned long *pflags)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
unsigned long end_index;
@@ -580,6 +581,9 @@ static noinline int add_ra_bio_pages(struct inode *inode,
continue;
}
+ if (PageWorkingset(page))
+ psi_memstall_enter(pflags);
+
ret = set_page_extent_mapped(page);
if (ret < 0) {
unlock_page(page);
@@ -666,6 +670,8 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
u64 em_len;
u64 em_start;
struct extent_map *em;
+ /* Initialize to 1 to make skip psi_memstall_leave unless needed */
+ unsigned long pflags = 1;
blk_status_t ret;
int ret2;
int i;
@@ -721,7 +727,7 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
goto fail;
}
- add_ra_bio_pages(inode, em_start + em_len, cb);
+ add_ra_bio_pages(inode, em_start + em_len, cb, &pflags);
/* include any pages we added in add_ra-bio_pages */
cb->len = bio->bi_iter.bi_size;
@@ -801,6 +807,9 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
}
}
+ if (!pflags)
+ psi_memstall_leave(&pflags);
+
if (refcount_dec_and_test(&cb->pending_ios))
finish_compressed_bio_read(cb);
return;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 1eae68fbae21..4dcf22e051ff 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -270,9 +270,8 @@ static int __process_pages_contig(struct address_space *mapping,
pgoff_t start_index = start >> PAGE_SHIFT;
pgoff_t end_index = end >> PAGE_SHIFT;
pgoff_t index = start_index;
- unsigned long nr_pages = end_index - start_index + 1;
unsigned long pages_processed = 0;
- struct page *pages[16];
+ struct folio_batch fbatch;
int err = 0;
int i;
@@ -281,16 +280,17 @@ static int __process_pages_contig(struct address_space *mapping,
ASSERT(processed_end && *processed_end == start);
}
- if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
+ if ((page_ops & PAGE_SET_ERROR) && start_index <= end_index)
mapping_set_error(mapping, -EIO);
- while (nr_pages > 0) {
- int found_pages;
+ folio_batch_init(&fbatch);
+ while (index <= end_index) {
+ int found_folios;
+
+ found_folios = filemap_get_folios_contig(mapping, &index,
+ end_index, &fbatch);
- found_pages = find_get_pages_contig(mapping, index,
- min_t(unsigned long,
- nr_pages, ARRAY_SIZE(pages)), pages);
- if (found_pages == 0) {
+ if (found_folios == 0) {
/*
* Only if we're going to lock these pages, we can find
* nothing at @index.
@@ -300,23 +300,20 @@ static int __process_pages_contig(struct address_space *mapping,
goto out;
}
- for (i = 0; i < found_pages; i++) {
+ for (i = 0; i < found_folios; i++) {
int process_ret;
-
+ struct folio *folio = fbatch.folios[i];
process_ret = process_one_page(fs_info, mapping,
- pages[i], locked_page, page_ops,
+ &folio->page, locked_page, page_ops,
start, end);
if (process_ret < 0) {
- for (; i < found_pages; i++)
- put_page(pages[i]);
err = -EAGAIN;
+ folio_batch_release(&fbatch);
goto out;
}
- put_page(pages[i]);
- pages_processed++;
+ pages_processed += folio_nr_pages(folio);
}
- nr_pages -= found_pages;
- index += found_pages;
+ folio_batch_release(&fbatch);
cond_resched();
}
out:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d70f85b73169..0e516aefbf51 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -10026,7 +10026,7 @@ static int btrfs_permission(struct user_namespace *mnt_userns,
}
static int btrfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
- struct dentry *dentry, umode_t mode)
+ struct file *file, umode_t mode)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
struct btrfs_trans_handle *trans;
@@ -10034,7 +10034,7 @@ static int btrfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
struct inode *inode;
struct btrfs_new_inode_args new_inode_args = {
.dir = dir,
- .dentry = dentry,
+ .dentry = file->f_path.dentry,
.orphan = true,
};
unsigned int trans_num_items;
@@ -10071,7 +10071,7 @@ static int btrfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
set_nlink(inode, 1);
if (!ret) {
- d_tmpfile(dentry, inode);
+ d_tmpfile(file, inode);
unlock_new_inode(inode);
mark_inode_dirty(inode);
}
@@ -10083,7 +10083,7 @@ out_new_inode_args:
out_inode:
if (ret)
iput(inode);
- return ret;
+ return finish_open_simple(file, ret);
}
void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end)
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index 6fc2b77ae5c3..9a176af847d7 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -337,7 +337,7 @@ bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
*
* Even with 0 returned, the page still need extra check to make sure
* it's really the correct page, as the caller is using
- * find_get_pages_contig(), which can race with page invalidating.
+ * filemap_get_folios_contig(), which can race with page invalidating.
*/
int btrfs_page_start_writer_lock(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len)
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index f69ec4d2d6eb..350da449db08 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -4,6 +4,7 @@
*/
#include <linux/pagemap.h>
+#include <linux/pagevec.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sizes.h>
@@ -20,39 +21,40 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end,
unsigned long flags)
{
int ret;
- struct page *pages[16];
+ struct folio_batch fbatch;
unsigned long index = start >> PAGE_SHIFT;
unsigned long end_index = end >> PAGE_SHIFT;
- unsigned long nr_pages = end_index - index + 1;
int i;
int count = 0;
int loops = 0;
- while (nr_pages > 0) {
- ret = find_get_pages_contig(inode->i_mapping, index,
- min_t(unsigned long, nr_pages,
- ARRAY_SIZE(pages)), pages);
+ folio_batch_init(&fbatch);
+
+ while (index <= end_index) {
+ ret = filemap_get_folios_contig(inode->i_mapping, &index,
+ end_index, &fbatch);
for (i = 0; i < ret; i++) {
+ struct folio *folio = fbatch.folios[i];
+
if (flags & PROCESS_TEST_LOCKED &&
- !PageLocked(pages[i]))
+ !folio_test_locked(folio))
count++;
- if (flags & PROCESS_UNLOCK && PageLocked(pages[i]))
- unlock_page(pages[i]);
- put_page(pages[i]);
+ if (flags & PROCESS_UNLOCK && folio_test_locked(folio))
+ folio_unlock(folio);
if (flags & PROCESS_RELEASE)
- put_page(pages[i]);
+ folio_put(folio);
}
- nr_pages -= ret;
- index += ret;
+ folio_batch_release(&fbatch);
cond_resched();
loops++;
if (loops > 100000) {
printk(KERN_ERR
- "stuck in a loop, start %llu, end %llu, nr_pages %lu, ret %d\n",
- start, end, nr_pages, ret);
+ "stuck in a loop, start %llu, end %llu, ret %d\n",
+ start, end, ret);
break;
}
}
+
return count;
}
diff --git a/fs/buffer.c b/fs/buffer.c
index 55e762a58eb6..d9c6d1fbb6dd 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -52,8 +52,8 @@
#include "internal.h"
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
-static int submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
- struct writeback_control *wbc);
+static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
+ struct writeback_control *wbc);
#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
@@ -152,7 +152,7 @@ static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
/*
* Default synchronous end-of-IO handler.. Just mark it up-to-date and
- * unlock the buffer. This is what ll_rw_block uses too.
+ * unlock the buffer.
*/
void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
@@ -491,8 +491,8 @@ int inode_has_buffers(struct inode *inode)
* all already-submitted IO to complete, but does not queue any new
* writes to the disk.
*
- * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
- * you dirty the buffers, and then use osync_inode_buffers to wait for
+ * To do O_SYNC writes, just queue the buffer writes with write_dirty_buffer
+ * as you dirty the buffers, and then use osync_inode_buffers to wait for
* completion. Any other dirty buffers which are not yet queued for
* write will not be flushed to disk by the osync.
*/
@@ -562,7 +562,7 @@ void write_boundary_block(struct block_device *bdev,
struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
if (bh) {
if (buffer_dirty(bh))
- ll_rw_block(REQ_OP_WRITE, 1, &bh);
+ write_dirty_buffer(bh, 0);
put_bh(bh);
}
}
@@ -1342,23 +1342,12 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
{
struct buffer_head *bh = __getblk(bdev, block, size);
if (likely(bh)) {
- ll_rw_block(REQ_OP_READ | REQ_RAHEAD, 1, &bh);
+ bh_readahead(bh, REQ_RAHEAD);
brelse(bh);
}
}
EXPORT_SYMBOL(__breadahead);
-void __breadahead_gfp(struct block_device *bdev, sector_t block, unsigned size,
- gfp_t gfp)
-{
- struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
- if (likely(bh)) {
- ll_rw_block(REQ_OP_READ | REQ_RAHEAD, 1, &bh);
- brelse(bh);
- }
-}
-EXPORT_SYMBOL(__breadahead_gfp);
-
/**
* __bread_gfp() - reads a specified block and returns the bh
* @bdev: the block_device to read from
@@ -1464,19 +1453,15 @@ EXPORT_SYMBOL(set_bh_page);
static void discard_buffer(struct buffer_head * bh)
{
- unsigned long b_state, b_state_old;
+ unsigned long b_state;
lock_buffer(bh);
clear_buffer_dirty(bh);
bh->b_bdev = NULL;
- b_state = bh->b_state;
- for (;;) {
- b_state_old = cmpxchg(&bh->b_state, b_state,
- (b_state & ~BUFFER_FLAGS_DISCARD));
- if (b_state_old == b_state)
- break;
- b_state = b_state_old;
- }
+ b_state = READ_ONCE(bh->b_state);
+ do {
+ } while (!try_cmpxchg(&bh->b_state, &b_state,
+ b_state & ~BUFFER_FLAGS_DISCARD));
unlock_buffer(bh);
}
@@ -1817,7 +1802,7 @@ done:
/*
* The page was marked dirty, but the buffers were
* clean. Someone wrote them back by hand with
- * ll_rw_block/submit_bh. A rare case.
+ * write_dirty_buffer/submit_bh. A rare case.
*/
end_page_writeback(page);
@@ -2033,7 +2018,7 @@ int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len,
if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
!buffer_unwritten(bh) &&
(block_start < from || block_end > to)) {
- ll_rw_block(REQ_OP_READ, 1, &bh);
+ bh_read_nowait(bh, 0);
*wait_bh++=bh;
}
}
@@ -2352,7 +2337,7 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size)
struct address_space *mapping = inode->i_mapping;
const struct address_space_operations *aops = mapping->a_ops;
struct page *page;
- void *fsdata;
+ void *fsdata = NULL;
int err;
err = inode_newsize_ok(inode, size);
@@ -2378,7 +2363,7 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
const struct address_space_operations *aops = mapping->a_ops;
unsigned int blocksize = i_blocksize(inode);
struct page *page;
- void *fsdata;
+ void *fsdata = NULL;
pgoff_t index, curidx;
loff_t curpos;
unsigned zerofrom, offset, len;
@@ -2593,11 +2578,9 @@ int block_truncate_page(struct address_space *mapping,
set_buffer_uptodate(bh);
if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
- err = -EIO;
- ll_rw_block(REQ_OP_READ, 1, &bh);
- wait_on_buffer(bh);
+ err = bh_read(bh, 0);
/* Uhhuh. Read error. Complain and punt. */
- if (!buffer_uptodate(bh))
+ if (err < 0)
goto unlock;
}
@@ -2673,8 +2656,8 @@ static void end_bio_bh_io_sync(struct bio *bio)
bio_put(bio);
}
-static int submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
- struct writeback_control *wbc)
+static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
+ struct writeback_control *wbc)
{
const enum req_op op = opf & REQ_OP_MASK;
struct bio *bio;
@@ -2717,70 +2700,14 @@ static int submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
}
submit_bio(bio);
- return 0;
}
-int submit_bh(blk_opf_t opf, struct buffer_head *bh)
+void submit_bh(blk_opf_t opf, struct buffer_head *bh)
{
- return submit_bh_wbc(opf, bh, NULL);
+ submit_bh_wbc(opf, bh, NULL);
}
EXPORT_SYMBOL(submit_bh);
-/**
- * ll_rw_block: low-level access to block devices (DEPRECATED)
- * @opf: block layer request operation and flags.
- * @nr: number of &struct buffer_heads in the array
- * @bhs: array of pointers to &struct buffer_head
- *
- * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
- * requests an I/O operation on them, either a %REQ_OP_READ or a %REQ_OP_WRITE.
- * @opf contains flags modifying the detailed I/O behavior, most notably
- * %REQ_RAHEAD.
- *
- * This function drops any buffer that it cannot get a lock on (with the
- * BH_Lock state bit), any buffer that appears to be clean when doing a write
- * request, and any buffer that appears to be up-to-date when doing read
- * request. Further it marks as clean buffers that are processed for
- * writing (the buffer cache won't assume that they are actually clean
- * until the buffer gets unlocked).
- *
- * ll_rw_block sets b_end_io to simple completion handler that marks
- * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
- * any waiters.
- *
- * All of the buffers must be for the same device, and must also be a
- * multiple of the current approved size for the device.
- */
-void ll_rw_block(const blk_opf_t opf, int nr, struct buffer_head *bhs[])
-{
- const enum req_op op = opf & REQ_OP_MASK;
- int i;
-
- for (i = 0; i < nr; i++) {
- struct buffer_head *bh = bhs[i];
-
- if (!trylock_buffer(bh))
- continue;
- if (op == REQ_OP_WRITE) {
- if (test_clear_buffer_dirty(bh)) {
- bh->b_end_io = end_buffer_write_sync;
- get_bh(bh);
- submit_bh(opf, bh);
- continue;
- }
- } else {
- if (!buffer_uptodate(bh)) {
- bh->b_end_io = end_buffer_read_sync;
- get_bh(bh);
- submit_bh(opf, bh);
- continue;
- }
- }
- unlock_buffer(bh);
- }
-}
-EXPORT_SYMBOL(ll_rw_block);
-
void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags)
{
lock_buffer(bh);
@@ -2801,8 +2728,6 @@ EXPORT_SYMBOL(write_dirty_buffer);
*/
int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags)
{
- int ret = 0;
-
WARN_ON(atomic_read(&bh->b_count) < 1);
lock_buffer(bh);
if (test_clear_buffer_dirty(bh)) {
@@ -2817,14 +2742,14 @@ int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags)
get_bh(bh);
bh->b_end_io = end_buffer_write_sync;
- ret = submit_bh(REQ_OP_WRITE | op_flags, bh);
+ submit_bh(REQ_OP_WRITE | op_flags, bh);
wait_on_buffer(bh);
- if (!ret && !buffer_uptodate(bh))
- ret = -EIO;
+ if (!buffer_uptodate(bh))
+ return -EIO;
} else {
unlock_buffer(bh);
}
- return ret;
+ return 0;
}
EXPORT_SYMBOL(__sync_dirty_buffer);
@@ -3029,29 +2954,69 @@ int bh_uptodate_or_lock(struct buffer_head *bh)
EXPORT_SYMBOL(bh_uptodate_or_lock);
/**
- * bh_submit_read - Submit a locked buffer for reading
+ * __bh_read - Submit read for a locked buffer
* @bh: struct buffer_head
+ * @op_flags: appending REQ_OP_* flags besides REQ_OP_READ
+ * @wait: wait until reading finish
*
- * Returns zero on success and -EIO on error.
+ * Returns zero on success or don't wait, and -EIO on error.
*/
-int bh_submit_read(struct buffer_head *bh)
+int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait)
{
- BUG_ON(!buffer_locked(bh));
+ int ret = 0;
- if (buffer_uptodate(bh)) {
- unlock_buffer(bh);
- return 0;
- }
+ BUG_ON(!buffer_locked(bh));
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
- submit_bh(REQ_OP_READ, bh);
- wait_on_buffer(bh);
- if (buffer_uptodate(bh))
- return 0;
- return -EIO;
+ submit_bh(REQ_OP_READ | op_flags, bh);
+ if (wait) {
+ wait_on_buffer(bh);
+ if (!buffer_uptodate(bh))
+ ret = -EIO;
+ }
+ return ret;
+}
+EXPORT_SYMBOL(__bh_read);
+
+/**
+ * __bh_read_batch - Submit read for a batch of unlocked buffers
+ * @nr: entry number of the buffer batch
+ * @bhs: a batch of struct buffer_head
+ * @op_flags: appending REQ_OP_* flags besides REQ_OP_READ
+ * @force_lock: force to get a lock on the buffer if set, otherwise drops any
+ * buffer that cannot lock.
+ *
+ * Returns zero on success or don't wait, and -EIO on error.
+ */
+void __bh_read_batch(int nr, struct buffer_head *bhs[],
+ blk_opf_t op_flags, bool force_lock)
+{
+ int i;
+
+ for (i = 0; i < nr; i++) {
+ struct buffer_head *bh = bhs[i];
+
+ if (buffer_uptodate(bh))
+ continue;
+
+ if (force_lock)
+ lock_buffer(bh);
+ else
+ if (!trylock_buffer(bh))
+ continue;
+
+ if (buffer_uptodate(bh)) {
+ unlock_buffer(bh);
+ continue;
+ }
+
+ bh->b_end_io = end_buffer_read_sync;
+ get_bh(bh);
+ submit_bh(REQ_OP_READ | op_flags, bh);
+ }
}
-EXPORT_SYMBOL(bh_submit_read);
+EXPORT_SYMBOL(__bh_read_batch);
void __init buffer_init(void)
{
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index facf2ebe464b..03ca8f2f657a 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -15,9 +15,8 @@
* file or directory. The caller must hold the inode lock.
*/
static bool __cachefiles_mark_inode_in_use(struct cachefiles_object *object,
- struct dentry *dentry)
+ struct inode *inode)
{
- struct inode *inode = d_backing_inode(dentry);
bool can_use = false;
if (!(inode->i_flags & S_KERNEL_FILE)) {
@@ -26,21 +25,18 @@ static bool __cachefiles_mark_inode_in_use(struct cachefiles_object *object,
can_use = true;
} else {
trace_cachefiles_mark_failed(object, inode);
- pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n",
- dentry, inode->i_ino);
}
return can_use;
}
static bool cachefiles_mark_inode_in_use(struct cachefiles_object *object,
- struct dentry *dentry)
+ struct inode *inode)
{
- struct inode *inode = d_backing_inode(dentry);
bool can_use;
inode_lock(inode);
- can_use = __cachefiles_mark_inode_in_use(object, dentry);
+ can_use = __cachefiles_mark_inode_in_use(object, inode);
inode_unlock(inode);
return can_use;
}
@@ -49,21 +45,17 @@ static bool cachefiles_mark_inode_in_use(struct cachefiles_object *object,
* Unmark a backing inode. The caller must hold the inode lock.
*/
static void __cachefiles_unmark_inode_in_use(struct cachefiles_object *object,
- struct dentry *dentry)
+ struct inode *inode)
{
- struct inode *inode = d_backing_inode(dentry);
-
inode->i_flags &= ~S_KERNEL_FILE;
trace_cachefiles_mark_inactive(object, inode);
}
static void cachefiles_do_unmark_inode_in_use(struct cachefiles_object *object,
- struct dentry *dentry)
+ struct inode *inode)
{
- struct inode *inode = d_backing_inode(dentry);
-
inode_lock(inode);
- __cachefiles_unmark_inode_in_use(object, dentry);
+ __cachefiles_unmark_inode_in_use(object, inode);
inode_unlock(inode);
}
@@ -77,14 +69,12 @@ void cachefiles_unmark_inode_in_use(struct cachefiles_object *object,
struct cachefiles_cache *cache = object->volume->cache;
struct inode *inode = file_inode(file);
- if (inode) {
- cachefiles_do_unmark_inode_in_use(object, file->f_path.dentry);
+ cachefiles_do_unmark_inode_in_use(object, inode);
- if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) {
- atomic_long_add(inode->i_blocks, &cache->b_released);
- if (atomic_inc_return(&cache->f_released))
- cachefiles_state_changed(cache);
- }
+ if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) {
+ atomic_long_add(inode->i_blocks, &cache->b_released);
+ if (atomic_inc_return(&cache->f_released))
+ cachefiles_state_changed(cache);
}
}
@@ -164,8 +154,11 @@ retry:
inode_lock(d_inode(subdir));
inode_unlock(d_inode(dir));
- if (!__cachefiles_mark_inode_in_use(NULL, subdir))
+ if (!__cachefiles_mark_inode_in_use(NULL, d_inode(subdir))) {
+ pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n",
+ subdir, d_inode(subdir)->i_ino);
goto mark_error;
+ }
inode_unlock(d_inode(subdir));
@@ -224,9 +217,7 @@ nomem_d_alloc:
void cachefiles_put_directory(struct dentry *dir)
{
if (dir) {
- inode_lock(dir->d_inode);
- __cachefiles_unmark_inode_in_use(NULL, dir);
- inode_unlock(dir->d_inode);
+ cachefiles_do_unmark_inode_in_use(NULL, d_inode(dir));
dput(dir);
}
}
@@ -410,7 +401,7 @@ try_again:
"Rename failed with error %d", ret);
}
- __cachefiles_unmark_inode_in_use(object, rep);
+ __cachefiles_unmark_inode_in_use(object, d_inode(rep));
unlock_rename(cache->graveyard, dir);
dput(grave);
_leave(" = 0");
@@ -451,84 +442,72 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object)
const struct cred *saved_cred;
struct dentry *fan = volume->fanout[(u8)object->cookie->key_hash];
struct file *file;
- struct path path;
+ const struct path parentpath = { .mnt = cache->mnt, .dentry = fan };
uint64_t ni_size;
long ret;
cachefiles_begin_secure(cache, &saved_cred);
- path.mnt = cache->mnt;
ret = cachefiles_inject_write_error();
- if (ret == 0)
- path.dentry = vfs_tmpfile(&init_user_ns, fan, S_IFREG, O_RDWR);
- else
- path.dentry = ERR_PTR(ret);
- if (IS_ERR(path.dentry)) {
- trace_cachefiles_vfs_error(object, d_inode(fan), PTR_ERR(path.dentry),
+ if (ret == 0) {
+ file = vfs_tmpfile_open(&init_user_ns, &parentpath, S_IFREG,
+ O_RDWR | O_LARGEFILE | O_DIRECT,
+ cache->cache_cred);
+ ret = PTR_ERR_OR_ZERO(file);
+ }
+ if (ret) {
+ trace_cachefiles_vfs_error(object, d_inode(fan), ret,
cachefiles_trace_tmpfile_error);
- if (PTR_ERR(path.dentry) == -EIO)
+ if (ret == -EIO)
cachefiles_io_error_obj(object, "Failed to create tmpfile");
- file = ERR_CAST(path.dentry);
- goto out;
+ goto err;
}
- trace_cachefiles_tmpfile(object, d_backing_inode(path.dentry));
+ trace_cachefiles_tmpfile(object, file_inode(file));
- if (!cachefiles_mark_inode_in_use(object, path.dentry)) {
- file = ERR_PTR(-EBUSY);
- goto out_dput;
- }
+ /* This is a newly created file with no other possible user */
+ if (!cachefiles_mark_inode_in_use(object, file_inode(file)))
+ WARN_ON(1);
ret = cachefiles_ondemand_init_object(object);
- if (ret < 0) {
- file = ERR_PTR(ret);
- goto out_unuse;
- }
+ if (ret < 0)
+ goto err_unuse;
ni_size = object->cookie->object_size;
ni_size = round_up(ni_size, CACHEFILES_DIO_BLOCK_SIZE);
if (ni_size > 0) {
- trace_cachefiles_trunc(object, d_backing_inode(path.dentry), 0, ni_size,
+ trace_cachefiles_trunc(object, file_inode(file), 0, ni_size,
cachefiles_trunc_expand_tmpfile);
ret = cachefiles_inject_write_error();
if (ret == 0)
- ret = vfs_truncate(&path, ni_size);
+ ret = vfs_truncate(&file->f_path, ni_size);
if (ret < 0) {
trace_cachefiles_vfs_error(
- object, d_backing_inode(path.dentry), ret,
+ object, file_inode(file), ret,
cachefiles_trace_trunc_error);
- file = ERR_PTR(ret);
- goto out_unuse;
+ goto err_unuse;
}
}
- file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
- d_backing_inode(path.dentry), cache->cache_cred);
- if (IS_ERR(file)) {
- trace_cachefiles_vfs_error(object, d_backing_inode(path.dentry),
- PTR_ERR(file),
- cachefiles_trace_open_error);
- goto out_unuse;
- }
+ ret = -EINVAL;
if (unlikely(!file->f_op->read_iter) ||
unlikely(!file->f_op->write_iter)) {
fput(file);
pr_notice("Cache does not support read_iter and write_iter\n");
- file = ERR_PTR(-EINVAL);
- goto out_unuse;
+ goto err_unuse;
}
-
- goto out_dput;
-
-out_unuse:
- cachefiles_do_unmark_inode_in_use(object, path.dentry);
-out_dput:
- dput(path.dentry);
out:
cachefiles_end_secure(cache, saved_cred);
return file;
+
+err_unuse:
+ cachefiles_do_unmark_inode_in_use(object, file_inode(file));
+ fput(file);
+err:
+ file = ERR_PTR(ret);
+ goto out;
}
/*
@@ -569,8 +548,11 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
_enter("%pd", dentry);
- if (!cachefiles_mark_inode_in_use(object, dentry))
+ if (!cachefiles_mark_inode_in_use(object, d_inode(dentry))) {
+ pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n",
+ dentry, d_inode(dentry)->i_ino);
return false;
+ }
/* We need to open a file interface onto a data file now as we can't do
* it on demand because writeback called from do_exit() sees
@@ -624,7 +606,7 @@ check_failed:
error_fput:
fput(file);
error:
- cachefiles_do_unmark_inode_in_use(object, dentry);
+ cachefiles_do_unmark_inode_in_use(object, d_inode(dentry));
dput(dentry);
return false;
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 53cfe026b3ea..fb023f9fafcb 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -754,6 +754,7 @@ void ceph_add_cap(struct inode *inode,
cap->issue_seq = seq;
cap->mseq = mseq;
cap->cap_gen = gen;
+ wake_up_all(&ci->i_cap_wq);
}
/*
@@ -2285,7 +2286,7 @@ retry:
struct ceph_mds_request *req;
int i;
- sessions = kzalloc(max_sessions * sizeof(s), GFP_KERNEL);
+ sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL);
if (!sessions) {
err = -ENOMEM;
goto out;
@@ -2759,13 +2760,17 @@ again:
* on transition from wanted -> needed caps. This is needed
* for WRBUFFER|WR -> WR to avoid a new WR sync write from
* going before a prior buffered writeback happens.
+ *
+ * For RDCACHE|RD -> RD, there is not need to wait and we can
+ * just exclude the revoking caps and force to sync read.
*/
int not = want & ~(have & need);
int revoking = implemented & ~have;
+ int exclude = revoking & not;
dout("get_cap_refs %p have %s but not %s (revoking %s)\n",
inode, ceph_cap_string(have), ceph_cap_string(not),
ceph_cap_string(revoking));
- if ((revoking & not) == 0) {
+ if (!exclude || !(exclude & CEPH_CAP_FILE_BUFFER)) {
if (!snap_rwsem_locked &&
!ci->i_head_snapc &&
(need & CEPH_CAP_FILE_WR)) {
@@ -2787,7 +2792,7 @@ again:
snap_rwsem_locked = true;
}
if ((have & want) == want)
- *got = need | want;
+ *got = need | (want & ~exclude);
else
*got = need;
ceph_take_cap_refs(ci, *got, true);
@@ -3550,6 +3555,9 @@ static void handle_cap_grant(struct inode *inode,
check_caps = 1; /* check auth cap only */
else
check_caps = 2; /* check all caps */
+ /* If there is new caps, try to wake up the waiters */
+ if (~cap->issued & newcaps)
+ wake = true;
cap->issued = newcaps;
cap->implemented |= newcaps;
} else if (cap->issued == newcaps) {
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index e0fa66ac8b9f..f780e4e0d062 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -181,6 +181,7 @@ struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino)
static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
{
struct inode *inode = __lookup_inode(sb, ino);
+ struct ceph_inode_info *ci = ceph_inode(inode);
int err;
if (IS_ERR(inode))
@@ -192,7 +193,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
return ERR_PTR(err);
}
/* -ESTALE if inode as been unlinked and no file is open */
- if ((inode->i_nlink == 0) && (atomic_read(&inode->i_count) == 1)) {
+ if ((inode->i_nlink == 0) && !__ceph_is_file_opened(ci)) {
iput(inode);
return ERR_PTR(-ESTALE);
}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 42351d7a0dd6..4af5e55abc15 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -362,7 +362,7 @@ static int ceph_fill_fragtree(struct inode *inode,
if (nsplits != ci->i_fragtree_nsplits) {
update = true;
} else if (nsplits) {
- i = prandom_u32() % nsplits;
+ i = prandom_u32_max(nsplits);
id = le32_to_cpu(fragtree->splits[i].frag);
if (!__ceph_find_frag(ci, id))
update = true;
@@ -2192,6 +2192,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied,
&prealloc_cf);
inode->i_ctime = attr->ia_ctime;
+ inode_inc_iversion_raw(inode);
}
release &= issued;
@@ -2356,6 +2357,7 @@ int ceph_do_getvxattr(struct inode *inode, const char *name, void *value,
goto out;
}
+ req->r_feature_needed = CEPHFS_FEATURE_OP_GETVXATTR;
req->r_path2 = kstrdup(name, GFP_NOFS);
if (!req->r_path2) {
err = -ENOMEM;
@@ -2447,6 +2449,7 @@ int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct inode *inode = d_inode(path->dentry);
+ struct super_block *sb = inode->i_sb;
struct ceph_inode_info *ci = ceph_inode(inode);
u32 valid_mask = STATX_BASIC_STATS;
int err = 0;
@@ -2476,16 +2479,34 @@ int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path,
}
if (ceph_snap(inode) == CEPH_NOSNAP)
- stat->dev = inode->i_sb->s_dev;
+ stat->dev = sb->s_dev;
else
stat->dev = ci->i_snapid_map ? ci->i_snapid_map->dev : 0;
if (S_ISDIR(inode->i_mode)) {
- if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb),
- RBYTES))
+ if (ceph_test_mount_opt(ceph_sb_to_client(sb), RBYTES)) {
stat->size = ci->i_rbytes;
- else
+ } else if (ceph_snap(inode) == CEPH_SNAPDIR) {
+ struct ceph_inode_info *pci;
+ struct ceph_snap_realm *realm;
+ struct inode *parent;
+
+ parent = ceph_lookup_inode(sb, ceph_ino(inode));
+ if (!parent)
+ return PTR_ERR(parent);
+
+ pci = ceph_inode(parent);
+ spin_lock(&pci->i_ceph_lock);
+ realm = pci->i_snap_realm;
+ if (realm)
+ stat->size = realm->num_snaps;
+ else
+ stat->size = 0;
+ spin_unlock(&pci->i_ceph_lock);
+ iput(parent);
+ } else {
stat->size = ci->i_files + ci->i_subdirs;
+ }
stat->blocks = 0;
stat->blksize = 65536;
/*
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 80f8b9ec1a31..26a0a8b9975e 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2318,6 +2318,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
INIT_LIST_HEAD(&req->r_unsafe_dir_item);
INIT_LIST_HEAD(&req->r_unsafe_target_item);
req->r_fmode = -1;
+ req->r_feature_needed = -1;
kref_init(&req->r_kref);
RB_CLEAR_NODE(&req->r_node);
INIT_LIST_HEAD(&req->r_wait);
@@ -2916,6 +2917,16 @@ static void __do_request(struct ceph_mds_client *mdsc,
dout("do_request mds%d session %p state %s\n", mds, session,
ceph_session_state_name(session->s_state));
+
+ /*
+ * The old ceph will crash the MDSs when see unknown OPs
+ */
+ if (req->r_feature_needed > 0 &&
+ !test_bit(req->r_feature_needed, &session->s_features)) {
+ err = -EOPNOTSUPP;
+ goto out_session;
+ }
+
if (session->s_state != CEPH_MDS_SESSION_OPEN &&
session->s_state != CEPH_MDS_SESSION_HUNG) {
/*
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 256e3eada6c1..0598faa50e2e 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -31,8 +31,9 @@ enum ceph_feature_type {
CEPHFS_FEATURE_METRIC_COLLECT,
CEPHFS_FEATURE_ALTERNATE_NAME,
CEPHFS_FEATURE_NOTIFY_SESSION_STATE,
+ CEPHFS_FEATURE_OP_GETVXATTR,
- CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_NOTIFY_SESSION_STATE,
+ CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_OP_GETVXATTR,
};
#define CEPHFS_FEATURES_CLIENT_SUPPORTED { \
@@ -44,6 +45,7 @@ enum ceph_feature_type {
CEPHFS_FEATURE_DELEG_INO, \
CEPHFS_FEATURE_METRIC_COLLECT, \
CEPHFS_FEATURE_NOTIFY_SESSION_STATE, \
+ CEPHFS_FEATURE_OP_GETVXATTR, \
}
/*
@@ -336,6 +338,8 @@ struct ceph_mds_request {
long long r_dir_ordered_cnt;
int r_readdir_cache_idx;
+ int r_feature_needed;
+
struct ceph_cap_reservation r_caps_reservation;
};
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 8d0a6d2c2da4..3fbabc98e1f7 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -29,7 +29,7 @@ static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy)
return -1;
/* pick */
- n = prandom_u32() % n;
+ n = prandom_u32_max(n);
for (j = 0, i = 0; i < m->possible_max_rank; i++) {
if (CEPH_MDS_IS_READY(i, ignore_laggy))
j++;
diff --git a/fs/cifs/cached_dir.c b/fs/cifs/cached_dir.c
index b401339f6e73..60399081046a 100644
--- a/fs/cifs/cached_dir.c
+++ b/fs/cifs/cached_dir.c
@@ -5,12 +5,99 @@
* Copyright (c) 2022, Ronnie Sahlberg <lsahlber@redhat.com>
*/
+#include <linux/namei.h>
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_debug.h"
#include "smb2proto.h"
#include "cached_dir.h"
+static struct cached_fid *init_cached_dir(const char *path);
+static void free_cached_dir(struct cached_fid *cfid);
+
+static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids,
+ const char *path,
+ bool lookup_only)
+{
+ struct cached_fid *cfid;
+
+ spin_lock(&cfids->cfid_list_lock);
+ list_for_each_entry(cfid, &cfids->entries, entry) {
+ if (!strcmp(cfid->path, path)) {
+ /*
+ * If it doesn't have a lease it is either not yet
+ * fully cached or it may be in the process of
+ * being deleted due to a lease break.
+ */
+ if (!cfid->has_lease) {
+ spin_unlock(&cfids->cfid_list_lock);
+ return NULL;
+ }
+ kref_get(&cfid->refcount);
+ spin_unlock(&cfids->cfid_list_lock);
+ return cfid;
+ }
+ }
+ if (lookup_only) {
+ spin_unlock(&cfids->cfid_list_lock);
+ return NULL;
+ }
+ if (cfids->num_entries >= MAX_CACHED_FIDS) {
+ spin_unlock(&cfids->cfid_list_lock);
+ return NULL;
+ }
+ cfid = init_cached_dir(path);
+ if (cfid == NULL) {
+ spin_unlock(&cfids->cfid_list_lock);
+ return NULL;
+ }
+ cfid->cfids = cfids;
+ cfids->num_entries++;
+ list_add(&cfid->entry, &cfids->entries);
+ cfid->on_list = true;
+ kref_get(&cfid->refcount);
+ spin_unlock(&cfids->cfid_list_lock);
+ return cfid;
+}
+
+static struct dentry *
+path_to_dentry(struct cifs_sb_info *cifs_sb, const char *path)
+{
+ struct dentry *dentry;
+ const char *s, *p;
+ char sep;
+
+ sep = CIFS_DIR_SEP(cifs_sb);
+ dentry = dget(cifs_sb->root);
+ s = path;
+
+ do {
+ struct inode *dir = d_inode(dentry);
+ struct dentry *child;
+
+ if (!S_ISDIR(dir->i_mode)) {
+ dput(dentry);
+ dentry = ERR_PTR(-ENOTDIR);
+ break;
+ }
+
+ /* skip separators */
+ while (*s == sep)
+ s++;
+ if (!*s)
+ break;
+ p = s++;
+ /* next separator */
+ while (*s && *s != sep)
+ s++;
+
+ child = lookup_positive_unlocked(p, dentry, s - p);
+ dput(dentry);
+ dentry = child;
+ } while (!IS_ERR(dentry));
+ return dentry;
+}
+
/*
* Open the and cache a directory handle.
* If error then *cfid is not initialized.
@@ -31,54 +118,57 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
struct kvec open_iov[SMB2_CREATE_IOV_SIZE];
struct kvec qi_iov[1];
int rc, flags = 0;
- __le16 utf16_path = 0; /* Null - since an open of top of share */
+ __le16 *utf16_path = NULL;
u8 oplock = SMB2_OPLOCK_LEVEL_II;
struct cifs_fid *pfid;
- struct dentry *dentry;
+ struct dentry *dentry = NULL;
struct cached_fid *cfid;
+ struct cached_fids *cfids;
- if (tcon == NULL || tcon->nohandlecache ||
+ if (tcon == NULL || tcon->cfids == NULL || tcon->nohandlecache ||
is_smb1_server(tcon->ses->server))
return -EOPNOTSUPP;
ses = tcon->ses;
server = ses->server;
+ cfids = tcon->cfids;
- if (cifs_sb->root == NULL)
- return -ENOENT;
+ if (!server->ops->new_lease_key)
+ return -EIO;
- if (strlen(path))
+ if (cifs_sb->root == NULL)
return -ENOENT;
- dentry = cifs_sb->root;
+ utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
+ if (!utf16_path)
+ return -ENOMEM;
- cfid = tcon->cfid;
- mutex_lock(&cfid->fid_mutex);
- if (cfid->is_valid) {
- cifs_dbg(FYI, "found a cached root file handle\n");
+ cfid = find_or_create_cached_dir(cfids, path, lookup_only);
+ if (cfid == NULL) {
+ kfree(utf16_path);
+ return -ENOENT;
+ }
+ /*
+ * At this point we either have a lease already and we can just
+ * return it. If not we are guaranteed to be the only thread accessing
+ * this cfid.
+ */
+ if (cfid->has_lease) {
*ret_cfid = cfid;
- kref_get(&cfid->refcount);
- mutex_unlock(&cfid->fid_mutex);
+ kfree(utf16_path);
return 0;
}
/*
* We do not hold the lock for the open because in case
- * SMB2_open needs to reconnect, it will end up calling
- * cifs_mark_open_files_invalid() which takes the lock again
- * thus causing a deadlock
+ * SMB2_open needs to reconnect.
+ * This is safe because no other thread will be able to get a ref
+ * to the cfid until we have finished opening the file and (possibly)
+ * acquired a lease.
*/
- mutex_unlock(&cfid->fid_mutex);
-
- if (lookup_only)
- return -ENOENT;
-
if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
- if (!server->ops->new_lease_key)
- return -EIO;
-
pfid = &cfid->fid;
server->ops->new_lease_key(pfid);
@@ -99,7 +189,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
oparms.reconnect = false;
rc = SMB2_open_init(tcon, server,
- &rqst[0], &oplock, &oparms, &utf16_path);
+ &rqst[0], &oplock, &oparms, utf16_path);
if (rc)
goto oshr_free;
smb2_set_next_command(tcon, &rqst[0]);
@@ -122,47 +212,13 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
rc = compound_send_recv(xid, ses, server,
flags, 2, rqst,
resp_buftype, rsp_iov);
- mutex_lock(&cfid->fid_mutex);
-
- /*
- * Now we need to check again as the cached root might have
- * been successfully re-opened from a concurrent process
- */
-
- if (cfid->is_valid) {
- /* work was already done */
-
- /* stash fids for close() later */
- struct cifs_fid fid = {
- .persistent_fid = pfid->persistent_fid,
- .volatile_fid = pfid->volatile_fid,
- };
-
- /*
- * caller expects this func to set the fid in cfid to valid
- * cached root, so increment the refcount.
- */
- kref_get(&cfid->refcount);
-
- mutex_unlock(&cfid->fid_mutex);
-
- if (rc == 0) {
- /* close extra handle outside of crit sec */
- SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
- }
- rc = 0;
- goto oshr_free;
- }
-
- /* Cached root is still invalid, continue normaly */
-
if (rc) {
if (rc == -EREMCHG) {
tcon->need_reconnect = true;
pr_warn_once("server share %s deleted\n",
- tcon->treeName);
+ tcon->tree_name);
}
- goto oshr_exit;
+ goto oshr_free;
}
atomic_inc(&tcon->num_remote_opens);
@@ -174,30 +230,18 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
oparms.fid->mid = le64_to_cpu(o_rsp->hdr.MessageId);
#endif /* CIFS_DEBUG2 */
- cfid->tcon = tcon;
- cfid->is_valid = true;
- cfid->dentry = dentry;
- dget(dentry);
- kref_init(&cfid->refcount);
+ if (o_rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE)
+ goto oshr_free;
- /* BB TBD check to see if oplock level check can be removed below */
- if (o_rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE) {
- /*
- * See commit 2f94a3125b87. Increment the refcount when we
- * get a lease for root, release it if lease break occurs
- */
- kref_get(&cfid->refcount);
- cfid->has_lease = true;
- smb2_parse_contexts(server, o_rsp,
- &oparms.fid->epoch,
- oparms.fid->lease_key, &oplock,
- NULL, NULL);
- } else
- goto oshr_exit;
+
+ smb2_parse_contexts(server, o_rsp,
+ &oparms.fid->epoch,
+ oparms.fid->lease_key, &oplock,
+ NULL, NULL);
qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base;
if (le32_to_cpu(qi_rsp->OutputBufferLength) < sizeof(struct smb2_file_all_info))
- goto oshr_exit;
+ goto oshr_free;
if (!smb2_validate_and_copy_iov(
le16_to_cpu(qi_rsp->OutputBufferOffset),
sizeof(struct smb2_file_all_info),
@@ -205,15 +249,42 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
(char *)&cfid->file_all_info))
cfid->file_all_info_is_valid = true;
+ if (!path[0])
+ dentry = dget(cifs_sb->root);
+ else {
+ dentry = path_to_dentry(cifs_sb, path);
+ if (IS_ERR(dentry)) {
+ rc = -ENOENT;
+ goto oshr_free;
+ }
+ }
+ cfid->dentry = dentry;
+ cfid->tcon = tcon;
cfid->time = jiffies;
+ cfid->is_open = true;
+ cfid->has_lease = true;
-oshr_exit:
- mutex_unlock(&cfid->fid_mutex);
oshr_free:
+ kfree(utf16_path);
SMB2_open_free(&rqst[0]);
SMB2_query_info_free(&rqst[1]);
free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
+ spin_lock(&cfids->cfid_list_lock);
+ if (!cfid->has_lease) {
+ if (cfid->on_list) {
+ list_del(&cfid->entry);
+ cfid->on_list = false;
+ cfids->num_entries--;
+ }
+ rc = -ENOENT;
+ }
+ spin_unlock(&cfids->cfid_list_lock);
+ if (rc) {
+ free_cached_dir(cfid);
+ cfid = NULL;
+ }
+
if (rc == 0)
*ret_cfid = cfid;
@@ -225,18 +296,22 @@ int open_cached_dir_by_dentry(struct cifs_tcon *tcon,
struct cached_fid **ret_cfid)
{
struct cached_fid *cfid;
+ struct cached_fids *cfids = tcon->cfids;
- cfid = tcon->cfid;
+ if (cfids == NULL)
+ return -ENOENT;
- mutex_lock(&cfid->fid_mutex);
- if (cfid->dentry == dentry) {
- cifs_dbg(FYI, "found a cached root file handle by dentry\n");
- *ret_cfid = cfid;
- kref_get(&cfid->refcount);
- mutex_unlock(&cfid->fid_mutex);
- return 0;
+ spin_lock(&cfids->cfid_list_lock);
+ list_for_each_entry(cfid, &cfids->entries, entry) {
+ if (dentry && cfid->dentry == dentry) {
+ cifs_dbg(FYI, "found a cached root file handle by dentry\n");
+ kref_get(&cfid->refcount);
+ *ret_cfid = cfid;
+ spin_unlock(&cfids->cfid_list_lock);
+ return 0;
+ }
}
- mutex_unlock(&cfid->fid_mutex);
+ spin_unlock(&cfids->cfid_list_lock);
return -ENOENT;
}
@@ -245,63 +320,50 @@ smb2_close_cached_fid(struct kref *ref)
{
struct cached_fid *cfid = container_of(ref, struct cached_fid,
refcount);
- struct cached_dirent *dirent, *q;
- if (cfid->is_valid) {
- cifs_dbg(FYI, "clear cached root file handle\n");
- SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
- cfid->fid.volatile_fid);
+ spin_lock(&cfid->cfids->cfid_list_lock);
+ if (cfid->on_list) {
+ list_del(&cfid->entry);
+ cfid->on_list = false;
+ cfid->cfids->num_entries--;
}
+ spin_unlock(&cfid->cfids->cfid_list_lock);
- /*
- * We only check validity above to send SMB2_close,
- * but we still need to invalidate these entries
- * when this function is called
- */
- cfid->is_valid = false;
- cfid->file_all_info_is_valid = false;
- cfid->has_lease = false;
- if (cfid->dentry) {
- dput(cfid->dentry);
- cfid->dentry = NULL;
- }
- /*
- * Delete all cached dirent names
- */
- mutex_lock(&cfid->dirents.de_mutex);
- list_for_each_entry_safe(dirent, q, &cfid->dirents.entries, entry) {
- list_del(&dirent->entry);
- kfree(dirent->name);
- kfree(dirent);
+ dput(cfid->dentry);
+ cfid->dentry = NULL;
+
+ if (cfid->is_open) {
+ SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
+ cfid->fid.volatile_fid);
}
- cfid->dirents.is_valid = 0;
- cfid->dirents.is_failed = 0;
- cfid->dirents.ctx = NULL;
- cfid->dirents.pos = 0;
- mutex_unlock(&cfid->dirents.de_mutex);
+ free_cached_dir(cfid);
}
-void close_cached_dir(struct cached_fid *cfid)
+void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon,
+ const char *name, struct cifs_sb_info *cifs_sb)
{
- mutex_lock(&cfid->fid_mutex);
- kref_put(&cfid->refcount, smb2_close_cached_fid);
- mutex_unlock(&cfid->fid_mutex);
-}
+ struct cached_fid *cfid = NULL;
+ int rc;
-void close_cached_dir_lease_locked(struct cached_fid *cfid)
-{
+ rc = open_cached_dir(xid, tcon, name, cifs_sb, true, &cfid);
+ if (rc) {
+ cifs_dbg(FYI, "no cached dir found for rmdir(%s)\n", name);
+ return;
+ }
+ spin_lock(&cfid->cfids->cfid_list_lock);
if (cfid->has_lease) {
cfid->has_lease = false;
kref_put(&cfid->refcount, smb2_close_cached_fid);
}
+ spin_unlock(&cfid->cfids->cfid_list_lock);
+ close_cached_dir(cfid);
}
-void close_cached_dir_lease(struct cached_fid *cfid)
+
+void close_cached_dir(struct cached_fid *cfid)
{
- mutex_lock(&cfid->fid_mutex);
- close_cached_dir_lease_locked(cfid);
- mutex_unlock(&cfid->fid_mutex);
+ kref_put(&cfid->refcount, smb2_close_cached_fid);
}
/*
@@ -314,34 +376,60 @@ void close_all_cached_dirs(struct cifs_sb_info *cifs_sb)
struct cached_fid *cfid;
struct cifs_tcon *tcon;
struct tcon_link *tlink;
+ struct cached_fids *cfids;
for (node = rb_first(root); node; node = rb_next(node)) {
tlink = rb_entry(node, struct tcon_link, tl_rbnode);
tcon = tlink_tcon(tlink);
if (IS_ERR(tcon))
continue;
- cfid = tcon->cfid;
- mutex_lock(&cfid->fid_mutex);
- if (cfid->dentry) {
+ cfids = tcon->cfids;
+ if (cfids == NULL)
+ continue;
+ list_for_each_entry(cfid, &cfids->entries, entry) {
dput(cfid->dentry);
cfid->dentry = NULL;
}
- mutex_unlock(&cfid->fid_mutex);
}
}
/*
- * Invalidate and close all cached dirs when a TCON has been reset
+ * Invalidate all cached dirs when a TCON has been reset
* due to a session loss.
*/
void invalidate_all_cached_dirs(struct cifs_tcon *tcon)
{
- mutex_lock(&tcon->cfid->fid_mutex);
- tcon->cfid->is_valid = false;
- /* cached handle is not valid, so SMB2_CLOSE won't be sent below */
- close_cached_dir_lease_locked(tcon->cfid);
- memset(&tcon->cfid->fid, 0, sizeof(struct cifs_fid));
- mutex_unlock(&tcon->cfid->fid_mutex);
+ struct cached_fids *cfids = tcon->cfids;
+ struct cached_fid *cfid, *q;
+ LIST_HEAD(entry);
+
+ spin_lock(&cfids->cfid_list_lock);
+ list_for_each_entry_safe(cfid, q, &cfids->entries, entry) {
+ list_move(&cfid->entry, &entry);
+ cfids->num_entries--;
+ cfid->is_open = false;
+ cfid->on_list = false;
+ /* To prevent race with smb2_cached_lease_break() */
+ kref_get(&cfid->refcount);
+ }
+ spin_unlock(&cfids->cfid_list_lock);
+
+ list_for_each_entry_safe(cfid, q, &entry, entry) {
+ list_del(&cfid->entry);
+ cancel_work_sync(&cfid->lease_break);
+ if (cfid->has_lease) {
+ /*
+ * We lease was never cancelled from the server so we
+ * need to drop the reference.
+ */
+ spin_lock(&cfids->cfid_list_lock);
+ cfid->has_lease = false;
+ spin_unlock(&cfids->cfid_list_lock);
+ kref_put(&cfid->refcount, smb2_close_cached_fid);
+ }
+ /* Drop the extra reference opened above*/
+ kref_put(&cfid->refcount, smb2_close_cached_fid);
+ }
}
static void
@@ -350,39 +438,121 @@ smb2_cached_lease_break(struct work_struct *work)
struct cached_fid *cfid = container_of(work,
struct cached_fid, lease_break);
- close_cached_dir_lease(cfid);
+ spin_lock(&cfid->cfids->cfid_list_lock);
+ cfid->has_lease = false;
+ spin_unlock(&cfid->cfids->cfid_list_lock);
+ kref_put(&cfid->refcount, smb2_close_cached_fid);
}
int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16])
{
- if (tcon->cfid->is_valid &&
- !memcmp(lease_key,
- tcon->cfid->fid.lease_key,
- SMB2_LEASE_KEY_SIZE)) {
- tcon->cfid->time = 0;
- INIT_WORK(&tcon->cfid->lease_break,
- smb2_cached_lease_break);
- queue_work(cifsiod_wq,
- &tcon->cfid->lease_break);
- return true;
+ struct cached_fids *cfids = tcon->cfids;
+ struct cached_fid *cfid;
+
+ if (cfids == NULL)
+ return false;
+
+ spin_lock(&cfids->cfid_list_lock);
+ list_for_each_entry(cfid, &cfids->entries, entry) {
+ if (cfid->has_lease &&
+ !memcmp(lease_key,
+ cfid->fid.lease_key,
+ SMB2_LEASE_KEY_SIZE)) {
+ cfid->time = 0;
+ /*
+ * We found a lease remove it from the list
+ * so no threads can access it.
+ */
+ list_del(&cfid->entry);
+ cfid->on_list = false;
+ cfids->num_entries--;
+
+ queue_work(cifsiod_wq,
+ &cfid->lease_break);
+ spin_unlock(&cfids->cfid_list_lock);
+ return true;
+ }
}
+ spin_unlock(&cfids->cfid_list_lock);
return false;
}
-struct cached_fid *init_cached_dir(void)
+static struct cached_fid *init_cached_dir(const char *path)
{
struct cached_fid *cfid;
- cfid = kzalloc(sizeof(*cfid), GFP_KERNEL);
+ cfid = kzalloc(sizeof(*cfid), GFP_ATOMIC);
if (!cfid)
return NULL;
+ cfid->path = kstrdup(path, GFP_ATOMIC);
+ if (!cfid->path) {
+ kfree(cfid);
+ return NULL;
+ }
+
+ INIT_WORK(&cfid->lease_break, smb2_cached_lease_break);
+ INIT_LIST_HEAD(&cfid->entry);
INIT_LIST_HEAD(&cfid->dirents.entries);
mutex_init(&cfid->dirents.de_mutex);
- mutex_init(&cfid->fid_mutex);
+ spin_lock_init(&cfid->fid_lock);
+ kref_init(&cfid->refcount);
return cfid;
}
-void free_cached_dir(struct cifs_tcon *tcon)
+static void free_cached_dir(struct cached_fid *cfid)
+{
+ struct cached_dirent *dirent, *q;
+
+ dput(cfid->dentry);
+ cfid->dentry = NULL;
+
+ /*
+ * Delete all cached dirent names
+ */
+ list_for_each_entry_safe(dirent, q, &cfid->dirents.entries, entry) {
+ list_del(&dirent->entry);
+ kfree(dirent->name);
+ kfree(dirent);
+ }
+
+ kfree(cfid->path);
+ cfid->path = NULL;
+ kfree(cfid);
+}
+
+struct cached_fids *init_cached_dirs(void)
+{
+ struct cached_fids *cfids;
+
+ cfids = kzalloc(sizeof(*cfids), GFP_KERNEL);
+ if (!cfids)
+ return NULL;
+ spin_lock_init(&cfids->cfid_list_lock);
+ INIT_LIST_HEAD(&cfids->entries);
+ return cfids;
+}
+
+/*
+ * Called from tconInfoFree when we are tearing down the tcon.
+ * There are no active users or open files/directories at this point.
+ */
+void free_cached_dirs(struct cached_fids *cfids)
{
- kfree(tcon->cfid);
+ struct cached_fid *cfid, *q;
+ LIST_HEAD(entry);
+
+ spin_lock(&cfids->cfid_list_lock);
+ list_for_each_entry_safe(cfid, q, &cfids->entries, entry) {
+ cfid->on_list = false;
+ cfid->is_open = false;
+ list_move(&cfid->entry, &entry);
+ }
+ spin_unlock(&cfids->cfid_list_lock);
+
+ list_for_each_entry_safe(cfid, q, &entry, entry) {
+ list_del(&cfid->entry);
+ free_cached_dir(cfid);
+ }
+
+ kfree(cfids);
}
diff --git a/fs/cifs/cached_dir.h b/fs/cifs/cached_dir.h
index bd262dc8b179..2f4e764c9ca9 100644
--- a/fs/cifs/cached_dir.h
+++ b/fs/cifs/cached_dir.h
@@ -31,13 +31,17 @@ struct cached_dirents {
};
struct cached_fid {
- bool is_valid:1; /* Do we have a useable root fid */
- bool file_all_info_is_valid:1;
+ struct list_head entry;
+ struct cached_fids *cfids;
+ const char *path;
bool has_lease:1;
+ bool is_open:1;
+ bool on_list:1;
+ bool file_all_info_is_valid:1;
unsigned long time; /* jiffies of when lease was taken */
struct kref refcount;
struct cifs_fid fid;
- struct mutex fid_mutex;
+ spinlock_t fid_lock;
struct cifs_tcon *tcon;
struct dentry *dentry;
struct work_struct lease_break;
@@ -45,8 +49,18 @@ struct cached_fid {
struct cached_dirents dirents;
};
-extern struct cached_fid *init_cached_dir(void);
-extern void free_cached_dir(struct cifs_tcon *tcon);
+#define MAX_CACHED_FIDS 16
+struct cached_fids {
+ /* Must be held when:
+ * - accessing the cfids->entries list
+ */
+ spinlock_t cfid_list_lock;
+ int num_entries;
+ struct list_head entries;
+};
+
+extern struct cached_fids *init_cached_dirs(void);
+extern void free_cached_dirs(struct cached_fids *cfids);
extern int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
const char *path,
struct cifs_sb_info *cifs_sb,
@@ -55,8 +69,10 @@ extern int open_cached_dir_by_dentry(struct cifs_tcon *tcon,
struct dentry *dentry,
struct cached_fid **cfid);
extern void close_cached_dir(struct cached_fid *cfid);
-extern void close_cached_dir_lease(struct cached_fid *cfid);
-extern void close_cached_dir_lease_locked(struct cached_fid *cfid);
+extern void drop_cached_dir_by_name(const unsigned int xid,
+ struct cifs_tcon *tcon,
+ const char *name,
+ struct cifs_sb_info *cifs_sb);
extern void close_all_cached_dirs(struct cifs_sb_info *cifs_sb);
extern void invalidate_all_cached_dirs(struct cifs_tcon *tcon);
extern int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16]);
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index c05477e28cff..90850da390ae 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -87,7 +87,7 @@ static void cifs_debug_tcon(struct seq_file *m, struct cifs_tcon *tcon)
{
__u32 dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType);
- seq_printf(m, "%s Mounts: %d ", tcon->treeName, tcon->tc_count);
+ seq_printf(m, "%s Mounts: %d ", tcon->tree_name, tcon->tc_count);
if (tcon->nativeFileSystem)
seq_printf(m, "Type: %s ", tcon->nativeFileSystem);
seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x\n\tPathComponentMax: %d Status: %d",
@@ -601,7 +601,7 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v)
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
i++;
- seq_printf(m, "\n%d) %s", i, tcon->treeName);
+ seq_printf(m, "\n%d) %s", i, tcon->tree_name);
if (tcon->need_reconnect)
seq_puts(m, "\tDISCONNECTED ");
seq_printf(m, "\nSMBs: %d",
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index ee4ea2b60c0f..d44808263cfb 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -108,8 +108,8 @@ do { \
#define cifs_tcon_dbg_func(ratefunc, type, fmt, ...) \
do { \
const char *tn = ""; \
- if (tcon && tcon->treeName) \
- tn = tcon->treeName; \
+ if (tcon && tcon->tree_name) \
+ tn = tcon->tree_name; \
if ((type) & FYI && cifsFYI & CIFS_INFO) { \
pr_debug_ ## ratefunc("%s: %s " fmt, \
__FILE__, tn, ##__VA_ARGS__); \
@@ -150,7 +150,7 @@ do { \
#define cifs_tcon_dbg(type, fmt, ...) \
do { \
if (0) \
- pr_debug("%s " fmt, tcon->treeName, ##__VA_ARGS__); \
+ pr_debug("%s " fmt, tcon->tree_name, ##__VA_ARGS__); \
} while (0)
#define cifs_info(fmt, ...) \
diff --git a/fs/cifs/cifs_ioctl.h b/fs/cifs/cifs_ioctl.h
index b87cbbe6d2d4..d86d78d5bfdc 100644
--- a/fs/cifs/cifs_ioctl.h
+++ b/fs/cifs/cifs_ioctl.h
@@ -91,6 +91,13 @@ struct smb3_notify {
bool watch_tree;
} __packed;
+struct smb3_notify_info {
+ __u32 completion_filter;
+ bool watch_tree;
+ __u32 data_len; /* size of notify data below */
+ __u8 notify_data[];
+} __packed;
+
#define CIFS_IOCTL_MAGIC 0xCF
#define CIFS_IOC_COPYCHUNK_FILE _IOW(CIFS_IOCTL_MAGIC, 3, int)
#define CIFS_IOC_SET_INTEGRITY _IO(CIFS_IOCTL_MAGIC, 4)
@@ -100,6 +107,7 @@ struct smb3_notify {
#define CIFS_DUMP_KEY _IOWR(CIFS_IOCTL_MAGIC, 8, struct smb3_key_debug_info)
#define CIFS_IOC_NOTIFY _IOW(CIFS_IOCTL_MAGIC, 9, struct smb3_notify)
#define CIFS_DUMP_FULL_KEY _IOWR(CIFS_IOCTL_MAGIC, 10, struct smb3_full_key_debug_info)
+#define CIFS_IOC_NOTIFY_INFO _IOWR(CIFS_IOCTL_MAGIC, 11, struct smb3_notify_info)
#define CIFS_IOC_SHUTDOWN _IOR ('X', 125, __u32)
/*
diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c
index 1e4c7cc5287f..7233c6a7e6d7 100644
--- a/fs/cifs/cifs_swn.c
+++ b/fs/cifs/cifs_swn.c
@@ -256,23 +256,23 @@ static struct cifs_swn_reg *cifs_find_swn_reg(struct cifs_tcon *tcon)
const char *share_name;
const char *net_name;
- net_name = extract_hostname(tcon->treeName);
+ net_name = extract_hostname(tcon->tree_name);
if (IS_ERR(net_name)) {
int ret;
ret = PTR_ERR(net_name);
cifs_dbg(VFS, "%s: failed to extract host name from target '%s': %d\n",
- __func__, tcon->treeName, ret);
+ __func__, tcon->tree_name, ret);
return ERR_PTR(-EINVAL);
}
- share_name = extract_sharename(tcon->treeName);
+ share_name = extract_sharename(tcon->tree_name);
if (IS_ERR(share_name)) {
int ret;
ret = PTR_ERR(share_name);
cifs_dbg(VFS, "%s: failed to extract share name from target '%s': %d\n",
- __func__, tcon->treeName, ret);
+ __func__, tcon->tree_name, ret);
kfree(net_name);
return ERR_PTR(-EINVAL);
}
@@ -335,14 +335,14 @@ static struct cifs_swn_reg *cifs_get_swn_reg(struct cifs_tcon *tcon)
goto fail;
}
- reg->net_name = extract_hostname(tcon->treeName);
+ reg->net_name = extract_hostname(tcon->tree_name);
if (IS_ERR(reg->net_name)) {
ret = PTR_ERR(reg->net_name);
cifs_dbg(VFS, "%s: failed to extract host name from target: %d\n", __func__, ret);
goto fail_idr;
}
- reg->share_name = extract_sharename(tcon->treeName);
+ reg->share_name = extract_sharename(tcon->tree_name);
if (IS_ERR(reg->share_name)) {
ret = PTR_ERR(reg->share_name);
cifs_dbg(VFS, "%s: failed to extract share name from target: %d\n", __func__, ret);
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 46f5718754f9..5db73c0f792a 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -103,26 +103,24 @@ static int cifs_calc_signature(struct smb_rqst *rqst,
if (!rqst->rq_iov || !signature || !server)
return -EINVAL;
- rc = cifs_alloc_hash("md5", &server->secmech.md5,
- &server->secmech.sdescmd5);
+ rc = cifs_alloc_hash("md5", &server->secmech.md5);
if (rc)
return -1;
- rc = crypto_shash_init(&server->secmech.sdescmd5->shash);
+ rc = crypto_shash_init(server->secmech.md5);
if (rc) {
cifs_dbg(VFS, "%s: Could not init md5\n", __func__);
return rc;
}
- rc = crypto_shash_update(&server->secmech.sdescmd5->shash,
+ rc = crypto_shash_update(server->secmech.md5,
server->session_key.response, server->session_key.len);
if (rc) {
cifs_dbg(VFS, "%s: Could not update with response\n", __func__);
return rc;
}
- return __cifs_calc_signature(rqst, server, signature,
- &server->secmech.sdescmd5->shash);
+ return __cifs_calc_signature(rqst, server, signature, server->secmech.md5);
}
/* must be called with server->srv_mutex held */
@@ -412,7 +410,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
wchar_t *domain;
wchar_t *server;
- if (!ses->server->secmech.sdeschmacmd5) {
+ if (!ses->server->secmech.hmacmd5) {
cifs_dbg(VFS, "%s: can't generate ntlmv2 hash\n", __func__);
return -1;
}
@@ -420,14 +418,14 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
/* calculate md4 hash of password */
E_md4hash(ses->password, nt_hash, nls_cp);
- rc = crypto_shash_setkey(ses->server->secmech.hmacmd5, nt_hash,
+ rc = crypto_shash_setkey(ses->server->secmech.hmacmd5->tfm, nt_hash,
CIFS_NTHASH_SIZE);
if (rc) {
cifs_dbg(VFS, "%s: Could not set NT Hash as a key\n", __func__);
return rc;
}
- rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
+ rc = crypto_shash_init(ses->server->secmech.hmacmd5);
if (rc) {
cifs_dbg(VFS, "%s: Could not init hmacmd5\n", __func__);
return rc;
@@ -448,7 +446,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
memset(user, '\0', 2);
}
- rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+ rc = crypto_shash_update(ses->server->secmech.hmacmd5,
(char *)user, 2 * len);
kfree(user);
if (rc) {
@@ -468,7 +466,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
len = cifs_strtoUTF16((__le16 *)domain, ses->domainName, len,
nls_cp);
rc =
- crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+ crypto_shash_update(ses->server->secmech.hmacmd5,
(char *)domain, 2 * len);
kfree(domain);
if (rc) {
@@ -488,7 +486,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
len = cifs_strtoUTF16((__le16 *)server, ses->ip_addr, len,
nls_cp);
rc =
- crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+ crypto_shash_update(ses->server->secmech.hmacmd5,
(char *)server, 2 * len);
kfree(server);
if (rc) {
@@ -498,7 +496,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
}
}
- rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
+ rc = crypto_shash_final(ses->server->secmech.hmacmd5,
ntlmv2_hash);
if (rc)
cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__);
@@ -518,12 +516,12 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
hash_len = ses->auth_key.len - (CIFS_SESS_KEY_SIZE +
offsetof(struct ntlmv2_resp, challenge.key[0]));
- if (!ses->server->secmech.sdeschmacmd5) {
+ if (!ses->server->secmech.hmacmd5) {
cifs_dbg(VFS, "%s: can't generate ntlmv2 hash\n", __func__);
return -1;
}
- rc = crypto_shash_setkey(ses->server->secmech.hmacmd5,
+ rc = crypto_shash_setkey(ses->server->secmech.hmacmd5->tfm,
ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
if (rc) {
cifs_dbg(VFS, "%s: Could not set NTLMV2 Hash as a key\n",
@@ -531,7 +529,7 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
return rc;
}
- rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
+ rc = crypto_shash_init(ses->server->secmech.hmacmd5);
if (rc) {
cifs_dbg(VFS, "%s: Could not init hmacmd5\n", __func__);
return rc;
@@ -543,7 +541,7 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
else
memcpy(ntlmv2->challenge.key,
ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
- rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+ rc = crypto_shash_update(ses->server->secmech.hmacmd5,
ntlmv2->challenge.key, hash_len);
if (rc) {
cifs_dbg(VFS, "%s: Could not update with response\n", __func__);
@@ -551,7 +549,7 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
}
/* Note that the MD5 digest over writes anon.challenge_key.key */
- rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
+ rc = crypto_shash_final(ses->server->secmech.hmacmd5,
ntlmv2->ntlmv2_hash);
if (rc)
cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__);
@@ -627,9 +625,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
cifs_server_lock(ses->server);
- rc = cifs_alloc_hash("hmac(md5)",
- &ses->server->secmech.hmacmd5,
- &ses->server->secmech.sdeschmacmd5);
+ rc = cifs_alloc_hash("hmac(md5)", &ses->server->secmech.hmacmd5);
if (rc) {
goto unlock;
}
@@ -649,7 +645,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
}
/* now calculate the session key for NTLMv2 */
- rc = crypto_shash_setkey(ses->server->secmech.hmacmd5,
+ rc = crypto_shash_setkey(ses->server->secmech.hmacmd5->tfm,
ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
if (rc) {
cifs_dbg(VFS, "%s: Could not set NTLMV2 Hash as a key\n",
@@ -657,13 +653,13 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
goto unlock;
}
- rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
+ rc = crypto_shash_init(ses->server->secmech.hmacmd5);
if (rc) {
cifs_dbg(VFS, "%s: Could not init hmacmd5\n", __func__);
goto unlock;
}
- rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+ rc = crypto_shash_update(ses->server->secmech.hmacmd5,
ntlmv2->ntlmv2_hash,
CIFS_HMAC_MD5_HASH_SIZE);
if (rc) {
@@ -671,7 +667,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
goto unlock;
}
- rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
+ rc = crypto_shash_final(ses->server->secmech.hmacmd5,
ses->auth_key.response);
if (rc)
cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__);
@@ -679,7 +675,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
unlock:
cifs_server_unlock(ses->server);
setup_ntlmv2_rsp_ret:
- kfree(tiblob);
+ kfree_sensitive(tiblob);
return rc;
}
@@ -718,49 +714,19 @@ calc_seckey(struct cifs_ses *ses)
void
cifs_crypto_secmech_release(struct TCP_Server_Info *server)
{
- if (server->secmech.cmacaes) {
- crypto_free_shash(server->secmech.cmacaes);
- server->secmech.cmacaes = NULL;
- }
-
- if (server->secmech.hmacsha256) {
- crypto_free_shash(server->secmech.hmacsha256);
- server->secmech.hmacsha256 = NULL;
- }
-
- if (server->secmech.md5) {
- crypto_free_shash(server->secmech.md5);
- server->secmech.md5 = NULL;
- }
+ cifs_free_hash(&server->secmech.aes_cmac);
+ cifs_free_hash(&server->secmech.hmacsha256);
+ cifs_free_hash(&server->secmech.md5);
+ cifs_free_hash(&server->secmech.sha512);
+ cifs_free_hash(&server->secmech.hmacmd5);
- if (server->secmech.sha512) {
- crypto_free_shash(server->secmech.sha512);
- server->secmech.sha512 = NULL;
+ if (server->secmech.enc) {
+ crypto_free_aead(server->secmech.enc);
+ server->secmech.enc = NULL;
}
- if (server->secmech.hmacmd5) {
- crypto_free_shash(server->secmech.hmacmd5);
- server->secmech.hmacmd5 = NULL;
+ if (server->secmech.dec) {
+ crypto_free_aead(server->secmech.dec);
+ server->secmech.dec = NULL;
}
-
- if (server->secmech.ccmaesencrypt) {
- crypto_free_aead(server->secmech.ccmaesencrypt);
- server->secmech.ccmaesencrypt = NULL;
- }
-
- if (server->secmech.ccmaesdecrypt) {
- crypto_free_aead(server->secmech.ccmaesdecrypt);
- server->secmech.ccmaesdecrypt = NULL;
- }
-
- kfree(server->secmech.sdesccmacaes);
- server->secmech.sdesccmacaes = NULL;
- kfree(server->secmech.sdeschmacsha256);
- server->secmech.sdeschmacsha256 = NULL;
- kfree(server->secmech.sdeschmacmd5);
- server->secmech.sdeschmacmd5 = NULL;
- kfree(server->secmech.sdescmd5);
- server->secmech.sdescmd5 = NULL;
- kfree(server->secmech.sdescsha512);
- server->secmech.sdescsha512 = NULL;
}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 8042d7280dec..fe220686bba4 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -396,6 +396,7 @@ cifs_alloc_inode(struct super_block *sb)
cifs_inode->epoch = 0;
spin_lock_init(&cifs_inode->open_file_lock);
generate_random_uuid(cifs_inode->lease_key);
+ cifs_inode->symlink_target = NULL;
/*
* Can not set i_flags here - they get immediately overwritten to zero
@@ -412,7 +413,11 @@ cifs_alloc_inode(struct super_block *sb)
static void
cifs_free_inode(struct inode *inode)
{
- kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
+ struct cifsInodeInfo *cinode = CIFS_I(inode);
+
+ if (S_ISLNK(inode->i_mode))
+ kfree(cinode->symlink_target);
+ kmem_cache_free(cifs_inode_cachep, cinode);
}
static void
@@ -1138,6 +1143,30 @@ const struct inode_operations cifs_file_inode_ops = {
.fiemap = cifs_fiemap,
};
+const char *cifs_get_link(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *done)
+{
+ char *target_path;
+
+ target_path = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (!target_path)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock(&inode->i_lock);
+ if (likely(CIFS_I(inode)->symlink_target)) {
+ strscpy(target_path, CIFS_I(inode)->symlink_target, PATH_MAX);
+ } else {
+ kfree(target_path);
+ target_path = ERR_PTR(-EOPNOTSUPP);
+ }
+ spin_unlock(&inode->i_lock);
+
+ if (!IS_ERR(target_path))
+ set_delayed_call(done, kfree_link, target_path);
+
+ return target_path;
+}
+
const struct inode_operations cifs_symlink_inode_ops = {
.get_link = cifs_get_link,
.permission = cifs_permission,
@@ -1297,8 +1326,11 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off,
ssize_t rc;
struct cifsFileInfo *cfile = dst_file->private_data;
- if (cfile->swapfile)
- return -EOPNOTSUPP;
+ if (cfile->swapfile) {
+ rc = -EOPNOTSUPP;
+ free_xid(xid);
+ return rc;
+ }
rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff,
len, flags);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 5b4a7a32bdc5..388b745a978e 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -153,6 +153,6 @@ extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
/* when changing internal version - update following two lines at same time */
-#define SMB3_PRODUCT_BUILD 39
-#define CIFS_VERSION "2.39"
+#define SMB3_PRODUCT_BUILD 40
+#define CIFS_VERSION "2.40"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index ae7f571a7dba..1420acf987f0 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -153,26 +153,16 @@ struct session_key {
char *response;
};
-/* crypto security descriptor definition */
-struct sdesc {
- struct shash_desc shash;
- char ctx[];
-};
-
/* crypto hashing related structure/fields, not specific to a sec mech */
struct cifs_secmech {
- struct crypto_shash *hmacmd5; /* hmac-md5 hash function */
- struct crypto_shash *md5; /* md5 hash function */
- struct crypto_shash *hmacsha256; /* hmac-sha256 hash function */
- struct crypto_shash *cmacaes; /* block-cipher based MAC function */
- struct crypto_shash *sha512; /* sha512 hash function */
- struct sdesc *sdeschmacmd5; /* ctxt to generate ntlmv2 hash, CR1 */
- struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */
- struct sdesc *sdeschmacsha256; /* ctxt to generate smb2 signature */
- struct sdesc *sdesccmacaes; /* ctxt to generate smb3 signature */
- struct sdesc *sdescsha512; /* ctxt to generate smb3.11 signing key */
- struct crypto_aead *ccmaesencrypt; /* smb3 encryption aead */
- struct crypto_aead *ccmaesdecrypt; /* smb3 decryption aead */
+ struct shash_desc *hmacmd5; /* hmacmd5 hash function, for NTLMv2/CR1 hashes */
+ struct shash_desc *md5; /* md5 hash function, for CIFS/SMB1 signatures */
+ struct shash_desc *hmacsha256; /* hmac-sha256 hash function, for SMB2 signatures */
+ struct shash_desc *sha512; /* sha512 hash function, for SMB3.1.1 preauth hash */
+ struct shash_desc *aes_cmac; /* block-cipher based MAC function, for SMB3 signatures */
+
+ struct crypto_aead *enc; /* smb3 encryption AEAD TFM (AES-CCM and AES-GCM) */
+ struct crypto_aead *dec; /* smb3 decryption AEAD TFM (AES-CCM and AES-GCM) */
};
/* per smb session structure/fields */
@@ -195,6 +185,19 @@ struct cifs_cred {
struct cifs_ace *aces;
};
+struct cifs_open_info_data {
+ char *symlink_target;
+ union {
+ struct smb2_file_all_info fi;
+ struct smb311_posix_qinfo posix_fi;
+ };
+};
+
+static inline void cifs_free_open_info(struct cifs_open_info_data *data)
+{
+ kfree(data->symlink_target);
+}
+
/*
*****************************************************************
* Except the CIFS PDUs themselves all the
@@ -317,20 +320,20 @@ struct smb_version_operations {
int (*is_path_accessible)(const unsigned int, struct cifs_tcon *,
struct cifs_sb_info *, const char *);
/* query path data from the server */
- int (*query_path_info)(const unsigned int, struct cifs_tcon *,
- struct cifs_sb_info *, const char *,
- FILE_ALL_INFO *, bool *, bool *);
+ int (*query_path_info)(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb, const char *full_path,
+ struct cifs_open_info_data *data, bool *adjust_tz, bool *reparse);
/* query file data from the server */
- int (*query_file_info)(const unsigned int, struct cifs_tcon *,
- struct cifs_fid *, FILE_ALL_INFO *);
+ int (*query_file_info)(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifsFileInfo *cfile, struct cifs_open_info_data *data);
/* query reparse tag from srv to determine which type of special file */
int (*query_reparse_tag)(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb, const char *path,
__u32 *reparse_tag);
/* get server index number */
- int (*get_srv_inum)(const unsigned int, struct cifs_tcon *,
- struct cifs_sb_info *, const char *,
- u64 *uniqueid, FILE_ALL_INFO *);
+ int (*get_srv_inum)(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb, const char *full_path, u64 *uniqueid,
+ struct cifs_open_info_data *data);
/* set size by path */
int (*set_path_size)(const unsigned int, struct cifs_tcon *,
const char *, __u64, struct cifs_sb_info *, bool);
@@ -379,8 +382,8 @@ struct smb_version_operations {
struct cifs_sb_info *, const char *,
char **, bool);
/* open a file for non-posix mounts */
- int (*open)(const unsigned int, struct cifs_open_parms *,
- __u32 *, FILE_ALL_INFO *);
+ int (*open)(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock,
+ void *buf);
/* set fid protocol-specific info */
void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32);
/* close a file */
@@ -451,7 +454,7 @@ struct smb_version_operations {
int (*enum_snapshots)(const unsigned int xid, struct cifs_tcon *tcon,
struct cifsFileInfo *src_file, void __user *);
int (*notify)(const unsigned int xid, struct file *pfile,
- void __user *pbuf);
+ void __user *pbuf, bool return_changes);
int (*query_mf_symlink)(unsigned int, struct cifs_tcon *,
struct cifs_sb_info *, const unsigned char *,
char *, unsigned int *);
@@ -1133,6 +1136,7 @@ struct cifs_fattr {
struct timespec64 cf_mtime;
struct timespec64 cf_ctime;
u32 cf_cifstag;
+ char *cf_symlink_target;
};
/*
@@ -1149,7 +1153,7 @@ struct cifs_tcon {
struct list_head openFileList;
spinlock_t open_file_lock; /* protects list above */
struct cifs_ses *ses; /* pointer to session associated with */
- char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */
+ char tree_name[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */
char *nativeFileSystem;
char *password; /* for share-level security */
__u32 tid; /* The 4 byte tree id */
@@ -1228,7 +1232,7 @@ struct cifs_tcon {
struct fscache_volume *fscache; /* cookie for share */
#endif
struct list_head pending_opens; /* list of incomplete opens */
- struct cached_fid *cfid; /* Cached root fid */
+ struct cached_fids *cfids;
/* BB add field for back pointer to sb struct(s)? */
#ifdef CONFIG_CIFS_DFS_UPCALL
struct list_head ulist; /* cache update list */
@@ -1395,6 +1399,7 @@ struct cifsFileInfo {
struct work_struct put; /* work for the final part of _put */
struct delayed_work deferred;
bool deferred_close_scheduled; /* Flag to indicate close is scheduled */
+ char *symlink_target;
};
struct cifs_io_parms {
@@ -1553,6 +1558,7 @@ struct cifsInodeInfo {
struct list_head deferred_closes; /* list of deferred closes */
spinlock_t deferred_lock; /* protection on deferred list */
bool lease_granted; /* Flag to indicate whether lease or oplock is granted. */
+ char *symlink_target;
};
static inline struct cifsInodeInfo *
@@ -2121,4 +2127,14 @@ static inline size_t ntlmssp_workstation_name_size(const struct cifs_ses *ses)
return sizeof(ses->workstation_name);
}
+static inline void move_cifs_info_to_smb2(struct smb2_file_all_info *dst, const FILE_ALL_INFO *src)
+{
+ memcpy(dst, src, (size_t)((u8 *)&src->AccessFlags - (u8 *)src));
+ dst->AccessFlags = src->AccessFlags;
+ dst->CurrentByteOffset = src->CurrentByteOffset;
+ dst->Mode = src->Mode;
+ dst->AlignmentRequirement = src->AlignmentRequirement;
+ dst->FileNameLength = src->FileNameLength;
+}
+
#endif /* _CIFS_GLOB_H */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index aeba371c4c70..d1abaeea974a 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -483,7 +483,7 @@ put_bcc(__u16 count, struct smb_hdr *hdr)
typedef struct negotiate_req {
struct smb_hdr hdr; /* wct = 0 */
__le16 ByteCount;
- unsigned char DialectsArray[1];
+ unsigned char DialectsArray[];
} __attribute__((packed)) NEGOTIATE_REQ;
#define MIN_TZ_ADJ (15 * 60) /* minimum grid for timezones in seconds */
@@ -508,13 +508,14 @@ typedef struct negotiate_rsp {
__u8 EncryptionKeyLength;
__u16 ByteCount;
union {
- unsigned char EncryptionKey[1]; /* cap extended security off */
+ /* cap extended security off */
+ DECLARE_FLEX_ARRAY(unsigned char, EncryptionKey);
/* followed by Domain name - if extended security is off */
/* followed by 16 bytes of server GUID */
/* then security blob if cap_extended_security negotiated */
struct {
unsigned char GUID[SMB1_CLIENT_GUID_SIZE];
- unsigned char SecurityBlob[1];
+ unsigned char SecurityBlob[];
} __attribute__((packed)) extended_response;
} __attribute__((packed)) u;
} __attribute__((packed)) NEGOTIATE_RSP;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 3bc94bcc7177..83e83d8beabb 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -182,10 +182,9 @@ extern int cifs_unlock_range(struct cifsFileInfo *cfile,
extern int cifs_push_mandatory_locks(struct cifsFileInfo *cfile);
extern void cifs_down_write(struct rw_semaphore *sem);
-extern struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid,
- struct file *file,
- struct tcon_link *tlink,
- __u32 oplock);
+struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
+ struct tcon_link *tlink, __u32 oplock,
+ const char *symlink_target);
extern int cifs_posix_open(const char *full_path, struct inode **inode,
struct super_block *sb, int mode,
unsigned int f_flags, __u32 *oplock, __u16 *netfid,
@@ -200,9 +199,9 @@ extern int cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr);
extern struct inode *cifs_iget(struct super_block *sb,
struct cifs_fattr *fattr);
-extern int cifs_get_inode_info(struct inode **inode, const char *full_path,
- FILE_ALL_INFO *data, struct super_block *sb,
- int xid, const struct cifs_fid *fid);
+int cifs_get_inode_info(struct inode **inode, const char *full_path,
+ struct cifs_open_info_data *data, struct super_block *sb, int xid,
+ const struct cifs_fid *fid);
extern int smb311_posix_get_inode_info(struct inode **pinode, const char *search_path,
struct super_block *sb, unsigned int xid);
extern int cifs_get_inode_info_unix(struct inode **pinode,
@@ -598,9 +597,8 @@ struct cifs_aio_ctx *cifs_aio_ctx_alloc(void);
void cifs_aio_ctx_release(struct kref *refcount);
int setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw);
-int cifs_alloc_hash(const char *name, struct crypto_shash **shash,
- struct sdesc **sdesc);
-void cifs_free_hash(struct crypto_shash **shash, struct sdesc **sdesc);
+int cifs_alloc_hash(const char *name, struct shash_desc **sdesc);
+void cifs_free_hash(struct shash_desc **sdesc);
extern void rqst_page_get_length(struct smb_rqst *rqst, unsigned int page,
unsigned int *len, unsigned int *offset);
@@ -639,7 +637,7 @@ cifs_chan_is_iface_active(struct cifs_ses *ses,
int
cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server);
int
-SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon);
+SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon, bool in_mount);
void extract_unc_hostname(const char *unc, const char **h, size_t *len);
int copy_path_name(char *dst, const char *src);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 7aa91e272027..1724066c1536 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -465,7 +465,7 @@ CIFSSMBNegotiate(const unsigned int xid,
for (i = 0; i < CIFS_NUM_PROT; i++) {
size_t len = strlen(protocols[i].name) + 1;
- memcpy(pSMB->DialectsArray+count, protocols[i].name, len);
+ memcpy(&pSMB->DialectsArray[count], protocols[i].name, len);
count += len;
}
inc_rfc1001_len(pSMB, count);
@@ -2305,7 +2305,7 @@ int CIFSSMBRenameOpenFile(const unsigned int xid, struct cifs_tcon *pTcon,
remap);
}
rename_info->target_name_len = cpu_to_le32(2 * len_of_str);
- count = 12 /* sizeof(struct set_file_rename) */ + (2 * len_of_str);
+ count = sizeof(struct set_file_rename) + (2 * len_of_str);
byte_count += count;
pSMB->DataCount = cpu_to_le16(count);
pSMB->TotalDataCount = pSMB->DataCount;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 7ae6f2c08153..1cc47dd3b4d6 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -155,7 +155,7 @@ static void smb2_query_server_interfaces(struct work_struct *work)
/*
* query server network interfaces, in case they change
*/
- rc = SMB3_request_interfaces(0, tcon);
+ rc = SMB3_request_interfaces(0, tcon, false);
if (rc) {
cifs_dbg(FYI, "%s: failed to query server interfaces: %d\n",
__func__, rc);
@@ -311,7 +311,7 @@ cifs_abort_connection(struct TCP_Server_Info *server)
}
server->sequence_number = 0;
server->session_estab = false;
- kfree(server->session_key.response);
+ kfree_sensitive(server->session_key.response);
server->session_key.response = NULL;
server->session_key.len = 0;
server->lstrp = jiffies;
@@ -1580,10 +1580,11 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
cifs_crypto_secmech_release(server);
- kfree(server->session_key.response);
+ kfree_sensitive(server->session_key.response);
server->session_key.response = NULL;
server->session_key.len = 0;
kfree(server->hostname);
+ server->hostname = NULL;
task = xchg(&server->tsk, NULL);
if (task)
@@ -1940,7 +1941,8 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
spin_unlock(&ses->ses_lock);
cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count);
- cifs_dbg(FYI, "%s: ses ipc: %s\n", __func__, ses->tcon_ipc ? ses->tcon_ipc->treeName : "NONE");
+ cifs_dbg(FYI,
+ "%s: ses ipc: %s\n", __func__, ses->tcon_ipc ? ses->tcon_ipc->tree_name : "NONE");
spin_lock(&cifs_tcp_ses_lock);
if (--ses->ses_count > 0) {
@@ -2293,7 +2295,7 @@ static int match_tcon(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
{
if (tcon->status == TID_EXITING)
return 0;
- if (strncmp(tcon->treeName, ctx->UNC, MAX_TREE_SIZE))
+ if (strncmp(tcon->tree_name, ctx->UNC, MAX_TREE_SIZE))
return 0;
if (tcon->seal != ctx->seal)
return 0;
@@ -2831,9 +2833,12 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
* sessinit is sent but no second negprot
*/
struct rfc1002_session_packet *ses_init_buf;
+ unsigned int req_noscope_len;
struct smb_hdr *smb_buf;
+
ses_init_buf = kzalloc(sizeof(struct rfc1002_session_packet),
GFP_KERNEL);
+
if (ses_init_buf) {
ses_init_buf->trailer.session_req.called_len = 32;
@@ -2869,8 +2874,12 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
ses_init_buf->trailer.session_req.scope2 = 0;
smb_buf = (struct smb_hdr *)ses_init_buf;
- /* sizeof RFC1002_SESSION_REQUEST with no scope */
- smb_buf->smb_buf_length = cpu_to_be32(0x81000044);
+ /* sizeof RFC1002_SESSION_REQUEST with no scopes */
+ req_noscope_len = sizeof(struct rfc1002_session_packet) - 2;
+
+ /* == cpu_to_be32(0x81000044) */
+ smb_buf->smb_buf_length =
+ cpu_to_be32((RFC1002_SESSION_REQUEST << 24) | req_noscope_len);
rc = smb_send(server, smb_buf, 0x44);
kfree(ses_init_buf);
/*
@@ -3921,12 +3930,11 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
pSMB->AndXCommand = 0xFF;
pSMB->Flags = cpu_to_le16(TCON_EXTENDED_SECINFO);
bcc_ptr = &pSMB->Password[0];
- if (tcon->pipe || (ses->server->sec_mode & SECMODE_USER)) {
- pSMB->PasswordLength = cpu_to_le16(1); /* minimum */
- *bcc_ptr = 0; /* password is null byte */
- bcc_ptr++; /* skip password */
- /* already aligned so no need to do it below */
- }
+
+ pSMB->PasswordLength = cpu_to_le16(1); /* minimum */
+ *bcc_ptr = 0; /* password is null byte */
+ bcc_ptr++; /* skip password */
+ /* already aligned so no need to do it below */
if (ses->server->sign)
smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
@@ -3989,7 +3997,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
}
bcc_ptr += length + 1;
bytes_left -= (length + 1);
- strscpy(tcon->treeName, tree, sizeof(tcon->treeName));
+ strscpy(tcon->tree_name, tree, sizeof(tcon->tree_name));
/* mostly informational -- no need to fail on error here */
kfree(tcon->nativeFileSystem);
@@ -4134,7 +4142,7 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
if (ses->auth_key.response) {
cifs_dbg(FYI, "Free previous auth_key.response = %p\n",
ses->auth_key.response);
- kfree(ses->auth_key.response);
+ kfree_sensitive(ses->auth_key.response);
ses->auth_key.response = NULL;
ses->auth_key.len = 0;
}
@@ -4197,7 +4205,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
ctx->local_nls = cifs_sb->local_nls;
ctx->linux_uid = fsuid;
ctx->cred_uid = fsuid;
- ctx->UNC = master_tcon->treeName;
+ ctx->UNC = master_tcon->tree_name;
ctx->retry = master_tcon->retry;
ctx->nocase = master_tcon->nocase;
ctx->nohandlecache = master_tcon->nohandlecache;
@@ -4663,7 +4671,7 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru
/* If it is not dfs or there was no cached dfs referral, then reconnect to same share */
if (!server->current_fullpath ||
dfs_cache_noreq_find(server->current_fullpath + 1, &ref, &tl)) {
- rc = ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, cifs_sb->local_nls);
+ rc = ops->tree_connect(xid, tcon->ses, tcon->tree_name, tcon, cifs_sb->local_nls);
goto out;
}
@@ -4707,7 +4715,7 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru
tcon->status = TID_IN_TCON;
spin_unlock(&tcon->tc_lock);
- rc = ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, nlsc);
+ rc = ops->tree_connect(xid, tcon->ses, tcon->tree_name, tcon, nlsc);
if (rc) {
spin_lock(&tcon->tc_lock);
if (tcon->status == TID_IN_TCON)
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index a9b6c3eba6de..e70915ad7541 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -98,7 +98,7 @@ static struct cifs_ses *find_ipc_from_server_path(struct cifs_ses **ses, const c
get_ipc_unc(path, unc, sizeof(unc));
for (; *ses; ses++) {
- if (!strcasecmp(unc, (*ses)->tcon_ipc->treeName))
+ if (!strcasecmp(unc, (*ses)->tcon_ipc->tree_name))
return *ses;
}
return ERR_PTR(-ENOENT);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 08f7392716e2..8b1c37158556 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -50,7 +50,7 @@ cifs_build_path_to_root(struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_s
}
if (add_treename)
- dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1);
+ dfsplen = strnlen(tcon->tree_name, MAX_TREE_SIZE + 1);
else
dfsplen = 0;
@@ -59,7 +59,7 @@ cifs_build_path_to_root(struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_s
return full_path;
if (dfsplen)
- memcpy(full_path, tcon->treeName, dfsplen);
+ memcpy(full_path, tcon->tree_name, dfsplen);
full_path[dfsplen] = CIFS_DIR_SEP(cifs_sb);
memcpy(full_path + dfsplen + 1, ctx->prepath, pplen);
convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb));
@@ -93,7 +93,7 @@ build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page,
return ERR_PTR(-ENOMEM);
if (prefix)
- dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1);
+ dfsplen = strnlen(tcon->tree_name, MAX_TREE_SIZE + 1);
else
dfsplen = 0;
@@ -123,7 +123,7 @@ build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page,
}
if (dfsplen) {
s -= dfsplen;
- memcpy(s, tcon->treeName, dfsplen);
+ memcpy(s, tcon->tree_name, dfsplen);
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) {
int i;
for (i = 0; i < dfsplen; i++) {
@@ -165,10 +165,9 @@ check_name(struct dentry *direntry, struct cifs_tcon *tcon)
/* Inode operations in similar order to how they appear in Linux file fs.h */
-static int
-cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
- struct tcon_link *tlink, unsigned oflags, umode_t mode,
- __u32 *oplock, struct cifs_fid *fid)
+static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
+ struct tcon_link *tlink, unsigned int oflags, umode_t mode, __u32 *oplock,
+ struct cifs_fid *fid, struct cifs_open_info_data *buf)
{
int rc = -ENOENT;
int create_options = CREATE_NOT_DIR;
@@ -177,7 +176,6 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
struct cifs_tcon *tcon = tlink_tcon(tlink);
const char *full_path;
void *page = alloc_dentry_path();
- FILE_ALL_INFO *buf = NULL;
struct inode *newinode = NULL;
int disposition;
struct TCP_Server_Info *server = tcon->ses->server;
@@ -290,12 +288,6 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
goto out;
}
- buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
- if (buf == NULL) {
- rc = -ENOMEM;
- goto out;
- }
-
/*
* if we're not using unix extensions, see if we need to set
* ATTR_READONLY on the create call
@@ -364,8 +356,7 @@ cifs_create_get_file_info:
{
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
/* TODO: Add support for calling POSIX query info here, but passing in fid */
- rc = cifs_get_inode_info(&newinode, full_path, buf, inode->i_sb,
- xid, fid);
+ rc = cifs_get_inode_info(&newinode, full_path, buf, inode->i_sb, xid, fid);
if (newinode) {
if (server->ops->set_lease_key)
server->ops->set_lease_key(newinode, fid);
@@ -402,7 +393,6 @@ cifs_create_set_dentry:
d_add(direntry, newinode);
out:
- kfree(buf);
free_dentry_path(page);
return rc;
@@ -423,10 +413,11 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
struct tcon_link *tlink;
struct cifs_tcon *tcon;
struct TCP_Server_Info *server;
- struct cifs_fid fid;
+ struct cifs_fid fid = {};
struct cifs_pending_open open;
__u32 oplock;
struct cifsFileInfo *file_info;
+ struct cifs_open_info_data buf = {};
if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb))))
return -EIO;
@@ -484,8 +475,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
cifs_add_pending_open(&fid, tlink, &open);
rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
- &oplock, &fid);
-
+ &oplock, &fid, &buf);
if (rc) {
cifs_del_pending_open(&open);
goto out;
@@ -510,7 +500,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
file->f_op = &cifs_file_direct_ops;
}
- file_info = cifs_new_fileinfo(&fid, file, tlink, oplock);
+ file_info = cifs_new_fileinfo(&fid, file, tlink, oplock, buf.symlink_target);
if (file_info == NULL) {
if (server->ops->close)
server->ops->close(xid, tcon, &fid);
@@ -526,6 +516,7 @@ out:
cifs_put_tlink(tlink);
out_free_xid:
free_xid(xid);
+ cifs_free_open_info(&buf);
return rc;
}
@@ -547,12 +538,15 @@ int cifs_create(struct user_namespace *mnt_userns, struct inode *inode,
struct TCP_Server_Info *server;
struct cifs_fid fid;
__u32 oplock;
+ struct cifs_open_info_data buf = {};
cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %pd and dentry = 0x%p\n",
inode, direntry, direntry);
- if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb))))
- return -EIO;
+ if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb)))) {
+ rc = -EIO;
+ goto out_free_xid;
+ }
tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb));
rc = PTR_ERR(tlink);
@@ -565,11 +559,11 @@ int cifs_create(struct user_namespace *mnt_userns, struct inode *inode,
if (server->ops->new_lease_key)
server->ops->new_lease_key(&fid);
- rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
- &oplock, &fid);
+ rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, &oplock, &fid, &buf);
if (!rc && server->ops->close)
server->ops->close(xid, tcon, &fid);
+ cifs_free_open_info(&buf);
cifs_put_tlink(tlink);
out_free_xid:
free_xid(xid);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 6f38b134a346..cd9698209930 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -209,16 +209,14 @@ posix_open_ret:
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
-static int
-cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
- struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
- struct cifs_fid *fid, unsigned int xid)
+static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
+ struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
+ struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
{
int rc;
int desired_access;
int disposition;
int create_options = CREATE_NOT_DIR;
- FILE_ALL_INFO *buf;
struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_open_parms oparms;
@@ -255,10 +253,6 @@ cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *ci
/* BB pass O_SYNC flag through on file attributes .. BB */
- buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
/* O_SYNC also has bit for O_DSYNC so following check picks up either */
if (f_flags & O_SYNC)
create_options |= CREATE_WRITE_THROUGH;
@@ -276,9 +270,8 @@ cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *ci
oparms.reconnect = false;
rc = server->ops->open(xid, &oparms, oplock, buf);
-
if (rc)
- goto out;
+ return rc;
/* TODO: Add support for calling posix query info but with passing in fid */
if (tcon->unix_ext)
@@ -294,8 +287,6 @@ cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *ci
rc = -EOPENSTALE;
}
-out:
- kfree(buf);
return rc;
}
@@ -325,9 +316,9 @@ cifs_down_write(struct rw_semaphore *sem)
static void cifsFileInfo_put_work(struct work_struct *work);
-struct cifsFileInfo *
-cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
- struct tcon_link *tlink, __u32 oplock)
+struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
+ struct tcon_link *tlink, __u32 oplock,
+ const char *symlink_target)
{
struct dentry *dentry = file_dentry(file);
struct inode *inode = d_inode(dentry);
@@ -347,6 +338,15 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
return NULL;
}
+ if (symlink_target) {
+ cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
+ if (!cfile->symlink_target) {
+ kfree(fdlocks);
+ kfree(cfile);
+ return NULL;
+ }
+ }
+
INIT_LIST_HEAD(&fdlocks->locks);
fdlocks->cfile = cfile;
cfile->llist = fdlocks;
@@ -440,6 +440,7 @@ static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
cifs_put_tlink(cifs_file->tlink);
dput(cifs_file->dentry);
cifs_sb_deactive(sb);
+ kfree(cifs_file->symlink_target);
kfree(cifs_file);
}
@@ -488,7 +489,7 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
struct cifsInodeInfo *cifsi = CIFS_I(inode);
struct super_block *sb = inode->i_sb;
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
- struct cifs_fid fid;
+ struct cifs_fid fid = {};
struct cifs_pending_open open;
bool oplock_break_cancelled;
@@ -570,8 +571,9 @@ int cifs_open(struct inode *inode, struct file *file)
void *page;
const char *full_path;
bool posix_open_ok = false;
- struct cifs_fid fid;
+ struct cifs_fid fid = {};
struct cifs_pending_open open;
+ struct cifs_open_info_data data = {};
xid = get_xid();
@@ -662,15 +664,15 @@ int cifs_open(struct inode *inode, struct file *file)
if (server->ops->get_lease_key)
server->ops->get_lease_key(inode, &fid);
- rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
- file->f_flags, &oplock, &fid, xid);
+ rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
+ xid, &data);
if (rc) {
cifs_del_pending_open(&open);
goto out;
}
}
- cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
+ cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
if (cfile == NULL) {
if (server->ops->close)
server->ops->close(xid, tcon, &fid);
@@ -712,6 +714,7 @@ out:
free_dentry_path(page);
free_xid(xid);
cifs_put_tlink(tlink);
+ cifs_free_open_info(&data);
return rc;
}
@@ -1882,11 +1885,13 @@ int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
struct cifsFileInfo *cfile;
__u32 type;
- rc = -EACCES;
xid = get_xid();
- if (!(fl->fl_flags & FL_FLOCK))
- return -ENOLCK;
+ if (!(fl->fl_flags & FL_FLOCK)) {
+ rc = -ENOLCK;
+ free_xid(xid);
+ return rc;
+ }
cfile = (struct cifsFileInfo *)file->private_data;
tcon = tlink_tcon(cfile->tlink);
@@ -1905,8 +1910,9 @@ int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
* if no lock or unlock then nothing to do since we do not
* know what it is
*/
+ rc = -EOPNOTSUPP;
free_xid(xid);
- return -EOPNOTSUPP;
+ return rc;
}
rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
@@ -2428,12 +2434,16 @@ cifs_writev_complete(struct work_struct *work)
struct cifs_writedata *
cifs_writedata_alloc(unsigned int nr_pages, work_func_t complete)
{
+ struct cifs_writedata *writedata = NULL;
struct page **pages =
kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
- if (pages)
- return cifs_writedata_direct_alloc(pages, complete);
+ if (pages) {
+ writedata = cifs_writedata_direct_alloc(pages, complete);
+ if (!writedata)
+ kvfree(pages);
+ }
- return NULL;
+ return writedata;
}
struct cifs_writedata *
@@ -3293,6 +3303,9 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
cifs_uncached_writev_complete);
if (!wdata) {
rc = -ENOMEM;
+ for (i = 0; i < nr_pages; i++)
+ put_page(pagevec[i]);
+ kvfree(pagevec);
add_credits_and_wake_if(server, credits, 0);
break;
}
@@ -4271,6 +4284,15 @@ static ssize_t __cifs_readv(
len = ctx->len;
}
+ if (direct) {
+ rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
+ offset, offset + len - 1);
+ if (rc) {
+ kref_put(&ctx->refcount, cifs_aio_ctx_release);
+ return -EAGAIN;
+ }
+ }
+
/* grab a lock here due to read response handlers can access ctx */
mutex_lock(&ctx->aio_mutex);
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index 0e13dec86b25..45119597c765 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -791,6 +791,13 @@ do { \
cifs_sb->ctx->field = NULL; \
} while (0)
+#define STEAL_STRING_SENSITIVE(cifs_sb, ctx, field) \
+do { \
+ kfree_sensitive(ctx->field); \
+ ctx->field = cifs_sb->ctx->field; \
+ cifs_sb->ctx->field = NULL; \
+} while (0)
+
static int smb3_reconfigure(struct fs_context *fc)
{
struct smb3_fs_context *ctx = smb3_fc2context(fc);
@@ -811,7 +818,7 @@ static int smb3_reconfigure(struct fs_context *fc)
STEAL_STRING(cifs_sb, ctx, UNC);
STEAL_STRING(cifs_sb, ctx, source);
STEAL_STRING(cifs_sb, ctx, username);
- STEAL_STRING(cifs_sb, ctx, password);
+ STEAL_STRING_SENSITIVE(cifs_sb, ctx, password);
STEAL_STRING(cifs_sb, ctx, domainname);
STEAL_STRING(cifs_sb, ctx, nodename);
STEAL_STRING(cifs_sb, ctx, iocharset);
@@ -1162,7 +1169,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
}
break;
case Opt_pass:
- kfree(ctx->password);
+ kfree_sensitive(ctx->password);
ctx->password = NULL;
if (strlen(param->string) == 0)
break;
@@ -1470,6 +1477,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
return 0;
cifs_parse_mount_err:
+ kfree_sensitive(ctx->password);
return -EINVAL;
}
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index 23ef56f55ce5..a1751b956318 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -45,7 +45,7 @@ int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
memset(&key, 0, sizeof(key));
- sharename = extract_sharename(tcon->treeName);
+ sharename = extract_sharename(tcon->tree_name);
if (IS_ERR(sharename)) {
cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__);
return -EINVAL;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index bac08c20f559..4e2ca3c6e5c0 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -210,6 +210,12 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
*/
inode->i_blocks = (512 - 1 + fattr->cf_bytes) >> 9;
}
+
+ if (S_ISLNK(fattr->cf_mode)) {
+ kfree(cifs_i->symlink_target);
+ cifs_i->symlink_target = fattr->cf_symlink_target;
+ fattr->cf_symlink_target = NULL;
+ }
spin_unlock(&inode->i_lock);
if (fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL)
@@ -347,13 +353,22 @@ cifs_get_file_info_unix(struct file *filp)
int rc;
unsigned int xid;
FILE_UNIX_BASIC_INFO find_data;
- struct cifs_fattr fattr;
+ struct cifs_fattr fattr = {};
struct inode *inode = file_inode(filp);
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifsFileInfo *cfile = filp->private_data;
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
xid = get_xid();
+
+ if (cfile->symlink_target) {
+ fattr.cf_symlink_target = kstrdup(cfile->symlink_target, GFP_KERNEL);
+ if (!fattr.cf_symlink_target) {
+ rc = -ENOMEM;
+ goto cifs_gfiunix_out;
+ }
+ }
+
rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->fid.netfid, &find_data);
if (!rc) {
cifs_unix_basic_to_fattr(&fattr, &find_data, cifs_sb);
@@ -378,6 +393,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
FILE_UNIX_BASIC_INFO find_data;
struct cifs_fattr fattr;
struct cifs_tcon *tcon;
+ struct TCP_Server_Info *server;
struct tcon_link *tlink;
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
@@ -387,10 +403,12 @@ int cifs_get_inode_info_unix(struct inode **pinode,
if (IS_ERR(tlink))
return PTR_ERR(tlink);
tcon = tlink_tcon(tlink);
+ server = tcon->ses->server;
/* could have done a find first instead but this returns more info */
rc = CIFSSMBUnixQPathInfo(xid, tcon, full_path, &find_data,
cifs_sb->local_nls, cifs_remap(cifs_sb));
+ cifs_dbg(FYI, "%s: query path info: rc = %d\n", __func__, rc);
cifs_put_tlink(tlink);
if (!rc) {
@@ -410,6 +428,17 @@ int cifs_get_inode_info_unix(struct inode **pinode,
cifs_dbg(FYI, "check_mf_symlink: %d\n", tmprc);
}
+ if (S_ISLNK(fattr.cf_mode) && !fattr.cf_symlink_target) {
+ if (!server->ops->query_symlink)
+ return -EOPNOTSUPP;
+ rc = server->ops->query_symlink(xid, tcon, cifs_sb, full_path,
+ &fattr.cf_symlink_target, false);
+ if (rc) {
+ cifs_dbg(FYI, "%s: query_symlink: %d\n", __func__, rc);
+ goto cgiiu_exit;
+ }
+ }
+
if (*pinode == NULL) {
/* get new inode */
cifs_fill_uniqueid(sb, &fattr);
@@ -432,6 +461,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
}
cgiiu_exit:
+ kfree(fattr.cf_symlink_target);
return rc;
}
#else
@@ -601,10 +631,10 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path,
}
/* Fill a cifs_fattr struct with info from POSIX info struct */
-static void
-smb311_posix_info_to_fattr(struct cifs_fattr *fattr, struct smb311_posix_qinfo *info,
- struct super_block *sb, bool adjust_tz, bool symlink)
+static void smb311_posix_info_to_fattr(struct cifs_fattr *fattr, struct cifs_open_info_data *data,
+ struct super_block *sb, bool adjust_tz, bool symlink)
{
+ struct smb311_posix_qinfo *info = &data->posix_fi;
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
@@ -639,6 +669,8 @@ smb311_posix_info_to_fattr(struct cifs_fattr *fattr, struct smb311_posix_qinfo *
if (symlink) {
fattr->cf_mode |= S_IFLNK;
fattr->cf_dtype = DT_LNK;
+ fattr->cf_symlink_target = data->symlink_target;
+ data->symlink_target = NULL;
} else if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
fattr->cf_mode |= S_IFDIR;
fattr->cf_dtype = DT_DIR;
@@ -655,13 +687,11 @@ smb311_posix_info_to_fattr(struct cifs_fattr *fattr, struct smb311_posix_qinfo *
fattr->cf_mode, fattr->cf_uniqueid, fattr->cf_nlink);
}
-
-/* Fill a cifs_fattr struct with info from FILE_ALL_INFO */
-static void
-cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
- struct super_block *sb, bool adjust_tz,
- bool symlink, u32 reparse_tag)
+static void cifs_open_info_to_fattr(struct cifs_fattr *fattr, struct cifs_open_info_data *data,
+ struct super_block *sb, bool adjust_tz, bool symlink,
+ u32 reparse_tag)
{
+ struct smb2_file_all_info *info = &data->fi;
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
@@ -703,7 +733,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
} else if (reparse_tag == IO_REPARSE_TAG_LX_BLK) {
fattr->cf_mode |= S_IFBLK | cifs_sb->ctx->file_mode;
fattr->cf_dtype = DT_BLK;
- } else if (symlink) { /* TODO add more reparse tag checks */
+ } else if (symlink || reparse_tag == IO_REPARSE_TAG_SYMLINK ||
+ reparse_tag == IO_REPARSE_TAG_NFS) {
fattr->cf_mode = S_IFLNK;
fattr->cf_dtype = DT_LNK;
} else if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
@@ -735,6 +766,11 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
}
}
+ if (S_ISLNK(fattr->cf_mode)) {
+ fattr->cf_symlink_target = data->symlink_target;
+ data->symlink_target = NULL;
+ }
+
fattr->cf_uid = cifs_sb->ctx->linux_uid;
fattr->cf_gid = cifs_sb->ctx->linux_gid;
}
@@ -744,23 +780,28 @@ cifs_get_file_info(struct file *filp)
{
int rc;
unsigned int xid;
- FILE_ALL_INFO find_data;
+ struct cifs_open_info_data data = {};
struct cifs_fattr fattr;
struct inode *inode = file_inode(filp);
struct cifsFileInfo *cfile = filp->private_data;
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
+ bool symlink = false;
+ u32 tag = 0;
if (!server->ops->query_file_info)
return -ENOSYS;
xid = get_xid();
- rc = server->ops->query_file_info(xid, tcon, &cfile->fid, &find_data);
+ rc = server->ops->query_file_info(xid, tcon, cfile, &data);
switch (rc) {
case 0:
/* TODO: add support to query reparse tag */
- cifs_all_info_to_fattr(&fattr, &find_data, inode->i_sb, false,
- false, 0 /* no reparse tag */);
+ if (data.symlink_target) {
+ symlink = true;
+ tag = IO_REPARSE_TAG_SYMLINK;
+ }
+ cifs_open_info_to_fattr(&fattr, &data, inode->i_sb, false, symlink, tag);
break;
case -EREMOTE:
cifs_create_dfs_fattr(&fattr, inode->i_sb);
@@ -789,6 +830,7 @@ cifs_get_file_info(struct file *filp)
/* if filetype is different, return error */
rc = cifs_fattr_to_inode(inode, &fattr);
cgfi_exit:
+ cifs_free_open_info(&data);
free_xid(xid);
return rc;
}
@@ -860,14 +902,9 @@ cifs_backup_query_path_info(int xid,
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
-static void
-cifs_set_fattr_ino(int xid,
- struct cifs_tcon *tcon,
- struct super_block *sb,
- struct inode **inode,
- const char *full_path,
- FILE_ALL_INFO *data,
- struct cifs_fattr *fattr)
+static void cifs_set_fattr_ino(int xid, struct cifs_tcon *tcon, struct super_block *sb,
+ struct inode **inode, const char *full_path,
+ struct cifs_open_info_data *data, struct cifs_fattr *fattr)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
struct TCP_Server_Info *server = tcon->ses->server;
@@ -885,11 +922,8 @@ cifs_set_fattr_ino(int xid,
* If we have an inode pass a NULL tcon to ensure we don't
* make a round trip to the server. This only works for SMB2+.
*/
- rc = server->ops->get_srv_inum(xid,
- *inode ? NULL : tcon,
- cifs_sb, full_path,
- &fattr->cf_uniqueid,
- data);
+ rc = server->ops->get_srv_inum(xid, *inode ? NULL : tcon, cifs_sb, full_path,
+ &fattr->cf_uniqueid, data);
if (rc) {
/*
* If that fails reuse existing ino or generate one
@@ -913,7 +947,7 @@ cifs_set_fattr_ino(int xid,
} else {
/* make an ino by hashing the UNC */
fattr->cf_flags |= CIFS_FATTR_FAKE_ROOT_INO;
- fattr->cf_uniqueid = simple_hashstr(tcon->treeName);
+ fattr->cf_uniqueid = simple_hashstr(tcon->tree_name);
}
}
}
@@ -923,14 +957,10 @@ static inline bool is_inode_cache_good(struct inode *ino)
return ino && CIFS_CACHE_READ(CIFS_I(ino)) && CIFS_I(ino)->time != 0;
}
-int
-cifs_get_inode_info(struct inode **inode,
- const char *full_path,
- FILE_ALL_INFO *in_data,
- struct super_block *sb, int xid,
- const struct cifs_fid *fid)
+int cifs_get_inode_info(struct inode **inode, const char *full_path,
+ struct cifs_open_info_data *data, struct super_block *sb, int xid,
+ const struct cifs_fid *fid)
{
-
struct cifs_tcon *tcon;
struct TCP_Server_Info *server;
struct tcon_link *tlink;
@@ -938,8 +968,7 @@ cifs_get_inode_info(struct inode **inode,
bool adjust_tz = false;
struct cifs_fattr fattr = {0};
bool is_reparse_point = false;
- FILE_ALL_INFO *data = in_data;
- FILE_ALL_INFO *tmp_data = NULL;
+ struct cifs_open_info_data tmp_data = {};
void *smb1_backup_rsp_buf = NULL;
int rc = 0;
int tmprc = 0;
@@ -960,21 +989,15 @@ cifs_get_inode_info(struct inode **inode,
cifs_dbg(FYI, "No need to revalidate cached inode sizes\n");
goto out;
}
- tmp_data = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
- if (!tmp_data) {
- rc = -ENOMEM;
- goto out;
- }
- rc = server->ops->query_path_info(xid, tcon, cifs_sb,
- full_path, tmp_data,
- &adjust_tz, &is_reparse_point);
+ rc = server->ops->query_path_info(xid, tcon, cifs_sb, full_path, &tmp_data,
+ &adjust_tz, &is_reparse_point);
#ifdef CONFIG_CIFS_DFS_UPCALL
if (rc == -ENOENT && is_tcon_dfs(tcon))
rc = cifs_dfs_query_info_nonascii_quirk(xid, tcon,
cifs_sb,
full_path);
#endif
- data = tmp_data;
+ data = &tmp_data;
}
/*
@@ -988,14 +1011,24 @@ cifs_get_inode_info(struct inode **inode,
* since we have to check if its reparse tag matches a known
* special file type e.g. symlink or fifo or char etc.
*/
- if ((le32_to_cpu(data->Attributes) & ATTR_REPARSE) &&
- server->ops->query_reparse_tag) {
- rc = server->ops->query_reparse_tag(xid, tcon, cifs_sb,
- full_path, &reparse_tag);
- cifs_dbg(FYI, "reparse tag 0x%x\n", reparse_tag);
+ if (is_reparse_point && data->symlink_target) {
+ reparse_tag = IO_REPARSE_TAG_SYMLINK;
+ } else if ((le32_to_cpu(data->fi.Attributes) & ATTR_REPARSE) &&
+ server->ops->query_reparse_tag) {
+ tmprc = server->ops->query_reparse_tag(xid, tcon, cifs_sb, full_path,
+ &reparse_tag);
+ if (tmprc)
+ cifs_dbg(FYI, "%s: query_reparse_tag: rc = %d\n", __func__, tmprc);
+ if (server->ops->query_symlink) {
+ tmprc = server->ops->query_symlink(xid, tcon, cifs_sb, full_path,
+ &data->symlink_target,
+ is_reparse_point);
+ if (tmprc)
+ cifs_dbg(FYI, "%s: query_symlink: rc = %d\n", __func__,
+ tmprc);
+ }
}
- cifs_all_info_to_fattr(&fattr, data, sb, adjust_tz,
- is_reparse_point, reparse_tag);
+ cifs_open_info_to_fattr(&fattr, data, sb, adjust_tz, is_reparse_point, reparse_tag);
break;
case -EREMOTE:
/* DFS link, no metadata available on this server */
@@ -1014,18 +1047,20 @@ cifs_get_inode_info(struct inode **inode,
*/
if (backup_cred(cifs_sb) && is_smb1_server(server)) {
/* for easier reading */
+ FILE_ALL_INFO *fi;
FILE_DIRECTORY_INFO *fdi;
SEARCH_ID_FULL_DIR_INFO *si;
rc = cifs_backup_query_path_info(xid, tcon, sb,
full_path,
&smb1_backup_rsp_buf,
- &data);
+ &fi);
if (rc)
goto out;
- fdi = (FILE_DIRECTORY_INFO *)data;
- si = (SEARCH_ID_FULL_DIR_INFO *)data;
+ move_cifs_info_to_smb2(&data->fi, fi);
+ fdi = (FILE_DIRECTORY_INFO *)fi;
+ si = (SEARCH_ID_FULL_DIR_INFO *)fi;
cifs_dir_info_to_fattr(&fattr, fdi, cifs_sb);
fattr.cf_uniqueid = le64_to_cpu(si->UniqueId);
@@ -1123,7 +1158,8 @@ handle_mnt_opt:
out:
cifs_buf_release(smb1_backup_rsp_buf);
cifs_put_tlink(tlink);
- kfree(tmp_data);
+ cifs_free_open_info(&tmp_data);
+ kfree(fattr.cf_symlink_target);
return rc;
}
@@ -1138,7 +1174,7 @@ smb311_posix_get_inode_info(struct inode **inode,
bool adjust_tz = false;
struct cifs_fattr fattr = {0};
bool symlink = false;
- struct smb311_posix_qinfo *data = NULL;
+ struct cifs_open_info_data data = {};
int rc = 0;
int tmprc = 0;
@@ -1155,15 +1191,9 @@ smb311_posix_get_inode_info(struct inode **inode,
cifs_dbg(FYI, "No need to revalidate cached inode sizes\n");
goto out;
}
- data = kmalloc(sizeof(struct smb311_posix_qinfo), GFP_KERNEL);
- if (!data) {
- rc = -ENOMEM;
- goto out;
- }
- rc = smb311_posix_query_path_info(xid, tcon, cifs_sb,
- full_path, data,
- &adjust_tz, &symlink);
+ rc = smb311_posix_query_path_info(xid, tcon, cifs_sb, full_path, &data, &adjust_tz,
+ &symlink);
/*
* 2. Convert it to internal cifs metadata (fattr)
@@ -1171,7 +1201,7 @@ smb311_posix_get_inode_info(struct inode **inode,
switch (rc) {
case 0:
- smb311_posix_info_to_fattr(&fattr, data, sb, adjust_tz, symlink);
+ smb311_posix_info_to_fattr(&fattr, &data, sb, adjust_tz, symlink);
break;
case -EREMOTE:
/* DFS link, no metadata available on this server */
@@ -1228,7 +1258,8 @@ smb311_posix_get_inode_info(struct inode **inode,
}
out:
cifs_put_tlink(tlink);
- kfree(data);
+ cifs_free_open_info(&data);
+ kfree(fattr.cf_symlink_target);
return rc;
}
@@ -2265,13 +2296,13 @@ cifs_dentry_needs_reval(struct dentry *dentry)
return true;
if (!open_cached_dir_by_dentry(tcon, dentry->d_parent, &cfid)) {
- mutex_lock(&cfid->fid_mutex);
+ spin_lock(&cfid->fid_lock);
if (cfid->time && cifs_i->time > cfid->time) {
- mutex_unlock(&cfid->fid_mutex);
+ spin_unlock(&cfid->fid_lock);
close_cached_dir(cfid);
return false;
}
- mutex_unlock(&cfid->fid_mutex);
+ spin_unlock(&cfid->fid_lock);
close_cached_dir(cfid);
}
/*
@@ -2327,7 +2358,7 @@ cifs_invalidate_mapping(struct inode *inode)
static int
cifs_wait_bit_killable(struct wait_bit_key *key, int mode)
{
- freezable_schedule_unsafe();
+ schedule();
if (signal_pending_state(mode, current))
return -ERESTARTSYS;
return 0;
@@ -2345,7 +2376,7 @@ cifs_revalidate_mapping(struct inode *inode)
return 0;
rc = wait_on_bit_lock_action(flags, CIFS_INO_LOCK, cifs_wait_bit_killable,
- TASK_KILLABLE);
+ TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
if (rc)
return rc;
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index b6e6e5d6c8dd..89d5fa887364 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -484,12 +484,35 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
tcon = tlink_tcon(tlink);
if (tcon && tcon->ses->server->ops->notify) {
rc = tcon->ses->server->ops->notify(xid,
- filep, (void __user *)arg);
+ filep, (void __user *)arg,
+ false /* no ret data */);
cifs_dbg(FYI, "ioctl notify rc %d\n", rc);
} else
rc = -EOPNOTSUPP;
cifs_put_tlink(tlink);
break;
+ case CIFS_IOC_NOTIFY_INFO:
+ if (!S_ISDIR(inode->i_mode)) {
+ /* Notify can only be done on directories */
+ rc = -EOPNOTSUPP;
+ break;
+ }
+ cifs_sb = CIFS_SB(inode->i_sb);
+ tlink = cifs_sb_tlink(cifs_sb);
+ if (IS_ERR(tlink)) {
+ rc = PTR_ERR(tlink);
+ break;
+ }
+ tcon = tlink_tcon(tlink);
+ if (tcon && tcon->ses->server->ops->notify) {
+ rc = tcon->ses->server->ops->notify(xid,
+ filep, (void __user *)arg,
+ true /* return details */);
+ cifs_dbg(FYI, "ioctl notify info rc %d\n", rc);
+ } else
+ rc = -EOPNOTSUPP;
+ cifs_put_tlink(tlink);
+ break;
case CIFS_IOC_SHUTDOWN:
rc = cifs_shutdown(inode->i_sb, arg);
break;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 6803cb27eecc..bd374feeccaa 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -38,29 +38,28 @@ static int
symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash)
{
int rc;
- struct crypto_shash *md5 = NULL;
- struct sdesc *sdescmd5 = NULL;
+ struct shash_desc *md5 = NULL;
- rc = cifs_alloc_hash("md5", &md5, &sdescmd5);
+ rc = cifs_alloc_hash("md5", &md5);
if (rc)
goto symlink_hash_err;
- rc = crypto_shash_init(&sdescmd5->shash);
+ rc = crypto_shash_init(md5);
if (rc) {
cifs_dbg(VFS, "%s: Could not init md5 shash\n", __func__);
goto symlink_hash_err;
}
- rc = crypto_shash_update(&sdescmd5->shash, link_str, link_len);
+ rc = crypto_shash_update(md5, link_str, link_len);
if (rc) {
cifs_dbg(VFS, "%s: Could not update with link_str\n", __func__);
goto symlink_hash_err;
}
- rc = crypto_shash_final(&sdescmd5->shash, md5_hash);
+ rc = crypto_shash_final(md5, md5_hash);
if (rc)
cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__);
symlink_hash_err:
- cifs_free_hash(&md5, &sdescmd5);
+ cifs_free_hash(&md5);
return rc;
}
@@ -202,40 +201,6 @@ out:
return rc;
}
-static int
-query_mf_symlink(const unsigned int xid, struct cifs_tcon *tcon,
- struct cifs_sb_info *cifs_sb, const unsigned char *path,
- char **symlinkinfo)
-{
- int rc;
- u8 *buf = NULL;
- unsigned int link_len = 0;
- unsigned int bytes_read = 0;
-
- buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- if (tcon->ses->server->ops->query_mf_symlink)
- rc = tcon->ses->server->ops->query_mf_symlink(xid, tcon,
- cifs_sb, path, buf, &bytes_read);
- else
- rc = -ENOSYS;
-
- if (rc)
- goto out;
-
- if (bytes_read == 0) { /* not a symlink */
- rc = -EINVAL;
- goto out;
- }
-
- rc = parse_mf_symlink(buf, bytes_read, &link_len, symlinkinfo);
-out:
- kfree(buf);
- return rc;
-}
-
int
check_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
@@ -245,6 +210,7 @@ check_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
u8 *buf = NULL;
unsigned int link_len = 0;
unsigned int bytes_read = 0;
+ char *symlink = NULL;
if (!couldbe_mf_symlink(fattr))
/* it's not a symlink */
@@ -266,7 +232,7 @@ check_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
if (bytes_read == 0) /* not a symlink */
goto out;
- rc = parse_mf_symlink(buf, bytes_read, &link_len, NULL);
+ rc = parse_mf_symlink(buf, bytes_read, &link_len, &symlink);
if (rc == -EINVAL) {
/* it's not a symlink */
rc = 0;
@@ -281,6 +247,7 @@ check_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
fattr->cf_mode &= ~S_IFMT;
fattr->cf_mode |= S_IFLNK | S_IRWXU | S_IRWXG | S_IRWXO;
fattr->cf_dtype = DT_LNK;
+ fattr->cf_symlink_target = symlink;
out:
kfree(buf);
return rc;
@@ -600,75 +567,6 @@ cifs_hl_exit:
return rc;
}
-const char *
-cifs_get_link(struct dentry *direntry, struct inode *inode,
- struct delayed_call *done)
-{
- int rc = -ENOMEM;
- unsigned int xid;
- const char *full_path;
- void *page;
- char *target_path = NULL;
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
- struct tcon_link *tlink = NULL;
- struct cifs_tcon *tcon;
- struct TCP_Server_Info *server;
-
- if (!direntry)
- return ERR_PTR(-ECHILD);
-
- xid = get_xid();
-
- tlink = cifs_sb_tlink(cifs_sb);
- if (IS_ERR(tlink)) {
- free_xid(xid);
- return ERR_CAST(tlink);
- }
- tcon = tlink_tcon(tlink);
- server = tcon->ses->server;
-
- page = alloc_dentry_path();
- full_path = build_path_from_dentry(direntry, page);
- if (IS_ERR(full_path)) {
- free_xid(xid);
- cifs_put_tlink(tlink);
- free_dentry_path(page);
- return ERR_CAST(full_path);
- }
-
- cifs_dbg(FYI, "Full path: %s inode = 0x%p\n", full_path, inode);
-
- rc = -EACCES;
- /*
- * First try Minshall+French Symlinks, if configured
- * and fallback to UNIX Extensions Symlinks.
- */
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS)
- rc = query_mf_symlink(xid, tcon, cifs_sb, full_path,
- &target_path);
-
- if (rc != 0 && server->ops->query_symlink) {
- struct cifsInodeInfo *cifsi = CIFS_I(inode);
- bool reparse_point = false;
-
- if (cifsi->cifsAttrs & ATTR_REPARSE)
- reparse_point = true;
-
- rc = server->ops->query_symlink(xid, tcon, cifs_sb, full_path,
- &target_path, reparse_point);
- }
-
- free_dentry_path(page);
- free_xid(xid);
- cifs_put_tlink(tlink);
- if (rc != 0) {
- kfree(target_path);
- return ERR_PTR(rc);
- }
- set_delayed_call(done, kfree_link, target_path);
- return target_path;
-}
-
int
cifs_symlink(struct user_namespace *mnt_userns, struct inode *inode,
struct dentry *direntry, const char *symname)
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 87f60f736731..3e68d8208cf5 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -117,8 +117,8 @@ tconInfoAlloc(void)
ret_buf = kzalloc(sizeof(*ret_buf), GFP_KERNEL);
if (!ret_buf)
return NULL;
- ret_buf->cfid = init_cached_dir();
- if (!ret_buf->cfid) {
+ ret_buf->cfids = init_cached_dirs();
+ if (!ret_buf->cfids) {
kfree(ret_buf);
return NULL;
}
@@ -144,7 +144,7 @@ tconInfoFree(struct cifs_tcon *tcon)
cifs_dbg(FYI, "Null buffer passed to tconInfoFree\n");
return;
}
- free_cached_dir(tcon);
+ free_cached_dirs(tcon->cfids);
atomic_dec(&tconInfoAllocCount);
kfree(tcon->nativeFileSystem);
kfree_sensitive(tcon->password);
@@ -400,6 +400,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
{
struct smb_hdr *buf = (struct smb_hdr *)buffer;
struct smb_com_lock_req *pSMB = (struct smb_com_lock_req *)buf;
+ struct TCP_Server_Info *pserver;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
struct cifsInodeInfo *pCifsInode;
@@ -464,9 +465,12 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE))
return false;
+ /* If server is a channel, select the primary channel */
+ pserver = CIFS_SERVER_IS_CHAN(srv) ? srv->primary_server : srv;
+
/* look up tcon based on tid & uid */
spin_lock(&cifs_tcp_ses_lock);
- list_for_each_entry(ses, &srv->smb_ses_list, smb_ses_list) {
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
if (tcon->tid != buf->Tid)
continue;
@@ -525,7 +529,7 @@ cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb)
cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM;
cifs_sb->mnt_cifs_serverino_autodisabled = true;
cifs_dbg(VFS, "Autodisabling the use of server inode numbers on %s\n",
- tcon ? tcon->treeName : "new server");
+ tcon ? tcon->tree_name : "new server");
cifs_dbg(VFS, "The server doesn't seem to support them properly or the files might be on different servers (DFS)\n");
cifs_dbg(VFS, "Hardlinks will not be recognized on this mount. Consider mounting with the \"noserverino\" option to silence this message.\n");
@@ -824,7 +828,7 @@ cifs_close_deferred_file_under_dentry(struct cifs_tcon *tcon, const char *path)
free_dentry_path(page);
}
-/* parses DFS refferal V3 structure
+/* parses DFS referral V3 structure
* caller is responsible for freeing target_nodes
* returns:
* - on success - 0
@@ -1071,59 +1075,58 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw)
/**
* cifs_alloc_hash - allocate hash and hash context together
* @name: The name of the crypto hash algo
- * @shash: Where to put the pointer to the hash algo
- * @sdesc: Where to put the pointer to the hash descriptor
+ * @sdesc: SHASH descriptor where to put the pointer to the hash TFM
*
* The caller has to make sure @sdesc is initialized to either NULL or
- * a valid context. Both can be freed via cifs_free_hash().
+ * a valid context. It can be freed via cifs_free_hash().
*/
int
-cifs_alloc_hash(const char *name,
- struct crypto_shash **shash, struct sdesc **sdesc)
+cifs_alloc_hash(const char *name, struct shash_desc **sdesc)
{
int rc = 0;
- size_t size;
+ struct crypto_shash *alg = NULL;
- if (*sdesc != NULL)
+ if (*sdesc)
return 0;
- *shash = crypto_alloc_shash(name, 0, 0);
- if (IS_ERR(*shash)) {
- cifs_dbg(VFS, "Could not allocate crypto %s\n", name);
- rc = PTR_ERR(*shash);
- *shash = NULL;
+ alg = crypto_alloc_shash(name, 0, 0);
+ if (IS_ERR(alg)) {
+ cifs_dbg(VFS, "Could not allocate shash TFM '%s'\n", name);
+ rc = PTR_ERR(alg);
*sdesc = NULL;
return rc;
}
- size = sizeof(struct shash_desc) + crypto_shash_descsize(*shash);
- *sdesc = kmalloc(size, GFP_KERNEL);
+ *sdesc = kmalloc(sizeof(struct shash_desc) + crypto_shash_descsize(alg), GFP_KERNEL);
if (*sdesc == NULL) {
- cifs_dbg(VFS, "no memory left to allocate crypto %s\n", name);
- crypto_free_shash(*shash);
- *shash = NULL;
+ cifs_dbg(VFS, "no memory left to allocate shash TFM '%s'\n", name);
+ crypto_free_shash(alg);
return -ENOMEM;
}
- (*sdesc)->shash.tfm = *shash;
+ (*sdesc)->tfm = alg;
return 0;
}
/**
* cifs_free_hash - free hash and hash context together
- * @shash: Where to find the pointer to the hash algo
- * @sdesc: Where to find the pointer to the hash descriptor
+ * @sdesc: Where to find the pointer to the hash TFM
*
- * Freeing a NULL hash or context is safe.
+ * Freeing a NULL descriptor is safe.
*/
void
-cifs_free_hash(struct crypto_shash **shash, struct sdesc **sdesc)
+cifs_free_hash(struct shash_desc **sdesc)
{
- kfree(*sdesc);
+ if (unlikely(!sdesc) || !*sdesc)
+ return;
+
+ if ((*sdesc)->tfm) {
+ crypto_free_shash((*sdesc)->tfm);
+ (*sdesc)->tfm = NULL;
+ }
+
+ kfree_sensitive(*sdesc);
*sdesc = NULL;
- if (*shash)
- crypto_free_shash(*shash);
- *shash = NULL;
}
/**
@@ -1328,7 +1331,7 @@ int cifs_dfs_query_info_nonascii_quirk(const unsigned int xid,
char *treename, *dfspath, sep;
int treenamelen, linkpathlen, rc;
- treename = tcon->treeName;
+ treename = tcon->tree_name;
/* MS-DFSC: All paths in REQ_GET_DFS_REFERRAL and RESP_GET_DFS_REFERRAL
* messages MUST be encoded with exactly one leading backslash, not two
* leading backslashes.
diff --git a/fs/cifs/netlink.c b/fs/cifs/netlink.c
index 291cb606f149..147d9409252c 100644
--- a/fs/cifs/netlink.c
+++ b/fs/cifs/netlink.c
@@ -51,6 +51,7 @@ struct genl_family cifs_genl_family = {
.policy = cifs_genl_policy,
.ops = cifs_genl_ops,
.n_ops = ARRAY_SIZE(cifs_genl_ops),
+ .resv_start_op = CIFS_GENL_CMD_SWN_NOTIFY + 1,
.mcgrps = cifs_genl_mcgrps,
.n_mcgrps = ARRAY_SIZE(cifs_genl_mcgrps),
};
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 8e060c00c969..2d75ba5aaa8a 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -844,17 +844,34 @@ static bool emit_cached_dirents(struct cached_dirents *cde,
struct dir_context *ctx)
{
struct cached_dirent *dirent;
- int rc;
+ bool rc;
list_for_each_entry(dirent, &cde->entries, entry) {
- if (ctx->pos >= dirent->pos)
+ /*
+ * Skip all early entries prior to the current lseek()
+ * position.
+ */
+ if (ctx->pos > dirent->pos)
continue;
+ /*
+ * We recorded the current ->pos value for the dirent
+ * when we stored it in the cache.
+ * However, this sequence of ->pos values may have holes
+ * in it, for example dot-dirs returned from the server
+ * are suppressed.
+ * Handle this bu forcing ctx->pos to be the same as the
+ * ->pos of the current dirent we emit from the cache.
+ * This means that when we emit these entries from the cache
+ * we now emit them with the same ->pos value as in the
+ * initial scan.
+ */
ctx->pos = dirent->pos;
rc = dir_emit(ctx, dirent->name, dirent->namelen,
dirent->fattr.cf_uniqueid,
dirent->fattr.cf_dtype);
if (!rc)
return rc;
+ ctx->pos++;
}
return true;
}
@@ -994,6 +1011,8 @@ static int cifs_filldir(char *find_entry, struct file *file,
cifs_unix_basic_to_fattr(&fattr,
&((FILE_UNIX_INFO *)find_entry)->basic,
cifs_sb);
+ if (S_ISLNK(fattr.cf_mode))
+ fattr.cf_flags |= CIFS_FATTR_NEED_REVAL;
break;
case SMB_FIND_FILE_INFO_STANDARD:
cifs_std_info_to_fattr(&fattr,
@@ -1202,10 +1221,10 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
ctx->pos, tmp_buf);
cifs_save_resume_key(current_entry, cifsFile);
break;
- } else
- current_entry =
- nxt_dir_entry(current_entry, end_of_smb,
- cifsFile->srch_inf.info_level);
+ }
+ current_entry =
+ nxt_dir_entry(current_entry, end_of_smb,
+ cifsFile->srch_inf.info_level);
}
kfree(tmp_buf);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 3af3b05b6c74..92e4278ec35d 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -496,6 +496,7 @@ out:
cifs_put_tcp_session(chan->server, 0);
}
+ free_xid(xid);
return rc;
}
@@ -601,11 +602,6 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifs_ses *ses,
/* BB FIXME add check that strings total less
than 335 or will need to send them as arrays */
- /* unicode strings, must be word aligned before the call */
-/* if ((long) bcc_ptr % 2) {
- *bcc_ptr = 0;
- bcc_ptr++;
- } */
/* copy user */
if (ses->user_name == NULL) {
/* null user mount */
@@ -1213,10 +1209,18 @@ out_free_smb_buf:
static void
sess_free_buffer(struct sess_data *sess_data)
{
+ struct kvec *iov = sess_data->iov;
+
+ /*
+ * Zero the session data before freeing, as it might contain sensitive info (keys, etc).
+ * Note that iov[1] is already freed by caller.
+ */
+ if (sess_data->buf0_type != CIFS_NO_BUFFER && iov[0].iov_base)
+ memzero_explicit(iov[0].iov_base, iov[0].iov_len);
- free_rsp_buf(sess_data->buf0_type, sess_data->iov[0].iov_base);
+ free_rsp_buf(sess_data->buf0_type, iov[0].iov_base);
sess_data->buf0_type = CIFS_NO_BUFFER;
- kfree(sess_data->iov[2].iov_base);
+ kfree_sensitive(iov[2].iov_base);
}
static int
@@ -1318,7 +1322,7 @@ sess_auth_ntlmv2(struct sess_data *sess_data)
}
if (ses->capabilities & CAP_UNICODE) {
- if (sess_data->iov[0].iov_len % 2) {
+ if (!IS_ALIGNED(sess_data->iov[0].iov_len, 2)) {
*bcc_ptr = 0;
bcc_ptr++;
}
@@ -1358,7 +1362,7 @@ sess_auth_ntlmv2(struct sess_data *sess_data)
/* no string area to decode, do nothing */
} else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
/* unicode string area must be word-aligned */
- if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
+ if (!IS_ALIGNED((unsigned long)bcc_ptr - (unsigned long)smb_buf, 2)) {
++bcc_ptr;
--bytes_remaining;
}
@@ -1374,7 +1378,7 @@ out:
sess_data->result = rc;
sess_data->func = NULL;
sess_free_buffer(sess_data);
- kfree(ses->auth_key.response);
+ kfree_sensitive(ses->auth_key.response);
ses->auth_key.response = NULL;
}
@@ -1442,8 +1446,7 @@ sess_auth_kerberos(struct sess_data *sess_data)
if (ses->capabilities & CAP_UNICODE) {
/* unicode strings must be word aligned */
- if ((sess_data->iov[0].iov_len
- + sess_data->iov[1].iov_len) % 2) {
+ if (!IS_ALIGNED(sess_data->iov[0].iov_len + sess_data->iov[1].iov_len, 2)) {
*bcc_ptr = 0;
bcc_ptr++;
}
@@ -1494,7 +1497,7 @@ sess_auth_kerberos(struct sess_data *sess_data)
/* no string area to decode, do nothing */
} else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
/* unicode string area must be word-aligned */
- if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
+ if (!IS_ALIGNED((unsigned long)bcc_ptr - (unsigned long)smb_buf, 2)) {
++bcc_ptr;
--bytes_remaining;
}
@@ -1513,7 +1516,7 @@ out:
sess_data->result = rc;
sess_data->func = NULL;
sess_free_buffer(sess_data);
- kfree(ses->auth_key.response);
+ kfree_sensitive(ses->auth_key.response);
ses->auth_key.response = NULL;
}
@@ -1546,7 +1549,7 @@ _sess_auth_rawntlmssp_assemble_req(struct sess_data *sess_data)
bcc_ptr = sess_data->iov[2].iov_base;
/* unicode strings must be word aligned */
- if ((sess_data->iov[0].iov_len + sess_data->iov[1].iov_len) % 2) {
+ if (!IS_ALIGNED(sess_data->iov[0].iov_len + sess_data->iov[1].iov_len, 2)) {
*bcc_ptr = 0;
bcc_ptr++;
}
@@ -1648,7 +1651,7 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data)
rc = decode_ntlmssp_challenge(bcc_ptr, blob_len, ses);
out_free_ntlmsspblob:
- kfree(ntlmsspblob);
+ kfree_sensitive(ntlmsspblob);
out:
sess_free_buffer(sess_data);
@@ -1658,9 +1661,9 @@ out:
}
/* Else error. Cleanup */
- kfree(ses->auth_key.response);
+ kfree_sensitive(ses->auth_key.response);
ses->auth_key.response = NULL;
- kfree(ses->ntlmssp);
+ kfree_sensitive(ses->ntlmssp);
ses->ntlmssp = NULL;
sess_data->func = NULL;
@@ -1747,7 +1750,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
/* no string area to decode, do nothing */
} else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
/* unicode string area must be word-aligned */
- if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
+ if (!IS_ALIGNED((unsigned long)bcc_ptr - (unsigned long)smb_buf, 2)) {
++bcc_ptr;
--bytes_remaining;
}
@@ -1759,7 +1762,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
}
out_free_ntlmsspblob:
- kfree(ntlmsspblob);
+ kfree_sensitive(ntlmsspblob);
out:
sess_free_buffer(sess_data);
@@ -1767,9 +1770,9 @@ out:
rc = sess_establish_session(sess_data);
/* Cleanup */
- kfree(ses->auth_key.response);
+ kfree_sensitive(ses->auth_key.response);
ses->auth_key.response = NULL;
- kfree(ses->ntlmssp);
+ kfree_sensitive(ses->ntlmssp);
ses->ntlmssp = NULL;
sess_data->func = NULL;
@@ -1845,7 +1848,7 @@ int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses,
rc = sess_data->result;
out:
- kfree(sess_data);
+ kfree_sensitive(sess_data);
return rc;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index f36b2d2d40ca..50480751e521 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -542,31 +542,32 @@ cifs_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
return rc;
}
-static int
-cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
- struct cifs_sb_info *cifs_sb, const char *full_path,
- FILE_ALL_INFO *data, bool *adjustTZ, bool *symlink)
+static int cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb, const char *full_path,
+ struct cifs_open_info_data *data, bool *adjustTZ, bool *symlink)
{
int rc;
+ FILE_ALL_INFO fi = {};
*symlink = false;
/* could do find first instead but this returns more info */
- rc = CIFSSMBQPathInfo(xid, tcon, full_path, data, 0 /* not legacy */,
- cifs_sb->local_nls, cifs_remap(cifs_sb));
+ rc = CIFSSMBQPathInfo(xid, tcon, full_path, &fi, 0 /* not legacy */, cifs_sb->local_nls,
+ cifs_remap(cifs_sb));
/*
* BB optimize code so we do not make the above call when server claims
* no NT SMB support and the above call failed at least once - set flag
* in tcon or mount.
*/
if ((rc == -EOPNOTSUPP) || (rc == -EINVAL)) {
- rc = SMBQueryInformation(xid, tcon, full_path, data,
- cifs_sb->local_nls,
+ rc = SMBQueryInformation(xid, tcon, full_path, &fi, cifs_sb->local_nls,
cifs_remap(cifs_sb));
+ if (!rc)
+ move_cifs_info_to_smb2(&data->fi, &fi);
*adjustTZ = true;
}
- if (!rc && (le32_to_cpu(data->Attributes) & ATTR_REPARSE)) {
+ if (!rc && (le32_to_cpu(fi.Attributes) & ATTR_REPARSE)) {
int tmprc;
int oplock = 0;
struct cifs_fid fid;
@@ -592,10 +593,9 @@ cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
return rc;
}
-static int
-cifs_get_srv_inum(const unsigned int xid, struct cifs_tcon *tcon,
- struct cifs_sb_info *cifs_sb, const char *full_path,
- u64 *uniqueid, FILE_ALL_INFO *data)
+static int cifs_get_srv_inum(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb, const char *full_path,
+ u64 *uniqueid, struct cifs_open_info_data *unused)
{
/*
* We can not use the IndexNumber field by default from Windows or
@@ -613,11 +613,22 @@ cifs_get_srv_inum(const unsigned int xid, struct cifs_tcon *tcon,
cifs_remap(cifs_sb));
}
-static int
-cifs_query_file_info(const unsigned int xid, struct cifs_tcon *tcon,
- struct cifs_fid *fid, FILE_ALL_INFO *data)
+static int cifs_query_file_info(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifsFileInfo *cfile, struct cifs_open_info_data *data)
{
- return CIFSSMBQFileInfo(xid, tcon, fid->netfid, data);
+ int rc;
+ FILE_ALL_INFO fi = {};
+
+ if (cfile->symlink_target) {
+ data->symlink_target = kstrdup(cfile->symlink_target, GFP_KERNEL);
+ if (!data->symlink_target)
+ return -ENOMEM;
+ }
+
+ rc = CIFSSMBQFileInfo(xid, tcon, cfile->fid.netfid, &fi);
+ if (!rc)
+ move_cifs_info_to_smb2(&data->fi, &fi);
+ return rc;
}
static void
@@ -702,19 +713,20 @@ cifs_mkdir_setinfo(struct inode *inode, const char *full_path,
cifsInode->cifsAttrs = dosattrs;
}
-static int
-cifs_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
- __u32 *oplock, FILE_ALL_INFO *buf)
+static int cifs_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock,
+ void *buf)
{
+ FILE_ALL_INFO *fi = buf;
+
if (!(oparms->tcon->ses->capabilities & CAP_NT_SMBS))
return SMBLegacyOpen(xid, oparms->tcon, oparms->path,
oparms->disposition,
oparms->desired_access,
oparms->create_options,
- &oparms->fid->netfid, oplock, buf,
+ &oparms->fid->netfid, oplock, fi,
oparms->cifs_sb->local_nls,
cifs_remap(oparms->cifs_sb));
- return CIFS_open(xid, oparms, oplock, buf);
+ return CIFS_open(xid, oparms, oplock, fi);
}
static void
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 9dfd2dd612c2..ffbd9a99fc12 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -20,40 +20,125 @@
#include "cifs_unicode.h"
#include "fscache.h"
#include "smb2proto.h"
+#include "smb2status.h"
-int
-smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
- __u32 *oplock, FILE_ALL_INFO *buf)
+static struct smb2_symlink_err_rsp *symlink_data(const struct kvec *iov)
+{
+ struct smb2_err_rsp *err = iov->iov_base;
+ struct smb2_symlink_err_rsp *sym = ERR_PTR(-EINVAL);
+ u32 len;
+
+ if (err->ErrorContextCount) {
+ struct smb2_error_context_rsp *p, *end;
+
+ len = (u32)err->ErrorContextCount * (offsetof(struct smb2_error_context_rsp,
+ ErrorContextData) +
+ sizeof(struct smb2_symlink_err_rsp));
+ if (le32_to_cpu(err->ByteCount) < len || iov->iov_len < len + sizeof(*err))
+ return ERR_PTR(-EINVAL);
+
+ p = (struct smb2_error_context_rsp *)err->ErrorData;
+ end = (struct smb2_error_context_rsp *)((u8 *)err + iov->iov_len);
+ do {
+ if (le32_to_cpu(p->ErrorId) == SMB2_ERROR_ID_DEFAULT) {
+ sym = (struct smb2_symlink_err_rsp *)&p->ErrorContextData;
+ break;
+ }
+ cifs_dbg(FYI, "%s: skipping unhandled error context: 0x%x\n",
+ __func__, le32_to_cpu(p->ErrorId));
+
+ len = ALIGN(le32_to_cpu(p->ErrorDataLength), 8);
+ p = (struct smb2_error_context_rsp *)((u8 *)&p->ErrorContextData + len);
+ } while (p < end);
+ } else if (le32_to_cpu(err->ByteCount) >= sizeof(*sym) &&
+ iov->iov_len >= SMB2_SYMLINK_STRUCT_SIZE) {
+ sym = (struct smb2_symlink_err_rsp *)err->ErrorData;
+ }
+
+ if (!IS_ERR(sym) && (le32_to_cpu(sym->SymLinkErrorTag) != SYMLINK_ERROR_TAG ||
+ le32_to_cpu(sym->ReparseTag) != IO_REPARSE_TAG_SYMLINK))
+ sym = ERR_PTR(-EINVAL);
+
+ return sym;
+}
+
+int smb2_parse_symlink_response(struct cifs_sb_info *cifs_sb, const struct kvec *iov, char **path)
+{
+ struct smb2_symlink_err_rsp *sym;
+ unsigned int sub_offs, sub_len;
+ unsigned int print_offs, print_len;
+ char *s;
+
+ if (!cifs_sb || !iov || !iov->iov_base || !iov->iov_len || !path)
+ return -EINVAL;
+
+ sym = symlink_data(iov);
+ if (IS_ERR(sym))
+ return PTR_ERR(sym);
+
+ sub_len = le16_to_cpu(sym->SubstituteNameLength);
+ sub_offs = le16_to_cpu(sym->SubstituteNameOffset);
+ print_len = le16_to_cpu(sym->PrintNameLength);
+ print_offs = le16_to_cpu(sym->PrintNameOffset);
+
+ if (iov->iov_len < SMB2_SYMLINK_STRUCT_SIZE + sub_offs + sub_len ||
+ iov->iov_len < SMB2_SYMLINK_STRUCT_SIZE + print_offs + print_len)
+ return -EINVAL;
+
+ s = cifs_strndup_from_utf16((char *)sym->PathBuffer + sub_offs, sub_len, true,
+ cifs_sb->local_nls);
+ if (!s)
+ return -ENOMEM;
+ convert_delimiter(s, '/');
+ cifs_dbg(FYI, "%s: symlink target: %s\n", __func__, s);
+
+ *path = s;
+ return 0;
+}
+
+int smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock, void *buf)
{
int rc;
__le16 *smb2_path;
- struct smb2_file_all_info *smb2_data = NULL;
__u8 smb2_oplock;
+ struct cifs_open_info_data *data = buf;
+ struct smb2_file_all_info file_info = {};
+ struct smb2_file_all_info *smb2_data = data ? &file_info : NULL;
+ struct kvec err_iov = {};
+ int err_buftype = CIFS_NO_BUFFER;
struct cifs_fid *fid = oparms->fid;
struct network_resiliency_req nr_ioctl_req;
smb2_path = cifs_convert_path_to_utf16(oparms->path, oparms->cifs_sb);
- if (smb2_path == NULL) {
- rc = -ENOMEM;
- goto out;
- }
-
- smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
- GFP_KERNEL);
- if (smb2_data == NULL) {
- rc = -ENOMEM;
- goto out;
- }
+ if (smb2_path == NULL)
+ return -ENOMEM;
oparms->desired_access |= FILE_READ_ATTRIBUTES;
smb2_oplock = SMB2_OPLOCK_LEVEL_BATCH;
- rc = SMB2_open(xid, oparms, smb2_path, &smb2_oplock, smb2_data, NULL,
- NULL, NULL);
+ rc = SMB2_open(xid, oparms, smb2_path, &smb2_oplock, smb2_data, NULL, &err_iov,
+ &err_buftype);
+ if (rc && data) {
+ struct smb2_hdr *hdr = err_iov.iov_base;
+
+ if (unlikely(!err_iov.iov_base || err_buftype == CIFS_NO_BUFFER))
+ rc = -ENOMEM;
+ else if (hdr->Status == STATUS_STOPPED_ON_SYMLINK) {
+ rc = smb2_parse_symlink_response(oparms->cifs_sb, &err_iov,
+ &data->symlink_target);
+ if (!rc) {
+ memset(smb2_data, 0, sizeof(*smb2_data));
+ oparms->create_options |= OPEN_REPARSE_POINT;
+ rc = SMB2_open(xid, oparms, smb2_path, &smb2_oplock, smb2_data,
+ NULL, NULL, NULL);
+ oparms->create_options &= ~OPEN_REPARSE_POINT;
+ }
+ }
+ }
+
if (rc)
goto out;
-
if (oparms->tcon->use_resilient) {
/* default timeout is 0, servers pick default (120 seconds) */
nr_ioctl_req.Timeout =
@@ -73,7 +158,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
rc = 0;
}
- if (buf) {
+ if (smb2_data) {
/* if open response does not have IndexNumber field - get it */
if (smb2_data->IndexNumber == 0) {
rc = SMB2_get_srv_num(xid, oparms->tcon,
@@ -89,12 +174,12 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
rc = 0;
}
}
- move_smb2_info_to_cifs(buf, smb2_data);
+ memcpy(&data->fi, smb2_data, sizeof(data->fi));
}
*oplock = smb2_oplock;
out:
- kfree(smb2_data);
+ free_rsp_buf(err_buftype, err_iov.iov_base);
kfree(smb2_path);
return rc;
}
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index b83f59051b26..68e08c85fbb8 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -24,6 +24,7 @@
#include "smb2pdu.h"
#include "smb2proto.h"
#include "cached_dir.h"
+#include "smb2status.h"
static void
free_set_inf_compound(struct smb_rqst *rqst)
@@ -50,13 +51,15 @@ struct cop_vars {
/*
* note: If cfile is passed, the reference to it is dropped here.
* So make sure that you do not reuse cfile after return from this func.
+ *
+ * If passing @err_iov and @err_buftype, ensure to make them both large enough (>= 3) to hold all
+ * error responses. Caller is also responsible for freeing them up.
*/
-static int
-smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
- struct cifs_sb_info *cifs_sb, const char *full_path,
- __u32 desired_access, __u32 create_disposition,
- __u32 create_options, umode_t mode, void *ptr, int command,
- struct cifsFileInfo *cfile)
+static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb, const char *full_path,
+ __u32 desired_access, __u32 create_disposition, __u32 create_options,
+ umode_t mode, void *ptr, int command, struct cifsFileInfo *cfile,
+ struct kvec *err_iov, int *err_buftype)
{
struct cop_vars *vars = NULL;
struct kvec *rsp_iov;
@@ -70,6 +73,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
int num_rqst = 0;
int resp_buftype[3];
struct smb2_query_info_rsp *qi_rsp = NULL;
+ struct cifs_open_info_data *idata;
int flags = 0;
__u8 delete_pending[8] = {1, 0, 0, 0, 0, 0, 0, 0};
unsigned int size[2];
@@ -379,20 +383,25 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
SMB2_open_free(&rqst[0]);
if (rc == -EREMCHG) {
- pr_warn_once("server share %s deleted\n", tcon->treeName);
+ pr_warn_once("server share %s deleted\n", tcon->tree_name);
tcon->need_reconnect = true;
}
switch (command) {
case SMB2_OP_QUERY_INFO:
+ idata = ptr;
+ if (rc == 0 && cfile && cfile->symlink_target) {
+ idata->symlink_target = kstrdup(cfile->symlink_target, GFP_KERNEL);
+ if (!idata->symlink_target)
+ rc = -ENOMEM;
+ }
if (rc == 0) {
qi_rsp = (struct smb2_query_info_rsp *)
rsp_iov[1].iov_base;
rc = smb2_validate_and_copy_iov(
le16_to_cpu(qi_rsp->OutputBufferOffset),
le32_to_cpu(qi_rsp->OutputBufferLength),
- &rsp_iov[1], sizeof(struct smb2_file_all_info),
- ptr);
+ &rsp_iov[1], sizeof(idata->fi), (char *)&idata->fi);
}
if (rqst[1].rq_iov)
SMB2_query_info_free(&rqst[1]);
@@ -406,13 +415,20 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
tcon->tid);
break;
case SMB2_OP_POSIX_QUERY_INFO:
+ idata = ptr;
+ if (rc == 0 && cfile && cfile->symlink_target) {
+ idata->symlink_target = kstrdup(cfile->symlink_target, GFP_KERNEL);
+ if (!idata->symlink_target)
+ rc = -ENOMEM;
+ }
if (rc == 0) {
qi_rsp = (struct smb2_query_info_rsp *)
rsp_iov[1].iov_base;
rc = smb2_validate_and_copy_iov(
le16_to_cpu(qi_rsp->OutputBufferOffset),
le32_to_cpu(qi_rsp->OutputBufferLength),
- &rsp_iov[1], sizeof(struct smb311_posix_qinfo) /* add SIDs */, ptr);
+ &rsp_iov[1], sizeof(idata->posix_fi) /* add SIDs */,
+ (char *)&idata->posix_fi);
}
if (rqst[1].rq_iov)
SMB2_query_info_free(&rqst[1]);
@@ -477,42 +493,33 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
free_set_inf_compound(rqst);
break;
}
- free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
- free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
- free_rsp_buf(resp_buftype[2], rsp_iov[2].iov_base);
+
+ if (rc && err_iov && err_buftype) {
+ memcpy(err_iov, rsp_iov, 3 * sizeof(*err_iov));
+ memcpy(err_buftype, resp_buftype, 3 * sizeof(*err_buftype));
+ } else {
+ free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
+ free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
+ free_rsp_buf(resp_buftype[2], rsp_iov[2].iov_base);
+ }
kfree(vars);
return rc;
}
-void
-move_smb2_info_to_cifs(FILE_ALL_INFO *dst, struct smb2_file_all_info *src)
-{
- memcpy(dst, src, (size_t)(&src->CurrentByteOffset) - (size_t)src);
- dst->CurrentByteOffset = src->CurrentByteOffset;
- dst->Mode = src->Mode;
- dst->AlignmentRequirement = src->AlignmentRequirement;
- dst->IndexNumber1 = 0; /* we don't use it */
-}
-
-int
-smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
- struct cifs_sb_info *cifs_sb, const char *full_path,
- FILE_ALL_INFO *data, bool *adjust_tz, bool *reparse)
+int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb, const char *full_path,
+ struct cifs_open_info_data *data, bool *adjust_tz, bool *reparse)
{
int rc;
- struct smb2_file_all_info *smb2_data;
__u32 create_options = 0;
struct cifsFileInfo *cfile;
struct cached_fid *cfid = NULL;
+ struct kvec err_iov[3] = {};
+ int err_buftype[3] = {};
*adjust_tz = false;
*reparse = false;
- smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
- GFP_KERNEL);
- if (smb2_data == NULL)
- return -ENOMEM;
-
if (strcmp(full_path, ""))
rc = -ENOENT;
else
@@ -520,63 +527,58 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
/* If it is a root and its handle is cached then use it */
if (!rc) {
if (cfid->file_all_info_is_valid) {
- move_smb2_info_to_cifs(data,
- &cfid->file_all_info);
+ memcpy(&data->fi, &cfid->file_all_info, sizeof(data->fi));
} else {
- rc = SMB2_query_info(xid, tcon,
- cfid->fid.persistent_fid,
- cfid->fid.volatile_fid, smb2_data);
- if (!rc)
- move_smb2_info_to_cifs(data, smb2_data);
+ rc = SMB2_query_info(xid, tcon, cfid->fid.persistent_fid,
+ cfid->fid.volatile_fid, &data->fi);
}
close_cached_dir(cfid);
- goto out;
+ return rc;
}
cifs_get_readable_path(tcon, full_path, &cfile);
- rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
- FILE_READ_ATTRIBUTES, FILE_OPEN, create_options,
- ACL_NO_MODE, smb2_data, SMB2_OP_QUERY_INFO, cfile);
+ rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN,
+ create_options, ACL_NO_MODE, data, SMB2_OP_QUERY_INFO, cfile,
+ err_iov, err_buftype);
if (rc == -EOPNOTSUPP) {
+ if (err_iov[0].iov_base && err_buftype[0] != CIFS_NO_BUFFER &&
+ ((struct smb2_hdr *)err_iov[0].iov_base)->Command == SMB2_CREATE &&
+ ((struct smb2_hdr *)err_iov[0].iov_base)->Status == STATUS_STOPPED_ON_SYMLINK) {
+ rc = smb2_parse_symlink_response(cifs_sb, err_iov, &data->symlink_target);
+ if (rc)
+ goto out;
+ }
*reparse = true;
create_options |= OPEN_REPARSE_POINT;
/* Failed on a symbolic link - query a reparse point info */
cifs_get_readable_path(tcon, full_path, &cfile);
- rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
- FILE_READ_ATTRIBUTES, FILE_OPEN,
- create_options, ACL_NO_MODE,
- smb2_data, SMB2_OP_QUERY_INFO, cfile);
+ rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES,
+ FILE_OPEN, create_options, ACL_NO_MODE, data,
+ SMB2_OP_QUERY_INFO, cfile, NULL, NULL);
}
- if (rc)
- goto out;
- move_smb2_info_to_cifs(data, smb2_data);
out:
- kfree(smb2_data);
+ free_rsp_buf(err_buftype[0], err_iov[0].iov_base);
+ free_rsp_buf(err_buftype[1], err_iov[1].iov_base);
+ free_rsp_buf(err_buftype[2], err_iov[2].iov_base);
return rc;
}
-int
-smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
- struct cifs_sb_info *cifs_sb, const char *full_path,
- struct smb311_posix_qinfo *data, bool *adjust_tz, bool *reparse)
+int smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb, const char *full_path,
+ struct cifs_open_info_data *data, bool *adjust_tz, bool *reparse)
{
int rc;
__u32 create_options = 0;
struct cifsFileInfo *cfile;
- struct smb311_posix_qinfo *smb2_data;
+ struct kvec err_iov[3] = {};
+ int err_buftype[3] = {};
*adjust_tz = false;
*reparse = false;
- /* BB TODO: Make struct larger when add support for parsing owner SIDs */
- smb2_data = kzalloc(sizeof(struct smb311_posix_qinfo),
- GFP_KERNEL);
- if (smb2_data == NULL)
- return -ENOMEM;
-
/*
* BB TODO: Add support for using the cached root handle.
* Create SMB2_query_posix_info worker function to do non-compounded query
@@ -585,29 +587,32 @@ smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
*/
cifs_get_readable_path(tcon, full_path, &cfile);
- rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
- FILE_READ_ATTRIBUTES, FILE_OPEN, create_options,
- ACL_NO_MODE, smb2_data, SMB2_OP_POSIX_QUERY_INFO, cfile);
+ rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN,
+ create_options, ACL_NO_MODE, data, SMB2_OP_POSIX_QUERY_INFO, cfile,
+ err_iov, err_buftype);
if (rc == -EOPNOTSUPP) {
/* BB TODO: When support for special files added to Samba re-verify this path */
+ if (err_iov[0].iov_base && err_buftype[0] != CIFS_NO_BUFFER &&
+ ((struct smb2_hdr *)err_iov[0].iov_base)->Command == SMB2_CREATE &&
+ ((struct smb2_hdr *)err_iov[0].iov_base)->Status == STATUS_STOPPED_ON_SYMLINK) {
+ rc = smb2_parse_symlink_response(cifs_sb, err_iov, &data->symlink_target);
+ if (rc)
+ goto out;
+ }
*reparse = true;
create_options |= OPEN_REPARSE_POINT;
/* Failed on a symbolic link - query a reparse point info */
cifs_get_readable_path(tcon, full_path, &cfile);
- rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
- FILE_READ_ATTRIBUTES, FILE_OPEN,
- create_options, ACL_NO_MODE,
- smb2_data, SMB2_OP_POSIX_QUERY_INFO, cfile);
+ rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES,
+ FILE_OPEN, create_options, ACL_NO_MODE, data,
+ SMB2_OP_POSIX_QUERY_INFO, cfile, NULL, NULL);
}
- if (rc)
- goto out;
-
- /* TODO: will need to allow for the 2 SIDs when add support for getting owner UID/GID */
- memcpy(data, smb2_data, sizeof(struct smb311_posix_qinfo));
out:
- kfree(smb2_data);
+ free_rsp_buf(err_buftype[0], err_iov[0].iov_base);
+ free_rsp_buf(err_buftype[1], err_iov[1].iov_base);
+ free_rsp_buf(err_buftype[2], err_iov[2].iov_base);
return rc;
}
@@ -619,7 +624,7 @@ smb2_mkdir(const unsigned int xid, struct inode *parent_inode, umode_t mode,
return smb2_compound_op(xid, tcon, cifs_sb, name,
FILE_WRITE_ATTRIBUTES, FILE_CREATE,
CREATE_NOT_FILE, mode, NULL, SMB2_OP_MKDIR,
- NULL);
+ NULL, NULL, NULL);
}
void
@@ -641,7 +646,7 @@ smb2_mkdir_setinfo(struct inode *inode, const char *name,
tmprc = smb2_compound_op(xid, tcon, cifs_sb, name,
FILE_WRITE_ATTRIBUTES, FILE_CREATE,
CREATE_NOT_FILE, ACL_NO_MODE,
- &data, SMB2_OP_SET_INFO, cfile);
+ &data, SMB2_OP_SET_INFO, cfile, NULL, NULL);
if (tmprc == 0)
cifs_i->cifsAttrs = dosattrs;
}
@@ -650,9 +655,10 @@ int
smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
struct cifs_sb_info *cifs_sb)
{
+ drop_cached_dir_by_name(xid, tcon, name, cifs_sb);
return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
CREATE_NOT_FILE, ACL_NO_MODE,
- NULL, SMB2_OP_RMDIR, NULL);
+ NULL, SMB2_OP_RMDIR, NULL, NULL, NULL);
}
int
@@ -661,7 +667,7 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
{
return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT,
- ACL_NO_MODE, NULL, SMB2_OP_DELETE, NULL);
+ ACL_NO_MODE, NULL, SMB2_OP_DELETE, NULL, NULL, NULL);
}
static int
@@ -680,7 +686,7 @@ smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon,
}
rc = smb2_compound_op(xid, tcon, cifs_sb, from_name, access,
FILE_OPEN, 0, ACL_NO_MODE, smb2_to_name,
- command, cfile);
+ command, cfile, NULL, NULL);
smb2_rename_path:
kfree(smb2_to_name);
return rc;
@@ -693,6 +699,7 @@ smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon,
{
struct cifsFileInfo *cfile;
+ drop_cached_dir_by_name(xid, tcon, from_name, cifs_sb);
cifs_get_writable_path(tcon, from_name, FIND_WR_WITH_DELETE, &cfile);
return smb2_set_path_attr(xid, tcon, from_name, to_name,
@@ -720,7 +727,7 @@ smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon,
cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile);
return smb2_compound_op(xid, tcon, cifs_sb, full_path,
FILE_WRITE_DATA, FILE_OPEN, 0, ACL_NO_MODE,
- &eof, SMB2_OP_SET_EOF, cfile);
+ &eof, SMB2_OP_SET_EOF, cfile, NULL, NULL);
}
int
@@ -746,7 +753,8 @@ smb2_set_file_info(struct inode *inode, const char *full_path,
cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile);
rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
FILE_WRITE_ATTRIBUTES, FILE_OPEN,
- 0, ACL_NO_MODE, buf, SMB2_OP_SET_INFO, cfile);
+ 0, ACL_NO_MODE, buf, SMB2_OP_SET_INFO, cfile,
+ NULL, NULL);
cifs_put_tlink(tlink);
return rc;
}
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index d73e5672aac4..572293c18e16 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -135,6 +135,7 @@ static __u32 get_neg_ctxt_len(struct smb2_hdr *hdr, __u32 len,
int
smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server)
{
+ struct TCP_Server_Info *pserver;
struct smb2_hdr *shdr = (struct smb2_hdr *)buf;
struct smb2_pdu *pdu = (struct smb2_pdu *)shdr;
int hdr_size = sizeof(struct smb2_hdr);
@@ -143,6 +144,9 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server)
__u32 calc_len; /* calculated length */
__u64 mid;
+ /* If server is a channel, select the primary channel */
+ pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
+
/*
* Add function to do table lookup of StructureSize by command
* ie Validate the wct via smb2_struct_sizes table above
@@ -155,7 +159,7 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server)
/* decrypt frame now that it is completely read in */
spin_lock(&cifs_tcp_ses_lock);
- list_for_each_entry(iter, &server->smb_ses_list, smb_ses_list) {
+ list_for_each_entry(iter, &pserver->smb_ses_list, smb_ses_list) {
if (iter->Suid == le64_to_cpu(thdr->SessionId)) {
ses = iter;
break;
@@ -248,7 +252,7 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server)
* Some windows servers (win2016) will pad also the final
* PDU in a compound to 8 bytes.
*/
- if (((calc_len + 7) & ~7) == len)
+ if (ALIGN(calc_len, 8) == len)
return 0;
/*
@@ -608,51 +612,52 @@ smb2_tcon_find_pending_open_lease(struct cifs_tcon *tcon,
}
static bool
-smb2_is_valid_lease_break(char *buffer)
+smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server)
{
struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer;
- struct TCP_Server_Info *server;
+ struct TCP_Server_Info *pserver;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
struct cifs_pending_open *open;
cifs_dbg(FYI, "Checking for lease break\n");
+ /* If server is a channel, select the primary channel */
+ pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
+
/* look up tcon based on tid & uid */
spin_lock(&cifs_tcp_ses_lock);
- list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
- list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
- list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
- spin_lock(&tcon->open_file_lock);
- cifs_stats_inc(
- &tcon->stats.cifs_stats.num_oplock_brks);
- if (smb2_tcon_has_lease(tcon, rsp)) {
- spin_unlock(&tcon->open_file_lock);
- spin_unlock(&cifs_tcp_ses_lock);
- return true;
- }
- open = smb2_tcon_find_pending_open_lease(tcon,
- rsp);
- if (open) {
- __u8 lease_key[SMB2_LEASE_KEY_SIZE];
- struct tcon_link *tlink;
-
- tlink = cifs_get_tlink(open->tlink);
- memcpy(lease_key, open->lease_key,
- SMB2_LEASE_KEY_SIZE);
- spin_unlock(&tcon->open_file_lock);
- spin_unlock(&cifs_tcp_ses_lock);
- smb2_queue_pending_open_break(tlink,
- lease_key,
- rsp->NewLeaseState);
- return true;
- }
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
+ spin_lock(&tcon->open_file_lock);
+ cifs_stats_inc(
+ &tcon->stats.cifs_stats.num_oplock_brks);
+ if (smb2_tcon_has_lease(tcon, rsp)) {
spin_unlock(&tcon->open_file_lock);
+ spin_unlock(&cifs_tcp_ses_lock);
+ return true;
+ }
+ open = smb2_tcon_find_pending_open_lease(tcon,
+ rsp);
+ if (open) {
+ __u8 lease_key[SMB2_LEASE_KEY_SIZE];
+ struct tcon_link *tlink;
+
+ tlink = cifs_get_tlink(open->tlink);
+ memcpy(lease_key, open->lease_key,
+ SMB2_LEASE_KEY_SIZE);
+ spin_unlock(&tcon->open_file_lock);
+ spin_unlock(&cifs_tcp_ses_lock);
+ smb2_queue_pending_open_break(tlink,
+ lease_key,
+ rsp->NewLeaseState);
+ return true;
+ }
+ spin_unlock(&tcon->open_file_lock);
- if (cached_dir_lease_break(tcon, rsp->LeaseKey)) {
- spin_unlock(&cifs_tcp_ses_lock);
- return true;
- }
+ if (cached_dir_lease_break(tcon, rsp->LeaseKey)) {
+ spin_unlock(&cifs_tcp_ses_lock);
+ return true;
}
}
}
@@ -671,6 +676,7 @@ bool
smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
{
struct smb2_oplock_break *rsp = (struct smb2_oplock_break *)buffer;
+ struct TCP_Server_Info *pserver;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
struct cifsInodeInfo *cinode;
@@ -684,16 +690,19 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
if (rsp->StructureSize !=
smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) {
if (le16_to_cpu(rsp->StructureSize) == 44)
- return smb2_is_valid_lease_break(buffer);
+ return smb2_is_valid_lease_break(buffer, server);
else
return false;
}
cifs_dbg(FYI, "oplock level 0x%x\n", rsp->OplockLevel);
+ /* If server is a channel, select the primary channel */
+ pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
+
/* look up tcon based on tid & uid */
spin_lock(&cifs_tcp_ses_lock);
- list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
spin_lock(&tcon->open_file_lock);
@@ -870,8 +879,8 @@ smb311_update_preauth_hash(struct cifs_ses *ses, struct TCP_Server_Info *server,
struct kvec *iov, int nvec)
{
int i, rc;
- struct sdesc *d;
struct smb2_hdr *hdr;
+ struct shash_desc *sha512 = NULL;
hdr = (struct smb2_hdr *)iov[0].iov_base;
/* neg prot are always taken */
@@ -901,14 +910,14 @@ ok:
if (rc)
return rc;
- d = server->secmech.sdescsha512;
- rc = crypto_shash_init(&d->shash);
+ sha512 = server->secmech.sha512;
+ rc = crypto_shash_init(sha512);
if (rc) {
cifs_dbg(VFS, "%s: Could not init sha512 shash\n", __func__);
return rc;
}
- rc = crypto_shash_update(&d->shash, ses->preauth_sha_hash,
+ rc = crypto_shash_update(sha512, ses->preauth_sha_hash,
SMB2_PREAUTH_HASH_SIZE);
if (rc) {
cifs_dbg(VFS, "%s: Could not update sha512 shash\n", __func__);
@@ -916,8 +925,7 @@ ok:
}
for (i = 0; i < nvec; i++) {
- rc = crypto_shash_update(&d->shash,
- iov[i].iov_base, iov[i].iov_len);
+ rc = crypto_shash_update(sha512, iov[i].iov_base, iov[i].iov_len);
if (rc) {
cifs_dbg(VFS, "%s: Could not update sha512 shash\n",
__func__);
@@ -925,7 +933,7 @@ ok:
}
}
- rc = crypto_shash_final(&d->shash, ses->preauth_sha_hash);
+ rc = crypto_shash_final(sha512, ses->preauth_sha_hash);
if (rc) {
cifs_dbg(VFS, "%s: Could not finalize sha512 shash\n",
__func__);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 421be43af425..880cd494afea 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -512,8 +512,7 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
static int
parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
- size_t buf_len,
- struct cifs_ses *ses)
+ size_t buf_len, struct cifs_ses *ses, bool in_mount)
{
struct network_interface_info_ioctl_rsp *p;
struct sockaddr_in *addr4;
@@ -531,6 +530,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
p = buf;
spin_lock(&ses->iface_lock);
+ ses->iface_count = 0;
/*
* Go through iface_list and do kref_put to remove
* any unused ifaces. ifaces in use will be removed
@@ -543,6 +543,21 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
}
spin_unlock(&ses->iface_lock);
+ /*
+ * Samba server e.g. can return an empty interface list in some cases,
+ * which would only be a problem if we were requesting multichannel
+ */
+ if (bytes_left == 0) {
+ /* avoid spamming logs every 10 minutes, so log only in mount */
+ if ((ses->chan_max > 1) && in_mount)
+ cifs_dbg(VFS,
+ "multichannel not available\n"
+ "Empty network interface list returned by server %s\n",
+ ses->server->hostname);
+ rc = -EINVAL;
+ goto out;
+ }
+
while (bytes_left >= sizeof(*p)) {
memset(&tmp_iface, 0, sizeof(tmp_iface));
tmp_iface.speed = le64_to_cpu(p->LinkSpeed);
@@ -637,9 +652,9 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
kref_put(&iface->refcount, release_iface);
} else
list_add_tail(&info->iface_head, &ses->iface_list);
- spin_unlock(&ses->iface_lock);
ses->iface_count++;
+ spin_unlock(&ses->iface_lock);
ses->iface_last_update = jiffies;
next_iface:
nb_iface++;
@@ -673,7 +688,7 @@ out:
}
int
-SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
+SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon, bool in_mount)
{
int rc;
unsigned int ret_data_len = 0;
@@ -693,7 +708,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
goto out;
}
- rc = parse_server_interfaces(out_buf, ret_data_len, ses);
+ rc = parse_server_interfaces(out_buf, ret_data_len, ses, in_mount);
if (rc)
goto out;
@@ -729,7 +744,7 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return;
- SMB3_request_interfaces(xid, tcon);
+ SMB3_request_interfaces(xid, tcon, true /* called during mount */);
SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
FS_ATTRIBUTE_INFORMATION);
@@ -787,7 +802,7 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
rc = open_cached_dir(xid, tcon, full_path, cifs_sb, true, &cfid);
if (!rc) {
- if (cfid->is_valid) {
+ if (cfid->has_lease) {
close_cached_dir(cfid);
return 0;
}
@@ -817,33 +832,25 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
return rc;
}
-static int
-smb2_get_srv_inum(const unsigned int xid, struct cifs_tcon *tcon,
- struct cifs_sb_info *cifs_sb, const char *full_path,
- u64 *uniqueid, FILE_ALL_INFO *data)
+static int smb2_get_srv_inum(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb, const char *full_path,
+ u64 *uniqueid, struct cifs_open_info_data *data)
{
- *uniqueid = le64_to_cpu(data->IndexNumber);
+ *uniqueid = le64_to_cpu(data->fi.IndexNumber);
return 0;
}
-static int
-smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon,
- struct cifs_fid *fid, FILE_ALL_INFO *data)
+static int smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifsFileInfo *cfile, struct cifs_open_info_data *data)
{
- int rc;
- struct smb2_file_all_info *smb2_data;
+ struct cifs_fid *fid = &cfile->fid;
- smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
- GFP_KERNEL);
- if (smb2_data == NULL)
- return -ENOMEM;
-
- rc = SMB2_query_info(xid, tcon, fid->persistent_fid, fid->volatile_fid,
- smb2_data);
- if (!rc)
- move_smb2_info_to_cifs(data, smb2_data);
- kfree(smb2_data);
- return rc;
+ if (cfile->symlink_target) {
+ data->symlink_target = kstrdup(cfile->symlink_target, GFP_KERNEL);
+ if (!data->symlink_target)
+ return -ENOMEM;
+ }
+ return SMB2_query_info(xid, tcon, fid->persistent_fid, fid->volatile_fid, &data->fi);
}
#ifdef CONFIG_CIFS_XATTR
@@ -1327,7 +1334,7 @@ SMB2_request_res_key(const unsigned int xid, struct cifs_tcon *tcon,
CIFSMaxBufSize, (char **)&res_key, &ret_data_len);
if (rc == -EOPNOTSUPP) {
- pr_warn_once("Server share %s does not support copy range\n", tcon->treeName);
+ pr_warn_once("Server share %s does not support copy range\n", tcon->tree_name);
goto req_res_key_exit;
} else if (rc) {
cifs_tcon_dbg(VFS, "refcpy ioctl error %d getting resume key\n", rc);
@@ -2012,9 +2019,10 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon,
static int
smb3_notify(const unsigned int xid, struct file *pfile,
- void __user *ioc_buf)
+ void __user *ioc_buf, bool return_changes)
{
- struct smb3_notify notify;
+ struct smb3_notify_info notify;
+ struct smb3_notify_info __user *pnotify_buf;
struct dentry *dentry = pfile->f_path.dentry;
struct inode *inode = file_inode(pfile);
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -2022,10 +2030,12 @@ smb3_notify(const unsigned int xid, struct file *pfile,
struct cifs_fid fid;
struct cifs_tcon *tcon;
const unsigned char *path;
+ char *returned_ioctl_info = NULL;
void *page = alloc_dentry_path();
__le16 *utf16_path = NULL;
u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
int rc = 0;
+ __u32 ret_len = 0;
path = build_path_from_dentry(dentry, page);
if (IS_ERR(path)) {
@@ -2039,9 +2049,17 @@ smb3_notify(const unsigned int xid, struct file *pfile,
goto notify_exit;
}
- if (copy_from_user(&notify, ioc_buf, sizeof(struct smb3_notify))) {
- rc = -EFAULT;
- goto notify_exit;
+ if (return_changes) {
+ if (copy_from_user(&notify, ioc_buf, sizeof(struct smb3_notify_info))) {
+ rc = -EFAULT;
+ goto notify_exit;
+ }
+ } else {
+ if (copy_from_user(&notify, ioc_buf, sizeof(struct smb3_notify))) {
+ rc = -EFAULT;
+ goto notify_exit;
+ }
+ notify.data_len = 0;
}
tcon = cifs_sb_master_tcon(cifs_sb);
@@ -2058,12 +2076,22 @@ smb3_notify(const unsigned int xid, struct file *pfile,
goto notify_exit;
rc = SMB2_change_notify(xid, tcon, fid.persistent_fid, fid.volatile_fid,
- notify.watch_tree, notify.completion_filter);
+ notify.watch_tree, notify.completion_filter,
+ notify.data_len, &returned_ioctl_info, &ret_len);
SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
cifs_dbg(FYI, "change notify for path %s rc %d\n", path, rc);
-
+ if (return_changes && (ret_len > 0) && (notify.data_len > 0)) {
+ if (ret_len > notify.data_len)
+ ret_len = notify.data_len;
+ pnotify_buf = (struct smb3_notify_info __user *)ioc_buf;
+ if (copy_to_user(pnotify_buf->notify_data, returned_ioctl_info, ret_len))
+ rc = -EFAULT;
+ else if (copy_to_user(&pnotify_buf->data_len, &ret_len, sizeof(ret_len)))
+ rc = -EFAULT;
+ }
+ kfree(returned_ioctl_info);
notify_exit:
free_dentry_path(page);
kfree(utf16_path);
@@ -2274,14 +2302,18 @@ static void
smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server)
{
struct smb2_hdr *shdr = (struct smb2_hdr *)buf;
+ struct TCP_Server_Info *pserver;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
if (shdr->Status != STATUS_NETWORK_NAME_DELETED)
return;
+ /* If server is a channel, select the primary channel */
+ pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
+
spin_lock(&cifs_tcp_ses_lock);
- list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
if (tcon->tid == le32_to_cpu(shdr->Id.SyncId.TreeId)) {
spin_lock(&tcon->tc_lock);
@@ -2289,7 +2321,7 @@ smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server)
spin_unlock(&tcon->tc_lock);
spin_unlock(&cifs_tcp_ses_lock);
pr_warn_once("Server share %s deleted.\n",
- tcon->treeName);
+ tcon->tree_name);
return;
}
}
@@ -2498,7 +2530,7 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon,
if (rc == -EREMCHG) {
tcon->need_reconnect = true;
pr_warn_once("server share %s deleted\n",
- tcon->treeName);
+ tcon->tree_name);
}
goto qic_exit;
}
@@ -2814,9 +2846,6 @@ parse_reparse_point(struct reparse_data_buffer *buf,
}
}
-#define SMB2_SYMLINK_STRUCT_SIZE \
- (sizeof(struct smb2_err_rsp) - 1 + sizeof(struct smb2_symlink_err_rsp))
-
static int
smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb, const char *full_path,
@@ -2828,13 +2857,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_open_parms oparms;
struct cifs_fid fid;
struct kvec err_iov = {NULL, 0};
- struct smb2_err_rsp *err_buf = NULL;
- struct smb2_symlink_err_rsp *symlink;
struct TCP_Server_Info *server = cifs_pick_channel(tcon->ses);
- unsigned int sub_len;
- unsigned int sub_offset;
- unsigned int print_len;
- unsigned int print_offset;
int flags = CIFS_CP_CREATE_CLOSE_OP;
struct smb_rqst rqst[3];
int resp_buftype[3];
@@ -2951,47 +2974,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
goto querty_exit;
}
- err_buf = err_iov.iov_base;
- if (le32_to_cpu(err_buf->ByteCount) < sizeof(struct smb2_symlink_err_rsp) ||
- err_iov.iov_len < SMB2_SYMLINK_STRUCT_SIZE) {
- rc = -EINVAL;
- goto querty_exit;
- }
-
- symlink = (struct smb2_symlink_err_rsp *)err_buf->ErrorData;
- if (le32_to_cpu(symlink->SymLinkErrorTag) != SYMLINK_ERROR_TAG ||
- le32_to_cpu(symlink->ReparseTag) != IO_REPARSE_TAG_SYMLINK) {
- rc = -EINVAL;
- goto querty_exit;
- }
-
- /* open must fail on symlink - reset rc */
- rc = 0;
- sub_len = le16_to_cpu(symlink->SubstituteNameLength);
- sub_offset = le16_to_cpu(symlink->SubstituteNameOffset);
- print_len = le16_to_cpu(symlink->PrintNameLength);
- print_offset = le16_to_cpu(symlink->PrintNameOffset);
-
- if (err_iov.iov_len < SMB2_SYMLINK_STRUCT_SIZE + sub_offset + sub_len) {
- rc = -EINVAL;
- goto querty_exit;
- }
-
- if (err_iov.iov_len <
- SMB2_SYMLINK_STRUCT_SIZE + print_offset + print_len) {
- rc = -EINVAL;
- goto querty_exit;
- }
-
- *target_path = cifs_strndup_from_utf16(
- (char *)symlink->PathBuffer + sub_offset,
- sub_len, true, cifs_sb->local_nls);
- if (!(*target_path)) {
- rc = -ENOMEM;
- goto querty_exit;
- }
- convert_delimiter(*target_path, '/');
- cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path);
+ rc = smb2_parse_symlink_response(cifs_sb, &err_iov, target_path);
querty_exit:
cifs_dbg(FYI, "query symlink rc %d\n", rc);
@@ -4285,21 +4268,23 @@ init_sg(int num_rqst, struct smb_rqst *rqst, u8 *sign)
static int
smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key)
{
+ struct TCP_Server_Info *pserver;
struct cifs_ses *ses;
u8 *ses_enc_key;
+ /* If server is a channel, select the primary channel */
+ pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
+
spin_lock(&cifs_tcp_ses_lock);
- list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
- list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
- if (ses->Suid == ses_id) {
- spin_lock(&ses->ses_lock);
- ses_enc_key = enc ? ses->smb3encryptionkey :
- ses->smb3decryptionkey;
- memcpy(key, ses_enc_key, SMB3_ENC_DEC_KEY_SIZE);
- spin_unlock(&ses->ses_lock);
- spin_unlock(&cifs_tcp_ses_lock);
- return 0;
- }
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (ses->Suid == ses_id) {
+ spin_lock(&ses->ses_lock);
+ ses_enc_key = enc ? ses->smb3encryptionkey :
+ ses->smb3decryptionkey;
+ memcpy(key, ses_enc_key, SMB3_ENC_DEC_KEY_SIZE);
+ spin_unlock(&ses->ses_lock);
+ spin_unlock(&cifs_tcp_ses_lock);
+ return 0;
}
}
spin_unlock(&cifs_tcp_ses_lock);
@@ -4344,8 +4329,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
return rc;
}
- tfm = enc ? server->secmech.ccmaesencrypt :
- server->secmech.ccmaesdecrypt;
+ tfm = enc ? server->secmech.enc : server->secmech.dec;
if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) ||
(server->cipher_type == SMB2_ENCRYPTION_AES256_GCM))
@@ -4410,11 +4394,11 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
if (!rc && enc)
memcpy(&tr_hdr->Signature, sign, SMB2_SIGNATURE_SIZE);
- kfree(iv);
+ kfree_sensitive(iv);
free_sg:
- kfree(sg);
+ kfree_sensitive(sg);
free_req:
- kfree(req);
+ kfree_sensitive(req);
return rc;
}
@@ -5102,7 +5086,7 @@ smb2_make_node(unsigned int xid, struct inode *inode,
{
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
int rc = -EPERM;
- FILE_ALL_INFO *buf = NULL;
+ struct cifs_open_info_data buf = {};
struct cifs_io_parms io_parms = {0};
__u32 oplock = 0;
struct cifs_fid fid;
@@ -5118,7 +5102,7 @@ smb2_make_node(unsigned int xid, struct inode *inode,
* and was used by default in earlier versions of Windows
*/
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL))
- goto out;
+ return rc;
/*
* TODO: Add ability to create instead via reparse point. Windows (e.g.
@@ -5127,16 +5111,10 @@ smb2_make_node(unsigned int xid, struct inode *inode,
*/
if (!S_ISCHR(mode) && !S_ISBLK(mode))
- goto out;
+ return rc;
cifs_dbg(FYI, "sfu compat create special file\n");
- buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
- if (buf == NULL) {
- rc = -ENOMEM;
- goto out;
- }
-
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = GENERIC_WRITE;
@@ -5151,21 +5129,21 @@ smb2_make_node(unsigned int xid, struct inode *inode,
oplock = REQ_OPLOCK;
else
oplock = 0;
- rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, buf);
+ rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, &buf);
if (rc)
- goto out;
+ return rc;
/*
* BB Do not bother to decode buf since no local inode yet to put
* timestamps in, but we can reuse it safely.
*/
- pdev = (struct win_dev *)buf;
+ pdev = (struct win_dev *)&buf.fi;
io_parms.pid = current->tgid;
io_parms.tcon = tcon;
io_parms.offset = 0;
io_parms.length = sizeof(struct win_dev);
- iov[1].iov_base = buf;
+ iov[1].iov_base = &buf.fi;
iov[1].iov_len = sizeof(struct win_dev);
if (S_ISCHR(mode)) {
memcpy(pdev->type, "IntxCHR", 8);
@@ -5184,8 +5162,8 @@ smb2_make_node(unsigned int xid, struct inode *inode,
d_drop(dentry);
/* FIXME: add code here to set EAs */
-out:
- kfree(buf);
+
+ cifs_free_open_info(&buf);
return rc;
}
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 6352ab32c7e7..a5695748a89b 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -466,15 +466,14 @@ build_signing_ctxt(struct smb2_signing_capabilities *pneg_ctxt)
/*
* Context Data length must be rounded to multiple of 8 for some servers
*/
- pneg_ctxt->DataLength = cpu_to_le16(DIV_ROUND_UP(
- sizeof(struct smb2_signing_capabilities) -
- sizeof(struct smb2_neg_context) +
- (num_algs * 2 /* sizeof u16 */), 8) * 8);
+ pneg_ctxt->DataLength = cpu_to_le16(ALIGN(sizeof(struct smb2_signing_capabilities) -
+ sizeof(struct smb2_neg_context) +
+ (num_algs * sizeof(u16)), 8));
pneg_ctxt->SigningAlgorithmCount = cpu_to_le16(num_algs);
pneg_ctxt->SigningAlgorithms[0] = cpu_to_le16(SIGNING_ALG_AES_CMAC);
- ctxt_len += 2 /* sizeof le16 */ * num_algs;
- ctxt_len = DIV_ROUND_UP(ctxt_len, 8) * 8;
+ ctxt_len += sizeof(__le16) * num_algs;
+ ctxt_len = ALIGN(ctxt_len, 8);
return ctxt_len;
/* TBD add SIGNING_ALG_AES_GMAC and/or SIGNING_ALG_HMAC_SHA256 */
}
@@ -511,8 +510,7 @@ build_netname_ctxt(struct smb2_netname_neg_context *pneg_ctxt, char *hostname)
/* copy up to max of first 100 bytes of server name to NetName field */
pneg_ctxt->DataLength = cpu_to_le16(2 * cifs_strtoUTF16(pneg_ctxt->NetName, hostname, 100, cp));
/* context size is DataLength + minimal smb2_neg_context */
- return DIV_ROUND_UP(le16_to_cpu(pneg_ctxt->DataLength) +
- sizeof(struct smb2_neg_context), 8) * 8;
+ return ALIGN(le16_to_cpu(pneg_ctxt->DataLength) + sizeof(struct smb2_neg_context), 8);
}
static void
@@ -557,18 +555,18 @@ assemble_neg_contexts(struct smb2_negotiate_req *req,
* round up total_len of fixed part of SMB3 negotiate request to 8
* byte boundary before adding negotiate contexts
*/
- *total_len = roundup(*total_len, 8);
+ *total_len = ALIGN(*total_len, 8);
pneg_ctxt = (*total_len) + (char *)req;
req->NegotiateContextOffset = cpu_to_le32(*total_len);
build_preauth_ctxt((struct smb2_preauth_neg_context *)pneg_ctxt);
- ctxt_len = DIV_ROUND_UP(sizeof(struct smb2_preauth_neg_context), 8) * 8;
+ ctxt_len = ALIGN(sizeof(struct smb2_preauth_neg_context), 8);
*total_len += ctxt_len;
pneg_ctxt += ctxt_len;
build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt);
- ctxt_len = DIV_ROUND_UP(sizeof(struct smb2_encryption_neg_context), 8) * 8;
+ ctxt_len = ALIGN(sizeof(struct smb2_encryption_neg_context), 8);
*total_len += ctxt_len;
pneg_ctxt += ctxt_len;
@@ -595,9 +593,7 @@ assemble_neg_contexts(struct smb2_negotiate_req *req,
if (server->compress_algorithm) {
build_compression_ctxt((struct smb2_compression_capabilities_context *)
pneg_ctxt);
- ctxt_len = DIV_ROUND_UP(
- sizeof(struct smb2_compression_capabilities_context),
- 8) * 8;
+ ctxt_len = ALIGN(sizeof(struct smb2_compression_capabilities_context), 8);
*total_len += ctxt_len;
pneg_ctxt += ctxt_len;
neg_context_count++;
@@ -780,7 +776,7 @@ static int smb311_decode_neg_context(struct smb2_negotiate_rsp *rsp,
if (rc)
break;
/* offsets must be 8 byte aligned */
- clen = (clen + 7) & ~0x7;
+ clen = ALIGN(clen, 8);
offset += clen + sizeof(struct smb2_neg_context);
len_of_ctxts -= clen;
}
@@ -873,7 +869,7 @@ SMB2_negotiate(const unsigned int xid,
struct smb2_negotiate_rsp *rsp;
struct kvec iov[1];
struct kvec rsp_iov;
- int rc = 0;
+ int rc;
int resp_buftype;
int blob_offset, blob_length;
char *security_blob;
@@ -1169,9 +1165,9 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
pneg_inbuf->Dialects[0] =
cpu_to_le16(server->vals->protocol_id);
pneg_inbuf->DialectCount = cpu_to_le16(1);
- /* structure is big enough for 3 dialects, sending only 1 */
+ /* structure is big enough for 4 dialects, sending only 1 */
inbuflen = sizeof(*pneg_inbuf) -
- sizeof(pneg_inbuf->Dialects[0]) * 2;
+ sizeof(pneg_inbuf->Dialects[0]) * 3;
}
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
@@ -1345,7 +1341,13 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data)
static void
SMB2_sess_free_buffer(struct SMB2_sess_data *sess_data)
{
- free_rsp_buf(sess_data->buf0_type, sess_data->iov[0].iov_base);
+ struct kvec *iov = sess_data->iov;
+
+ /* iov[1] is already freed by caller */
+ if (sess_data->buf0_type != CIFS_NO_BUFFER && iov[0].iov_base)
+ memzero_explicit(iov[0].iov_base, iov[0].iov_len);
+
+ free_rsp_buf(sess_data->buf0_type, iov[0].iov_base);
sess_data->buf0_type = CIFS_NO_BUFFER;
}
@@ -1477,6 +1479,8 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
out_put_spnego_key:
key_invalidate(spnego_key);
key_put(spnego_key);
+ if (rc)
+ kfree_sensitive(ses->auth_key.response);
out:
sess_data->result = rc;
sess_data->func = NULL;
@@ -1526,7 +1530,7 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
&blob_length, ses, server,
sess_data->nls_cp);
if (rc)
- goto out_err;
+ goto out;
if (use_spnego) {
/* BB eventually need to add this */
@@ -1573,7 +1577,7 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
}
out:
- kfree(ntlmssp_blob);
+ kfree_sensitive(ntlmssp_blob);
SMB2_sess_free_buffer(sess_data);
if (!rc) {
sess_data->result = 0;
@@ -1581,7 +1585,7 @@ out:
return;
}
out_err:
- kfree(ses->ntlmssp);
+ kfree_sensitive(ses->ntlmssp);
ses->ntlmssp = NULL;
sess_data->result = rc;
sess_data->func = NULL;
@@ -1657,9 +1661,9 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data)
}
#endif
out:
- kfree(ntlmssp_blob);
+ kfree_sensitive(ntlmssp_blob);
SMB2_sess_free_buffer(sess_data);
- kfree(ses->ntlmssp);
+ kfree_sensitive(ses->ntlmssp);
ses->ntlmssp = NULL;
sess_data->result = rc;
sess_data->func = NULL;
@@ -1737,7 +1741,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
cifs_server_dbg(VFS, "signing requested but authenticated as guest\n");
rc = sess_data->result;
out:
- kfree(sess_data);
+ kfree_sensitive(sess_data);
return rc;
}
@@ -1930,7 +1934,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
tcon->capabilities = rsp->Capabilities; /* we keep caps little endian */
tcon->maximal_access = le32_to_cpu(rsp->MaximalAccess);
tcon->tid = le32_to_cpu(rsp->hdr.Id.SyncId.TreeId);
- strscpy(tcon->treeName, tree, sizeof(tcon->treeName));
+ strscpy(tcon->tree_name, tree, sizeof(tcon->tree_name));
if ((rsp->Capabilities & SMB2_SHARE_CAP_DFS) &&
((tcon->share_flags & SHI1005_FLAGS_DFS) == 0))
@@ -1973,6 +1977,7 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
if (!ses || !(ses->server))
return -EIO;
+ trace_smb3_tdis_enter(xid, tcon->tid, ses->Suid, tcon->tree_name);
spin_lock(&ses->chan_lock);
if ((tcon->need_reconnect) ||
(CIFS_ALL_CHANS_NEED_RECONNECT(tcon->ses))) {
@@ -2004,8 +2009,11 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
rc = cifs_send_recv(xid, ses, ses->server,
&rqst, &resp_buf_type, flags, &rsp_iov);
cifs_small_buf_release(req);
- if (rc)
+ if (rc) {
cifs_stats_fail_inc(tcon, SMB2_TREE_DISCONNECT_HE);
+ trace_smb3_tdis_err(xid, tcon->tid, ses->Suid, rc);
+ }
+ trace_smb3_tdis_done(xid, tcon->tid, ses->Suid);
return rc;
}
@@ -2411,9 +2419,9 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len)
unsigned int acelen, acl_size, ace_count;
unsigned int owner_offset = 0;
unsigned int group_offset = 0;
- struct smb3_acl acl;
+ struct smb3_acl acl = {};
- *len = roundup(sizeof(struct crt_sd_ctxt) + (sizeof(struct cifs_ace) * 4), 8);
+ *len = round_up(sizeof(struct crt_sd_ctxt) + (sizeof(struct cifs_ace) * 4), 8);
if (set_owner) {
/* sizeof(struct owner_group_sids) is already multiple of 8 so no need to round */
@@ -2484,10 +2492,11 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len)
acl.AclRevision = ACL_REVISION; /* See 2.4.4.1 of MS-DTYP */
acl.AclSize = cpu_to_le16(acl_size);
acl.AceCount = cpu_to_le16(ace_count);
+ /* acl.Sbz1 and Sbz2 MBZ so are not set here, but initialized above */
memcpy(aclptr, &acl, sizeof(struct smb3_acl));
buf->ccontext.DataLength = cpu_to_le32(ptr - (__u8 *)&buf->sd);
- *len = roundup(ptr - (__u8 *)buf, 8);
+ *len = round_up((unsigned int)(ptr - (__u8 *)buf), 8);
return buf;
}
@@ -2581,7 +2590,7 @@ alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len,
* final path needs to be 8-byte aligned as specified in
* MS-SMB2 2.2.13 SMB2 CREATE Request.
*/
- *out_size = roundup(*out_len * sizeof(__le16), 8);
+ *out_size = round_up(*out_len * sizeof(__le16), 8);
*out_path = kzalloc(*out_size + sizeof(__le16) /* null */, GFP_KERNEL);
if (!*out_path)
return -ENOMEM;
@@ -2674,7 +2683,7 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode,
req->hdr.Flags |= SMB2_FLAGS_DFS_OPERATIONS;
rc = alloc_path_with_tree_prefix(&copy_path, &copy_size,
&name_len,
- tcon->treeName, utf16_path);
+ tcon->tree_name, utf16_path);
if (rc)
goto err_free_req;
@@ -2816,7 +2825,7 @@ SMB2_open_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server,
req->hdr.Flags |= SMB2_FLAGS_DFS_OPERATIONS;
rc = alloc_path_with_tree_prefix(&copy_path, &copy_size,
&name_len,
- tcon->treeName, path);
+ tcon->tree_name, path);
if (rc)
return rc;
req->NameLength = cpu_to_le16(name_len * 2);
@@ -2826,9 +2835,7 @@ SMB2_open_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server,
uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2;
/* MUST set path len (NameLength) to 0 opening root of share */
req->NameLength = cpu_to_le16(uni_path_len - 2);
- copy_size = uni_path_len;
- if (copy_size % 8 != 0)
- copy_size = roundup(copy_size, 8);
+ copy_size = round_up(uni_path_len, 8);
copy_path = kzalloc(copy_size, GFP_KERNEL);
if (!copy_path)
return -ENOMEM;
@@ -3011,7 +3018,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
oparms->create_options, oparms->desired_access, rc);
if (rc == -EREMCHG) {
pr_warn_once("server share %s deleted\n",
- tcon->treeName);
+ tcon->tree_name);
tcon->need_reconnect = true;
}
goto creat_exit;
@@ -3472,7 +3479,7 @@ smb2_validate_and_copy_iov(unsigned int offset, unsigned int buffer_length,
if (rc)
return rc;
- memcpy(data, begin_of_buf, buffer_length);
+ memcpy(data, begin_of_buf, minbufsize);
return 0;
}
@@ -3596,7 +3603,7 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
rc = smb2_validate_and_copy_iov(le16_to_cpu(rsp->OutputBufferOffset),
le32_to_cpu(rsp->OutputBufferLength),
- &rsp_iov, min_len, *data);
+ &rsp_iov, dlen ? *dlen : min_len, *data);
if (rc && allocated) {
kfree(*data);
*data = NULL;
@@ -3702,11 +3709,13 @@ SMB2_notify_init(const unsigned int xid, struct smb_rqst *rqst,
int
SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, bool watch_tree,
- u32 completion_filter)
+ u32 completion_filter, u32 max_out_data_len, char **out_data,
+ u32 *plen /* returned data len */)
{
struct cifs_ses *ses = tcon->ses;
struct TCP_Server_Info *server = cifs_pick_channel(ses);
struct smb_rqst rqst;
+ struct smb2_change_notify_rsp *smb_rsp;
struct kvec iov[1];
struct kvec rsp_iov = {NULL, 0};
int resp_buftype = CIFS_NO_BUFFER;
@@ -3722,6 +3731,9 @@ SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon,
memset(&rqst, 0, sizeof(struct smb_rqst));
memset(&iov, 0, sizeof(iov));
+ if (plen)
+ *plen = 0;
+
rqst.rq_iov = iov;
rqst.rq_nvec = 1;
@@ -3740,9 +3752,28 @@ SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon,
cifs_stats_fail_inc(tcon, SMB2_CHANGE_NOTIFY_HE);
trace_smb3_notify_err(xid, persistent_fid, tcon->tid, ses->Suid,
(u8)watch_tree, completion_filter, rc);
- } else
+ } else {
trace_smb3_notify_done(xid, persistent_fid, tcon->tid,
- ses->Suid, (u8)watch_tree, completion_filter);
+ ses->Suid, (u8)watch_tree, completion_filter);
+ /* validate that notify information is plausible */
+ if ((rsp_iov.iov_base == NULL) ||
+ (rsp_iov.iov_len < sizeof(struct smb2_change_notify_rsp)))
+ goto cnotify_exit;
+
+ smb_rsp = (struct smb2_change_notify_rsp *)rsp_iov.iov_base;
+
+ smb2_validate_iov(le16_to_cpu(smb_rsp->OutputBufferOffset),
+ le32_to_cpu(smb_rsp->OutputBufferLength), &rsp_iov,
+ sizeof(struct file_notify_information));
+
+ *out_data = kmemdup((char *)smb_rsp + le16_to_cpu(smb_rsp->OutputBufferOffset),
+ le32_to_cpu(smb_rsp->OutputBufferLength), GFP_KERNEL);
+ if (*out_data == NULL) {
+ rc = -ENOMEM;
+ goto cnotify_exit;
+ } else
+ *plen = le32_to_cpu(smb_rsp->OutputBufferLength);
+ }
cnotify_exit:
if (rqst.rq_iov)
@@ -4090,7 +4121,7 @@ smb2_new_read_req(void **buf, unsigned int *total_len,
if (request_type & CHAINED_REQUEST) {
if (!(request_type & END_OF_CHAIN)) {
/* next 8-byte aligned request */
- *total_len = DIV_ROUND_UP(*total_len, 8) * 8;
+ *total_len = ALIGN(*total_len, 8);
shdr->NextCommand = cpu_to_le32(*total_len);
} else /* END_OF_CHAIN */
shdr->NextCommand = 0;
@@ -4429,7 +4460,7 @@ smb2_writev_callback(struct mid_q_entry *mid)
wdata->bytes, wdata->result);
if (wdata->result == -ENOSPC)
pr_warn_once("Out of space writing to %s\n",
- tcon->treeName);
+ tcon->tree_name);
} else
trace_smb3_write_done(0 /* no xid */,
wdata->cfile->fid.persistent_fid,
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index f57881b8464f..1237bb86e93a 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -56,6 +56,9 @@ struct smb2_rdma_crypto_transform {
#define COMPOUND_FID 0xFFFFFFFFFFFFFFFFULL
+#define SMB2_SYMLINK_STRUCT_SIZE \
+ (sizeof(struct smb2_err_rsp) - 1 + sizeof(struct smb2_symlink_err_rsp))
+
#define SYMLINK_ERROR_TAG 0x4c4d5953
struct smb2_symlink_err_rsp {
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 3f740f24b96a..be21b5d26f67 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -53,16 +53,12 @@ extern bool smb2_is_valid_oplock_break(char *buffer,
struct TCP_Server_Info *srv);
extern int smb3_handle_read_data(struct TCP_Server_Info *server,
struct mid_q_entry *mid);
-
-extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst,
- struct smb2_file_all_info *src);
extern int smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb, const char *path,
__u32 *reparse_tag);
-extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
- struct cifs_sb_info *cifs_sb,
- const char *full_path, FILE_ALL_INFO *data,
- bool *adjust_tz, bool *symlink);
+int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb, const char *full_path,
+ struct cifs_open_info_data *data, bool *adjust_tz, bool *reparse);
extern int smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon,
const char *full_path, __u64 size,
struct cifs_sb_info *cifs_sb, bool set_alloc);
@@ -95,9 +91,9 @@ extern int smb3_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb,
const unsigned char *path, char *pbuf,
unsigned int *pbytes_read);
-extern int smb2_open_file(const unsigned int xid,
- struct cifs_open_parms *oparms,
- __u32 *oplock, FILE_ALL_INFO *buf);
+int smb2_parse_symlink_response(struct cifs_sb_info *cifs_sb, const struct kvec *iov, char **path);
+int smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock,
+ void *buf);
extern int smb2_unlock_range(struct cifsFileInfo *cfile,
struct file_lock *flock, const unsigned int xid);
extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile);
@@ -148,7 +144,8 @@ extern int SMB2_ioctl_init(struct cifs_tcon *tcon,
extern void SMB2_ioctl_free(struct smb_rqst *rqst);
extern int SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, bool watch_tree,
- u32 completion_filter);
+ u32 completion_filter, u32 max_out_data_len,
+ char **out_data, u32 *plen /* returned data len */);
extern int __SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid,
@@ -278,9 +275,9 @@ extern int smb2_query_info_compound(const unsigned int xid,
struct kvec *rsp, int *buftype,
struct cifs_sb_info *cifs_sb);
/* query path info from the server using SMB311 POSIX extensions*/
-extern int smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
- struct cifs_sb_info *sb, const char *path, struct smb311_posix_qinfo *qinf,
- bool *adjust_tx, bool *symlink);
+int smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb, const char *full_path,
+ struct cifs_open_info_data *data, bool *adjust_tz, bool *reparse);
int posix_info_parse(const void *beg, const void *end,
struct smb2_posix_info_parsed *out);
int posix_info_sid_size(const void *beg, const void *end);
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 1a5fc3314dbf..381babc1212c 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -32,19 +32,17 @@ smb3_crypto_shash_allocate(struct TCP_Server_Info *server)
struct cifs_secmech *p = &server->secmech;
int rc;
- rc = cifs_alloc_hash("hmac(sha256)",
- &p->hmacsha256,
- &p->sdeschmacsha256);
+ rc = cifs_alloc_hash("hmac(sha256)", &p->hmacsha256);
if (rc)
goto err;
- rc = cifs_alloc_hash("cmac(aes)", &p->cmacaes, &p->sdesccmacaes);
+ rc = cifs_alloc_hash("cmac(aes)", &p->aes_cmac);
if (rc)
goto err;
return 0;
err:
- cifs_free_hash(&p->hmacsha256, &p->sdeschmacsha256);
+ cifs_free_hash(&p->hmacsha256);
return rc;
}
@@ -54,25 +52,23 @@ smb311_crypto_shash_allocate(struct TCP_Server_Info *server)
struct cifs_secmech *p = &server->secmech;
int rc = 0;
- rc = cifs_alloc_hash("hmac(sha256)",
- &p->hmacsha256,
- &p->sdeschmacsha256);
+ rc = cifs_alloc_hash("hmac(sha256)", &p->hmacsha256);
if (rc)
return rc;
- rc = cifs_alloc_hash("cmac(aes)", &p->cmacaes, &p->sdesccmacaes);
+ rc = cifs_alloc_hash("cmac(aes)", &p->aes_cmac);
if (rc)
goto err;
- rc = cifs_alloc_hash("sha512", &p->sha512, &p->sdescsha512);
+ rc = cifs_alloc_hash("sha512", &p->sha512);
if (rc)
goto err;
return 0;
err:
- cifs_free_hash(&p->cmacaes, &p->sdesccmacaes);
- cifs_free_hash(&p->hmacsha256, &p->sdeschmacsha256);
+ cifs_free_hash(&p->aes_cmac);
+ cifs_free_hash(&p->hmacsha256);
return rc;
}
@@ -81,18 +77,19 @@ static
int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key)
{
struct cifs_chan *chan;
+ struct TCP_Server_Info *pserver;
struct cifs_ses *ses = NULL;
- struct TCP_Server_Info *it = NULL;
int i;
int rc = 0;
spin_lock(&cifs_tcp_ses_lock);
- list_for_each_entry(it, &cifs_tcp_ses_list, tcp_ses_list) {
- list_for_each_entry(ses, &it->smb_ses_list, smb_ses_list) {
- if (ses->Suid == ses_id)
- goto found;
- }
+ /* If server is a channel, select the primary channel */
+ pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
+
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (ses->Suid == ses_id)
+ goto found;
}
cifs_server_dbg(VFS, "%s: Could not find session 0x%llx\n",
__func__, ses_id);
@@ -140,9 +137,13 @@ out:
static struct cifs_ses *
smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id)
{
+ struct TCP_Server_Info *pserver;
struct cifs_ses *ses;
- list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ /* If server is a channel, select the primary channel */
+ pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
+
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
if (ses->Suid != ses_id)
continue;
++ses->ses_count;
@@ -219,34 +220,30 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
struct kvec *iov = rqst->rq_iov;
struct smb2_hdr *shdr = (struct smb2_hdr *)iov[0].iov_base;
struct cifs_ses *ses;
- struct shash_desc *shash;
- struct crypto_shash *hash;
- struct sdesc *sdesc = NULL;
+ struct shash_desc *shash = NULL;
struct smb_rqst drqst;
ses = smb2_find_smb_ses(server, le64_to_cpu(shdr->SessionId));
- if (!ses) {
+ if (unlikely(!ses)) {
cifs_server_dbg(VFS, "%s: Could not find session\n", __func__);
- return 0;
+ return -ENOENT;
}
memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE);
memset(shdr->Signature, 0x0, SMB2_SIGNATURE_SIZE);
if (allocate_crypto) {
- rc = cifs_alloc_hash("hmac(sha256)", &hash, &sdesc);
+ rc = cifs_alloc_hash("hmac(sha256)", &shash);
if (rc) {
cifs_server_dbg(VFS,
"%s: sha256 alloc failed\n", __func__);
goto out;
}
- shash = &sdesc->shash;
} else {
- hash = server->secmech.hmacsha256;
- shash = &server->secmech.sdeschmacsha256->shash;
+ shash = server->secmech.hmacsha256;
}
- rc = crypto_shash_setkey(hash, ses->auth_key.response,
+ rc = crypto_shash_setkey(shash->tfm, ses->auth_key.response,
SMB2_NTLMV2_SESSKEY_SIZE);
if (rc) {
cifs_server_dbg(VFS,
@@ -288,7 +285,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
out:
if (allocate_crypto)
- cifs_free_hash(&hash, &sdesc);
+ cifs_free_hash(&shash);
if (ses)
cifs_put_smb_ses(ses);
return rc;
@@ -315,42 +312,38 @@ static int generate_key(struct cifs_ses *ses, struct kvec label,
goto smb3signkey_ret;
}
- rc = crypto_shash_setkey(server->secmech.hmacsha256,
+ rc = crypto_shash_setkey(server->secmech.hmacsha256->tfm,
ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE);
if (rc) {
cifs_server_dbg(VFS, "%s: Could not set with session key\n", __func__);
goto smb3signkey_ret;
}
- rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash);
+ rc = crypto_shash_init(server->secmech.hmacsha256);
if (rc) {
cifs_server_dbg(VFS, "%s: Could not init sign hmac\n", __func__);
goto smb3signkey_ret;
}
- rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
- i, 4);
+ rc = crypto_shash_update(server->secmech.hmacsha256, i, 4);
if (rc) {
cifs_server_dbg(VFS, "%s: Could not update with n\n", __func__);
goto smb3signkey_ret;
}
- rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
- label.iov_base, label.iov_len);
+ rc = crypto_shash_update(server->secmech.hmacsha256, label.iov_base, label.iov_len);
if (rc) {
cifs_server_dbg(VFS, "%s: Could not update with label\n", __func__);
goto smb3signkey_ret;
}
- rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
- &zero, 1);
+ rc = crypto_shash_update(server->secmech.hmacsha256, &zero, 1);
if (rc) {
cifs_server_dbg(VFS, "%s: Could not update with zero\n", __func__);
goto smb3signkey_ret;
}
- rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
- context.iov_base, context.iov_len);
+ rc = crypto_shash_update(server->secmech.hmacsha256, context.iov_base, context.iov_len);
if (rc) {
cifs_server_dbg(VFS, "%s: Could not update with context\n", __func__);
goto smb3signkey_ret;
@@ -358,19 +351,16 @@ static int generate_key(struct cifs_ses *ses, struct kvec label,
if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) ||
(server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) {
- rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
- L256, 4);
+ rc = crypto_shash_update(server->secmech.hmacsha256, L256, 4);
} else {
- rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
- L128, 4);
+ rc = crypto_shash_update(server->secmech.hmacsha256, L128, 4);
}
if (rc) {
cifs_server_dbg(VFS, "%s: Could not update with L\n", __func__);
goto smb3signkey_ret;
}
- rc = crypto_shash_final(&server->secmech.sdeschmacsha256->shash,
- hashptr);
+ rc = crypto_shash_final(server->secmech.hmacsha256, hashptr);
if (rc) {
cifs_server_dbg(VFS, "%s: Could not generate sha256 hash\n", __func__);
goto smb3signkey_ret;
@@ -550,38 +540,35 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
unsigned char *sigptr = smb3_signature;
struct kvec *iov = rqst->rq_iov;
struct smb2_hdr *shdr = (struct smb2_hdr *)iov[0].iov_base;
- struct shash_desc *shash;
- struct crypto_shash *hash;
- struct sdesc *sdesc = NULL;
+ struct shash_desc *shash = NULL;
struct smb_rqst drqst;
u8 key[SMB3_SIGN_KEY_SIZE];
rc = smb2_get_sign_key(le64_to_cpu(shdr->SessionId), server, key);
- if (rc)
- return 0;
+ if (unlikely(rc)) {
+ cifs_server_dbg(VFS, "%s: Could not get signing key\n", __func__);
+ return rc;
+ }
if (allocate_crypto) {
- rc = cifs_alloc_hash("cmac(aes)", &hash, &sdesc);
+ rc = cifs_alloc_hash("cmac(aes)", &shash);
if (rc)
return rc;
-
- shash = &sdesc->shash;
} else {
- hash = server->secmech.cmacaes;
- shash = &server->secmech.sdesccmacaes->shash;
+ shash = server->secmech.aes_cmac;
}
memset(smb3_signature, 0x0, SMB2_CMACAES_SIZE);
memset(shdr->Signature, 0x0, SMB2_SIGNATURE_SIZE);
- rc = crypto_shash_setkey(hash, key, SMB2_CMACAES_SIZE);
+ rc = crypto_shash_setkey(shash->tfm, key, SMB2_CMACAES_SIZE);
if (rc) {
cifs_server_dbg(VFS, "%s: Could not set key for cmac aes\n", __func__);
goto out;
}
/*
- * we already allocate sdesccmacaes when we init smb3 signing key,
+ * we already allocate aes_cmac when we init smb3 signing key,
* so unlike smb2 case we do not have to check here if secmech are
* initialized
*/
@@ -617,7 +604,7 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
out:
if (allocate_crypto)
- cifs_free_hash(&hash, &sdesc);
+ cifs_free_hash(&shash);
return rc;
}
@@ -902,7 +889,7 @@ smb3_crypto_aead_allocate(struct TCP_Server_Info *server)
{
struct crypto_aead *tfm;
- if (!server->secmech.ccmaesencrypt) {
+ if (!server->secmech.enc) {
if ((server->cipher_type == SMB2_ENCRYPTION_AES128_GCM) ||
(server->cipher_type == SMB2_ENCRYPTION_AES256_GCM))
tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
@@ -913,23 +900,23 @@ smb3_crypto_aead_allocate(struct TCP_Server_Info *server)
__func__);
return PTR_ERR(tfm);
}
- server->secmech.ccmaesencrypt = tfm;
+ server->secmech.enc = tfm;
}
- if (!server->secmech.ccmaesdecrypt) {
+ if (!server->secmech.dec) {
if ((server->cipher_type == SMB2_ENCRYPTION_AES128_GCM) ||
(server->cipher_type == SMB2_ENCRYPTION_AES256_GCM))
tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
else
tfm = crypto_alloc_aead("ccm(aes)", 0, 0);
if (IS_ERR(tfm)) {
- crypto_free_aead(server->secmech.ccmaesencrypt);
- server->secmech.ccmaesencrypt = NULL;
+ crypto_free_aead(server->secmech.enc);
+ server->secmech.enc = NULL;
cifs_server_dbg(VFS, "%s: Failed to alloc decrypt aead\n",
__func__);
return PTR_ERR(tfm);
}
- server->secmech.ccmaesdecrypt = tfm;
+ server->secmech.dec = tfm;
}
return 0;
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index 5fbbec22bcc8..90789aaa6567 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -90,7 +90,7 @@ int smbd_max_send_size = 1364;
int smbd_max_fragmented_recv_size = 1024 * 1024;
/* The maximum single-message size which can be received */
-int smbd_max_receive_size = 8192;
+int smbd_max_receive_size = 1364;
/* The timeout to initiate send of a keepalive message on idle */
int smbd_keep_alive_interval = 120;
@@ -99,7 +99,7 @@ int smbd_keep_alive_interval = 120;
* User configurable initial values for RDMA transport
* The actual values used may be lower and are limited to hardware capabilities
*/
-/* Default maximum number of SGEs in a RDMA write/read */
+/* Default maximum number of pages in a single RDMA write/read */
int smbd_max_frmr_depth = 2048;
/* If payload is less than this byte, use RDMA send/recv not read/write */
@@ -270,7 +270,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc)
struct smbd_request *request =
container_of(wc->wr_cqe, struct smbd_request, cqe);
- log_rdma_send(INFO, "smbd_request %p completed wc->status=%d\n",
+ log_rdma_send(INFO, "smbd_request 0x%p completed wc->status=%d\n",
request, wc->status);
if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
@@ -448,7 +448,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
struct smbd_connection *info = response->info;
int data_length = 0;
- log_rdma_recv(INFO, "response=%p type=%d wc status=%d wc opcode %d byte_len=%d pkey_index=%x\n",
+ log_rdma_recv(INFO, "response=0x%p type=%d wc status=%d wc opcode %d byte_len=%d pkey_index=%u\n",
response, response->type, wc->status, wc->opcode,
wc->byte_len, wc->pkey_index);
@@ -723,7 +723,7 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info)
send_wr.opcode = IB_WR_SEND;
send_wr.send_flags = IB_SEND_SIGNALED;
- log_rdma_send(INFO, "sge addr=%llx length=%x lkey=%x\n",
+ log_rdma_send(INFO, "sge addr=0x%llx length=%u lkey=0x%x\n",
request->sge[0].addr,
request->sge[0].length, request->sge[0].lkey);
@@ -792,7 +792,7 @@ static int smbd_post_send(struct smbd_connection *info,
for (i = 0; i < request->num_sge; i++) {
log_rdma_send(INFO,
- "rdma_request sge[%d] addr=%llu length=%u\n",
+ "rdma_request sge[%d] addr=0x%llx length=%u\n",
i, request->sge[i].addr, request->sge[i].length);
ib_dma_sync_single_for_device(
info->id->device,
@@ -1017,9 +1017,9 @@ static int smbd_post_send_data(
{
int i;
u32 data_length = 0;
- struct scatterlist sgl[SMBDIRECT_MAX_SGE];
+ struct scatterlist sgl[SMBDIRECT_MAX_SEND_SGE - 1];
- if (n_vec > SMBDIRECT_MAX_SGE) {
+ if (n_vec > SMBDIRECT_MAX_SEND_SGE - 1) {
cifs_dbg(VFS, "Can't fit data to SGL, n_vec=%d\n", n_vec);
return -EINVAL;
}
@@ -1079,7 +1079,7 @@ static int smbd_negotiate(struct smbd_connection *info)
response->type = SMBD_NEGOTIATE_RESP;
rc = smbd_post_recv(info, response);
- log_rdma_event(INFO, "smbd_post_recv rc=%d iov.addr=%llx iov.length=%x iov.lkey=%x\n",
+ log_rdma_event(INFO, "smbd_post_recv rc=%d iov.addr=0x%llx iov.length=%u iov.lkey=0x%x\n",
rc, response->sge.addr,
response->sge.length, response->sge.lkey);
if (rc)
@@ -1539,7 +1539,7 @@ static struct smbd_connection *_smbd_get_connection(
if (smbd_send_credit_target > info->id->device->attrs.max_cqe ||
smbd_send_credit_target > info->id->device->attrs.max_qp_wr) {
- log_rdma_event(ERR, "consider lowering send_credit_target = %d. Possible CQE overrun, device reporting max_cpe %d max_qp_wr %d\n",
+ log_rdma_event(ERR, "consider lowering send_credit_target = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
smbd_send_credit_target,
info->id->device->attrs.max_cqe,
info->id->device->attrs.max_qp_wr);
@@ -1548,7 +1548,7 @@ static struct smbd_connection *_smbd_get_connection(
if (smbd_receive_credit_max > info->id->device->attrs.max_cqe ||
smbd_receive_credit_max > info->id->device->attrs.max_qp_wr) {
- log_rdma_event(ERR, "consider lowering receive_credit_max = %d. Possible CQE overrun, device reporting max_cpe %d max_qp_wr %d\n",
+ log_rdma_event(ERR, "consider lowering receive_credit_max = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
smbd_receive_credit_max,
info->id->device->attrs.max_cqe,
info->id->device->attrs.max_qp_wr);
@@ -1562,17 +1562,15 @@ static struct smbd_connection *_smbd_get_connection(
info->max_receive_size = smbd_max_receive_size;
info->keep_alive_interval = smbd_keep_alive_interval;
- if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SGE) {
+ if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SEND_SGE ||
+ info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_RECV_SGE) {
log_rdma_event(ERR,
- "warning: device max_send_sge = %d too small\n",
- info->id->device->attrs.max_send_sge);
- log_rdma_event(ERR, "Queue Pair creation may fail\n");
- }
- if (info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_SGE) {
- log_rdma_event(ERR,
- "warning: device max_recv_sge = %d too small\n",
+ "device %.*s max_send_sge/max_recv_sge = %d/%d too small\n",
+ IB_DEVICE_NAME_MAX,
+ info->id->device->name,
+ info->id->device->attrs.max_send_sge,
info->id->device->attrs.max_recv_sge);
- log_rdma_event(ERR, "Queue Pair creation may fail\n");
+ goto config_failed;
}
info->send_cq = NULL;
@@ -1598,8 +1596,8 @@ static struct smbd_connection *_smbd_get_connection(
qp_attr.qp_context = info;
qp_attr.cap.max_send_wr = info->send_credit_target;
qp_attr.cap.max_recv_wr = info->receive_credit_max;
- qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SGE;
- qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_SGE;
+ qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SEND_SGE;
+ qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_RECV_SGE;
qp_attr.cap.max_inline_data = 0;
qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
qp_attr.qp_type = IB_QPT_RC;
@@ -1986,10 +1984,11 @@ int smbd_send(struct TCP_Server_Info *server,
int num_rqst, struct smb_rqst *rqst_array)
{
struct smbd_connection *info = server->smbd_conn;
- struct kvec vec;
+ struct kvec vecs[SMBDIRECT_MAX_SEND_SGE - 1];
int nvecs;
int size;
unsigned int buflen, remaining_data_length;
+ unsigned int offset, remaining_vec_data_length;
int start, i, j;
int max_iov_size =
info->max_send_size - sizeof(struct smbd_data_transfer);
@@ -1998,10 +1997,8 @@ int smbd_send(struct TCP_Server_Info *server,
struct smb_rqst *rqst;
int rqst_idx;
- if (info->transport_status != SMBD_CONNECTED) {
- rc = -EAGAIN;
- goto done;
- }
+ if (info->transport_status != SMBD_CONNECTED)
+ return -EAGAIN;
/*
* Add in the page array if there is one. The caller needs to set
@@ -2012,125 +2009,95 @@ int smbd_send(struct TCP_Server_Info *server,
for (i = 0; i < num_rqst; i++)
remaining_data_length += smb_rqst_len(server, &rqst_array[i]);
- if (remaining_data_length > info->max_fragmented_send_size) {
+ if (unlikely(remaining_data_length > info->max_fragmented_send_size)) {
+ /* assertion: payload never exceeds negotiated maximum */
log_write(ERR, "payload size %d > max size %d\n",
remaining_data_length, info->max_fragmented_send_size);
- rc = -EINVAL;
- goto done;
+ return -EINVAL;
}
log_write(INFO, "num_rqst=%d total length=%u\n",
num_rqst, remaining_data_length);
rqst_idx = 0;
-next_rqst:
- rqst = &rqst_array[rqst_idx];
- iov = rqst->rq_iov;
-
- cifs_dbg(FYI, "Sending smb (RDMA): idx=%d smb_len=%lu\n",
- rqst_idx, smb_rqst_len(server, rqst));
- for (i = 0; i < rqst->rq_nvec; i++)
- dump_smb(iov[i].iov_base, iov[i].iov_len);
-
-
- log_write(INFO, "rqst_idx=%d nvec=%d rqst->rq_npages=%d rq_pagesz=%d rq_tailsz=%d buflen=%lu\n",
- rqst_idx, rqst->rq_nvec, rqst->rq_npages, rqst->rq_pagesz,
- rqst->rq_tailsz, smb_rqst_len(server, rqst));
-
- start = i = 0;
- buflen = 0;
- while (true) {
- buflen += iov[i].iov_len;
- if (buflen > max_iov_size) {
- if (i > start) {
- remaining_data_length -=
- (buflen-iov[i].iov_len);
- log_write(INFO, "sending iov[] from start=%d i=%d nvecs=%d remaining_data_length=%d\n",
- start, i, i - start,
- remaining_data_length);
- rc = smbd_post_send_data(
- info, &iov[start], i-start,
- remaining_data_length);
- if (rc)
- goto done;
- } else {
- /* iov[start] is too big, break it */
- nvecs = (buflen+max_iov_size-1)/max_iov_size;
- log_write(INFO, "iov[%d] iov_base=%p buflen=%d break to %d vectors\n",
- start, iov[start].iov_base,
- buflen, nvecs);
- for (j = 0; j < nvecs; j++) {
- vec.iov_base =
- (char *)iov[start].iov_base +
- j*max_iov_size;
- vec.iov_len = max_iov_size;
- if (j == nvecs-1)
- vec.iov_len =
- buflen -
- max_iov_size*(nvecs-1);
- remaining_data_length -= vec.iov_len;
- log_write(INFO,
- "sending vec j=%d iov_base=%p iov_len=%zu remaining_data_length=%d\n",
- j, vec.iov_base, vec.iov_len,
- remaining_data_length);
- rc = smbd_post_send_data(
- info, &vec, 1,
- remaining_data_length);
- if (rc)
- goto done;
+ do {
+ rqst = &rqst_array[rqst_idx];
+ iov = rqst->rq_iov;
+
+ cifs_dbg(FYI, "Sending smb (RDMA): idx=%d smb_len=%lu\n",
+ rqst_idx, smb_rqst_len(server, rqst));
+ remaining_vec_data_length = 0;
+ for (i = 0; i < rqst->rq_nvec; i++) {
+ remaining_vec_data_length += iov[i].iov_len;
+ dump_smb(iov[i].iov_base, iov[i].iov_len);
+ }
+
+ log_write(INFO, "rqst_idx=%d nvec=%d rqst->rq_npages=%d rq_pagesz=%d rq_tailsz=%d buflen=%lu\n",
+ rqst_idx, rqst->rq_nvec,
+ rqst->rq_npages, rqst->rq_pagesz,
+ rqst->rq_tailsz, smb_rqst_len(server, rqst));
+
+ start = 0;
+ offset = 0;
+ do {
+ buflen = 0;
+ i = start;
+ j = 0;
+ while (i < rqst->rq_nvec &&
+ j < SMBDIRECT_MAX_SEND_SGE - 1 &&
+ buflen < max_iov_size) {
+
+ vecs[j].iov_base = iov[i].iov_base + offset;
+ if (buflen + iov[i].iov_len > max_iov_size) {
+ vecs[j].iov_len =
+ max_iov_size - iov[i].iov_len;
+ buflen = max_iov_size;
+ offset = vecs[j].iov_len;
+ } else {
+ vecs[j].iov_len =
+ iov[i].iov_len - offset;
+ buflen += vecs[j].iov_len;
+ offset = 0;
+ ++i;
}
- i++;
- if (i == rqst->rq_nvec)
- break;
+ ++j;
}
+
+ remaining_vec_data_length -= buflen;
+ remaining_data_length -= buflen;
+ log_write(INFO, "sending %s iov[%d] from start=%d nvecs=%d remaining_data_length=%d\n",
+ remaining_vec_data_length > 0 ?
+ "partial" : "complete",
+ rqst->rq_nvec, start, j,
+ remaining_data_length);
+
start = i;
- buflen = 0;
- } else {
- i++;
- if (i == rqst->rq_nvec) {
- /* send out all remaining vecs */
- remaining_data_length -= buflen;
- log_write(INFO, "sending iov[] from start=%d i=%d nvecs=%d remaining_data_length=%d\n",
- start, i, i - start,
+ rc = smbd_post_send_data(info, vecs, j, remaining_data_length);
+ if (rc)
+ goto done;
+ } while (remaining_vec_data_length > 0);
+
+ /* now sending pages if there are any */
+ for (i = 0; i < rqst->rq_npages; i++) {
+ rqst_page_get_length(rqst, i, &buflen, &offset);
+ nvecs = (buflen + max_iov_size - 1) / max_iov_size;
+ log_write(INFO, "sending pages buflen=%d nvecs=%d\n",
+ buflen, nvecs);
+ for (j = 0; j < nvecs; j++) {
+ size = min_t(unsigned int, max_iov_size, remaining_data_length);
+ remaining_data_length -= size;
+ log_write(INFO, "sending pages i=%d offset=%d size=%d remaining_data_length=%d\n",
+ i, j * max_iov_size + offset, size,
remaining_data_length);
- rc = smbd_post_send_data(info, &iov[start],
- i-start, remaining_data_length);
+ rc = smbd_post_send_page(
+ info, rqst->rq_pages[i],
+ j*max_iov_size + offset,
+ size, remaining_data_length);
if (rc)
goto done;
- break;
}
}
- log_write(INFO, "looping i=%d buflen=%d\n", i, buflen);
- }
-
- /* now sending pages if there are any */
- for (i = 0; i < rqst->rq_npages; i++) {
- unsigned int offset;
-
- rqst_page_get_length(rqst, i, &buflen, &offset);
- nvecs = (buflen + max_iov_size - 1) / max_iov_size;
- log_write(INFO, "sending pages buflen=%d nvecs=%d\n",
- buflen, nvecs);
- for (j = 0; j < nvecs; j++) {
- size = max_iov_size;
- if (j == nvecs-1)
- size = buflen - j*max_iov_size;
- remaining_data_length -= size;
- log_write(INFO, "sending pages i=%d offset=%d size=%d remaining_data_length=%d\n",
- i, j * max_iov_size + offset, size,
- remaining_data_length);
- rc = smbd_post_send_page(
- info, rqst->rq_pages[i],
- j*max_iov_size + offset,
- size, remaining_data_length);
- if (rc)
- goto done;
- }
- }
-
- rqst_idx++;
- if (rqst_idx < num_rqst)
- goto next_rqst;
+ } while (++rqst_idx < num_rqst);
done:
/*
diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h
index a87fca82a796..207ef979cd51 100644
--- a/fs/cifs/smbdirect.h
+++ b/fs/cifs/smbdirect.h
@@ -91,7 +91,7 @@ struct smbd_connection {
/* Memory registrations */
/* Maximum number of RDMA read/write outstanding on this connection */
int responder_resources;
- /* Maximum number of SGEs in a RDMA write/read */
+ /* Maximum number of pages in a single RDMA write/read on this connection */
int max_frmr_depth;
/*
* If payload is less than or equal to the threshold,
@@ -225,21 +225,25 @@ struct smbd_buffer_descriptor_v1 {
__le32 length;
} __packed;
-/* Default maximum number of SGEs in a RDMA send/recv */
-#define SMBDIRECT_MAX_SGE 16
+/* Maximum number of SGEs used by smbdirect.c in any send work request */
+#define SMBDIRECT_MAX_SEND_SGE 6
+
/* The context for a SMBD request */
struct smbd_request {
struct smbd_connection *info;
struct ib_cqe cqe;
- /* the SGE entries for this packet */
- struct ib_sge sge[SMBDIRECT_MAX_SGE];
+ /* the SGE entries for this work request */
+ struct ib_sge sge[SMBDIRECT_MAX_SEND_SGE];
int num_sge;
/* SMBD packet header follows this structure */
u8 packet[];
};
+/* Maximum number of SGEs used by smbdirect.c in any receive work request */
+#define SMBDIRECT_MAX_RECV_SGE 1
+
/* The context for a SMBD response */
struct smbd_response {
struct smbd_connection *info;
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index 6b88dc2e364f..110070ba8b04 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -372,6 +372,7 @@ DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(set_eof_enter);
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(set_info_compound_enter);
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(delete_enter);
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(mkdir_enter);
+DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(tdis_enter);
DECLARE_EVENT_CLASS(smb3_inf_compound_done_class,
@@ -409,6 +410,7 @@ DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(set_eof_done);
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(set_info_compound_done);
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(delete_done);
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(mkdir_done);
+DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(tdis_done);
DECLARE_EVENT_CLASS(smb3_inf_compound_err_class,
@@ -451,6 +453,7 @@ DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(set_eof_err);
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(set_info_compound_err);
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(mkdir_err);
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(delete_err);
+DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(tdis_err);
/*
* For logging SMB3 Status code and Command for responses which return errors
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 9a2753e21170..575fa8f58342 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -753,8 +753,9 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ)
{
int error;
- error = wait_event_freezekillable_unsafe(server->response_q,
- midQ->mid_state != MID_REQUEST_SUBMITTED);
+ error = wait_event_state(server->response_q,
+ midQ->mid_state != MID_REQUEST_SUBMITTED,
+ (TASK_KILLABLE|TASK_FREEZABLE_UNSAFE));
if (error < 0)
return -ERESTARTSYS;
diff --git a/fs/coredump.c b/fs/coredump.c
index 9f4aae202109..7bad7785e8e6 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -354,7 +354,7 @@ static int zap_process(struct task_struct *start, int exit_code)
struct task_struct *t;
int nr = 0;
- /* ignore all signals except SIGKILL, see prepare_signal() */
+ /* Allow SIGKILL, see prepare_signal() */
start->signal->flags = SIGNAL_GROUP_EXIT;
start->signal->group_exit_code = exit_code;
start->signal->group_stop_count = 0;
@@ -402,9 +402,8 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
if (core_waiters > 0) {
struct core_thread *ptr;
- freezer_do_not_count();
- wait_for_completion(&core_state->startup);
- freezer_count();
+ wait_for_completion_state(&core_state->startup,
+ TASK_UNINTERRUPTIBLE|TASK_FREEZABLE);
/*
* Wait for all the threads to become inactive, so that
* all the thread context (extended register state, like
@@ -412,7 +411,7 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
*/
ptr = core_state->dumper.next;
while (ptr != NULL) {
- wait_task_inactive(ptr->task, 0);
+ wait_task_inactive(ptr->task, TASK_ANY);
ptr = ptr->next;
}
}
@@ -832,6 +831,39 @@ static int __dump_skip(struct coredump_params *cprm, size_t nr)
}
}
+static int dump_emit_page(struct coredump_params *cprm, struct page *page)
+{
+ struct bio_vec bvec = {
+ .bv_page = page,
+ .bv_offset = 0,
+ .bv_len = PAGE_SIZE,
+ };
+ struct iov_iter iter;
+ struct file *file = cprm->file;
+ loff_t pos;
+ ssize_t n;
+
+ if (cprm->to_skip) {
+ if (!__dump_skip(cprm, cprm->to_skip))
+ return 0;
+ cprm->to_skip = 0;
+ }
+ if (cprm->written + PAGE_SIZE > cprm->limit)
+ return 0;
+ if (dump_interrupted())
+ return 0;
+ pos = file->f_pos;
+ iov_iter_bvec(&iter, WRITE, &bvec, 1, PAGE_SIZE);
+ n = __kernel_write_iter(cprm->file, &iter, &pos);
+ if (n != PAGE_SIZE)
+ return 0;
+ file->f_pos = pos;
+ cprm->written += PAGE_SIZE;
+ cprm->pos += PAGE_SIZE;
+
+ return 1;
+}
+
int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
{
if (cprm->to_skip) {
@@ -863,7 +895,6 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
for (addr = start; addr < start + len; addr += PAGE_SIZE) {
struct page *page;
- int stop;
/*
* To avoid having to allocate page tables for virtual address
@@ -874,10 +905,7 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
*/
page = get_dump_page(addr);
if (page) {
- void *kaddr = kmap_local_page(page);
-
- stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
- kunmap_local(kaddr);
+ int stop = !dump_emit_page(cprm, page);
put_page(page);
if (stop)
return 0;
@@ -1072,30 +1100,20 @@ whole:
return vma->vm_end - vma->vm_start;
}
-static struct vm_area_struct *first_vma(struct task_struct *tsk,
- struct vm_area_struct *gate_vma)
-{
- struct vm_area_struct *ret = tsk->mm->mmap;
-
- if (ret)
- return ret;
- return gate_vma;
-}
-
/*
* Helper function for iterating across a vma list. It ensures that the caller
* will visit `gate_vma' prior to terminating the search.
*/
-static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
+static struct vm_area_struct *coredump_next_vma(struct ma_state *mas,
+ struct vm_area_struct *vma,
struct vm_area_struct *gate_vma)
{
- struct vm_area_struct *ret;
-
- ret = this_vma->vm_next;
- if (ret)
- return ret;
- if (this_vma == gate_vma)
+ if (gate_vma && (vma == gate_vma))
return NULL;
+
+ vma = mas_next(mas, ULONG_MAX);
+ if (vma)
+ return vma;
return gate_vma;
}
@@ -1119,9 +1137,10 @@ static void free_vma_snapshot(struct coredump_params *cprm)
*/
static bool dump_vma_snapshot(struct coredump_params *cprm)
{
- struct vm_area_struct *vma, *gate_vma;
+ struct vm_area_struct *gate_vma, *vma = NULL;
struct mm_struct *mm = current->mm;
- int i;
+ MA_STATE(mas, &mm->mm_mt, 0, 0);
+ int i = 0;
/*
* Once the stack expansion code is fixed to not change VMA bounds
@@ -1141,8 +1160,7 @@ static bool dump_vma_snapshot(struct coredump_params *cprm)
return false;
}
- for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
- vma = next_vma(vma, gate_vma), i++) {
+ while ((vma = coredump_next_vma(&mas, vma, gate_vma)) != NULL) {
struct core_vma_metadata *m = cprm->vma_meta + i;
m->start = vma->vm_start;
@@ -1150,10 +1168,10 @@ static bool dump_vma_snapshot(struct coredump_params *cprm)
m->flags = vma->vm_flags;
m->dump_size = vma_dump_size(vma, cprm->mm_flags);
m->pgoff = vma->vm_pgoff;
-
m->file = vma->vm_file;
if (m->file)
get_file(m->file);
+ i++;
}
mmap_write_unlock(mm);
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index 2217fe5ece6f..1b4403136d05 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -25,21 +25,25 @@
* then this function isn't applicable. This function may sleep, so it must be
* called from a workqueue rather than from the bio's bi_end_io callback.
*
- * This function sets PG_error on any pages that contain any blocks that failed
- * to be decrypted. The filesystem must not mark such pages uptodate.
+ * Return: %true on success; %false on failure. On failure, bio->bi_status is
+ * also set to an error status.
*/
-void fscrypt_decrypt_bio(struct bio *bio)
+bool fscrypt_decrypt_bio(struct bio *bio)
{
struct bio_vec *bv;
struct bvec_iter_all iter_all;
bio_for_each_segment_all(bv, bio, iter_all) {
struct page *page = bv->bv_page;
- int ret = fscrypt_decrypt_pagecache_blocks(page, bv->bv_len,
+ int err = fscrypt_decrypt_pagecache_blocks(page, bv->bv_len,
bv->bv_offset);
- if (ret)
- SetPageError(page);
+
+ if (err) {
+ bio->bi_status = errno_to_blk_status(err);
+ return false;
+ }
}
+ return true;
}
EXPORT_SYMBOL(fscrypt_decrypt_bio);
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 3afdaa084773..d5f68a0c5d15 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -184,7 +184,7 @@ struct fscrypt_symlink_data {
struct fscrypt_prepared_key {
struct crypto_skcipher *tfm;
#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
- struct fscrypt_blk_crypto_key *blk_key;
+ struct blk_crypto_key *blk_key;
#endif
};
@@ -225,7 +225,7 @@ struct fscrypt_info {
* will be NULL if the master key was found in a process-subscribed
* keyring rather than in the filesystem-level keyring.
*/
- struct key *ci_master_key;
+ struct fscrypt_master_key *ci_master_key;
/*
* Link in list of inodes that were unlocked with the master key.
@@ -344,7 +344,8 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key,
const u8 *raw_key,
const struct fscrypt_info *ci);
-void fscrypt_destroy_inline_crypt_key(struct fscrypt_prepared_key *prep_key);
+void fscrypt_destroy_inline_crypt_key(struct super_block *sb,
+ struct fscrypt_prepared_key *prep_key);
/*
* Check whether the crypto transform or blk-crypto key has been allocated in
@@ -390,7 +391,8 @@ fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key,
}
static inline void
-fscrypt_destroy_inline_crypt_key(struct fscrypt_prepared_key *prep_key)
+fscrypt_destroy_inline_crypt_key(struct super_block *sb,
+ struct fscrypt_prepared_key *prep_key)
{
}
@@ -437,6 +439,40 @@ struct fscrypt_master_key_secret {
struct fscrypt_master_key {
/*
+ * Back-pointer to the super_block of the filesystem to which this
+ * master key has been added. Only valid if ->mk_active_refs > 0.
+ */
+ struct super_block *mk_sb;
+
+ /*
+ * Link in ->mk_sb->s_master_keys->key_hashtable.
+ * Only valid if ->mk_active_refs > 0.
+ */
+ struct hlist_node mk_node;
+
+ /* Semaphore that protects ->mk_secret and ->mk_users */
+ struct rw_semaphore mk_sem;
+
+ /*
+ * Active and structural reference counts. An active ref guarantees
+ * that the struct continues to exist, continues to be in the keyring
+ * ->mk_sb->s_master_keys, and that any embedded subkeys (e.g.
+ * ->mk_direct_keys) that have been prepared continue to exist.
+ * A structural ref only guarantees that the struct continues to exist.
+ *
+ * There is one active ref associated with ->mk_secret being present,
+ * and one active ref for each inode in ->mk_decrypted_inodes.
+ *
+ * There is one structural ref associated with the active refcount being
+ * nonzero. Finding a key in the keyring also takes a structural ref,
+ * which is then held temporarily while the key is operated on.
+ */
+ refcount_t mk_active_refs;
+ refcount_t mk_struct_refs;
+
+ struct rcu_head mk_rcu_head;
+
+ /*
* The secret key material. After FS_IOC_REMOVE_ENCRYPTION_KEY is
* executed, this is wiped and no new inodes can be unlocked with this
* key; however, there may still be inodes in ->mk_decrypted_inodes
@@ -444,7 +480,10 @@ struct fscrypt_master_key {
* FS_IOC_REMOVE_ENCRYPTION_KEY can be retried, or
* FS_IOC_ADD_ENCRYPTION_KEY can add the secret again.
*
- * Locking: protected by this master key's key->sem.
+ * While ->mk_secret is present, one ref in ->mk_active_refs is held.
+ *
+ * Locking: protected by ->mk_sem. The manipulation of ->mk_active_refs
+ * associated with this field is protected by ->mk_sem as well.
*/
struct fscrypt_master_key_secret mk_secret;
@@ -465,23 +504,13 @@ struct fscrypt_master_key {
*
* This is NULL for v1 policy keys; those can only be added by root.
*
- * Locking: in addition to this keyring's own semaphore, this is
- * protected by this master key's key->sem, so we can do atomic
- * search+insert. It can also be searched without taking any locks, but
- * in that case the returned key may have already been removed.
+ * Locking: protected by ->mk_sem. (We don't just rely on the keyrings
+ * subsystem semaphore ->mk_users->sem, as we need support for atomic
+ * search+insert along with proper synchronization with ->mk_secret.)
*/
struct key *mk_users;
/*
- * Length of ->mk_decrypted_inodes, plus one if mk_secret is present.
- * Once this goes to 0, the master key is removed from ->s_master_keys.
- * The 'struct fscrypt_master_key' will continue to live as long as the
- * 'struct key' whose payload it is, but we won't let this reference
- * count rise again.
- */
- refcount_t mk_refcount;
-
- /*
* List of inodes that were unlocked using this key. This allows the
* inodes to be evicted efficiently if the key is removed.
*/
@@ -506,10 +535,10 @@ static inline bool
is_master_key_secret_present(const struct fscrypt_master_key_secret *secret)
{
/*
- * The READ_ONCE() is only necessary for fscrypt_drop_inode() and
- * fscrypt_key_describe(). These run in atomic context, so they can't
- * take the key semaphore and thus 'secret' can change concurrently
- * which would be a data race. But they only need to know whether the
+ * The READ_ONCE() is only necessary for fscrypt_drop_inode().
+ * fscrypt_drop_inode() runs in atomic context, so it can't take the key
+ * semaphore and thus 'secret' can change concurrently which would be a
+ * data race. But fscrypt_drop_inode() only need to know whether the
* secret *was* present at the time of check, so READ_ONCE() suffices.
*/
return READ_ONCE(secret->size) != 0;
@@ -538,7 +567,11 @@ static inline int master_key_spec_len(const struct fscrypt_key_specifier *spec)
return 0;
}
-struct key *
+void fscrypt_put_master_key(struct fscrypt_master_key *mk);
+
+void fscrypt_put_master_key_activeref(struct fscrypt_master_key *mk);
+
+struct fscrypt_master_key *
fscrypt_find_master_key(struct super_block *sb,
const struct fscrypt_key_specifier *mk_spec);
@@ -569,7 +602,8 @@ extern struct fscrypt_mode fscrypt_modes[];
int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key,
const u8 *raw_key, const struct fscrypt_info *ci);
-void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key);
+void fscrypt_destroy_prepared_key(struct super_block *sb,
+ struct fscrypt_prepared_key *prep_key);
int fscrypt_set_per_file_enc_key(struct fscrypt_info *ci, const u8 *raw_key);
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index 7c01025879b3..7b8c5a1104b5 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -5,8 +5,6 @@
* Encryption hooks for higher-level filesystem operations.
*/
-#include <linux/key.h>
-
#include "fscrypt_private.h"
/**
@@ -142,7 +140,6 @@ int fscrypt_prepare_setflags(struct inode *inode,
unsigned int oldflags, unsigned int flags)
{
struct fscrypt_info *ci;
- struct key *key;
struct fscrypt_master_key *mk;
int err;
@@ -158,14 +155,13 @@ int fscrypt_prepare_setflags(struct inode *inode,
ci = inode->i_crypt_info;
if (ci->ci_policy.version != FSCRYPT_POLICY_V2)
return -EINVAL;
- key = ci->ci_master_key;
- mk = key->payload.data[0];
- down_read(&key->sem);
+ mk = ci->ci_master_key;
+ down_read(&mk->mk_sem);
if (is_master_key_secret_present(&mk->mk_secret))
err = fscrypt_derive_dirhash_key(ci, mk);
else
err = -ENOKEY;
- up_read(&key->sem);
+ up_read(&mk->mk_sem);
return err;
}
return 0;
diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c
index 90f3e68f166e..cea8b14007e6 100644
--- a/fs/crypto/inline_crypt.c
+++ b/fs/crypto/inline_crypt.c
@@ -21,26 +21,22 @@
#include "fscrypt_private.h"
-struct fscrypt_blk_crypto_key {
- struct blk_crypto_key base;
- int num_devs;
- struct request_queue *devs[];
-};
-
-static int fscrypt_get_num_devices(struct super_block *sb)
+static struct block_device **fscrypt_get_devices(struct super_block *sb,
+ unsigned int *num_devs)
{
- if (sb->s_cop->get_num_devices)
- return sb->s_cop->get_num_devices(sb);
- return 1;
-}
+ struct block_device **devs;
-static void fscrypt_get_devices(struct super_block *sb, int num_devs,
- struct request_queue **devs)
-{
- if (num_devs == 1)
- devs[0] = bdev_get_queue(sb->s_bdev);
- else
- sb->s_cop->get_devices(sb, devs);
+ if (sb->s_cop->get_devices) {
+ devs = sb->s_cop->get_devices(sb, num_devs);
+ if (devs)
+ return devs;
+ }
+ devs = kmalloc(sizeof(*devs), GFP_KERNEL);
+ if (!devs)
+ return ERR_PTR(-ENOMEM);
+ devs[0] = sb->s_bdev;
+ *num_devs = 1;
+ return devs;
}
static unsigned int fscrypt_get_dun_bytes(const struct fscrypt_info *ci)
@@ -74,15 +70,17 @@ static unsigned int fscrypt_get_dun_bytes(const struct fscrypt_info *ci)
* helpful for debugging problems where the "wrong" implementation is used.
*/
static void fscrypt_log_blk_crypto_impl(struct fscrypt_mode *mode,
- struct request_queue **devs,
- int num_devs,
+ struct block_device **devs,
+ unsigned int num_devs,
const struct blk_crypto_config *cfg)
{
- int i;
+ unsigned int i;
for (i = 0; i < num_devs; i++) {
+ struct request_queue *q = bdev_get_queue(devs[i]);
+
if (!IS_ENABLED(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) ||
- __blk_crypto_cfg_supported(devs[i]->crypto_profile, cfg)) {
+ __blk_crypto_cfg_supported(q->crypto_profile, cfg)) {
if (!xchg(&mode->logged_blk_crypto_native, 1))
pr_info("fscrypt: %s using blk-crypto (native)\n",
mode->friendly_name);
@@ -99,9 +97,9 @@ int fscrypt_select_encryption_impl(struct fscrypt_info *ci)
const struct inode *inode = ci->ci_inode;
struct super_block *sb = inode->i_sb;
struct blk_crypto_config crypto_cfg;
- int num_devs;
- struct request_queue **devs;
- int i;
+ struct block_device **devs;
+ unsigned int num_devs;
+ unsigned int i;
/* The file must need contents encryption, not filenames encryption */
if (!S_ISREG(inode->i_mode))
@@ -129,20 +127,20 @@ int fscrypt_select_encryption_impl(struct fscrypt_info *ci)
return 0;
/*
- * On all the filesystem's devices, blk-crypto must support the crypto
- * configuration that the file would use.
+ * On all the filesystem's block devices, blk-crypto must support the
+ * crypto configuration that the file would use.
*/
crypto_cfg.crypto_mode = ci->ci_mode->blk_crypto_mode;
crypto_cfg.data_unit_size = sb->s_blocksize;
crypto_cfg.dun_bytes = fscrypt_get_dun_bytes(ci);
- num_devs = fscrypt_get_num_devices(sb);
- devs = kmalloc_array(num_devs, sizeof(*devs), GFP_KERNEL);
- if (!devs)
- return -ENOMEM;
- fscrypt_get_devices(sb, num_devs, devs);
+
+ devs = fscrypt_get_devices(sb, &num_devs);
+ if (IS_ERR(devs))
+ return PTR_ERR(devs);
for (i = 0; i < num_devs; i++) {
- if (!blk_crypto_config_supported(devs[i], &crypto_cfg))
+ if (!blk_crypto_config_supported(bdev_get_queue(devs[i]),
+ &crypto_cfg))
goto out_free_devs;
}
@@ -162,49 +160,41 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key,
const struct inode *inode = ci->ci_inode;
struct super_block *sb = inode->i_sb;
enum blk_crypto_mode_num crypto_mode = ci->ci_mode->blk_crypto_mode;
- int num_devs = fscrypt_get_num_devices(sb);
- int queue_refs = 0;
- struct fscrypt_blk_crypto_key *blk_key;
+ struct blk_crypto_key *blk_key;
+ struct block_device **devs;
+ unsigned int num_devs;
+ unsigned int i;
int err;
- int i;
- blk_key = kzalloc(struct_size(blk_key, devs, num_devs), GFP_KERNEL);
+ blk_key = kmalloc(sizeof(*blk_key), GFP_KERNEL);
if (!blk_key)
return -ENOMEM;
- blk_key->num_devs = num_devs;
- fscrypt_get_devices(sb, num_devs, blk_key->devs);
-
- err = blk_crypto_init_key(&blk_key->base, raw_key, crypto_mode,
+ err = blk_crypto_init_key(blk_key, raw_key, crypto_mode,
fscrypt_get_dun_bytes(ci), sb->s_blocksize);
if (err) {
fscrypt_err(inode, "error %d initializing blk-crypto key", err);
goto fail;
}
- /*
- * We have to start using blk-crypto on all the filesystem's devices.
- * We also have to save all the request_queue's for later so that the
- * key can be evicted from them. This is needed because some keys
- * aren't destroyed until after the filesystem was already unmounted
- * (namely, the per-mode keys in struct fscrypt_master_key).
- */
+ /* Start using blk-crypto on all the filesystem's block devices. */
+ devs = fscrypt_get_devices(sb, &num_devs);
+ if (IS_ERR(devs)) {
+ err = PTR_ERR(devs);
+ goto fail;
+ }
for (i = 0; i < num_devs; i++) {
- if (!blk_get_queue(blk_key->devs[i])) {
- fscrypt_err(inode, "couldn't get request_queue");
- err = -EAGAIN;
- goto fail;
- }
- queue_refs++;
-
- err = blk_crypto_start_using_key(&blk_key->base,
- blk_key->devs[i]);
- if (err) {
- fscrypt_err(inode,
- "error %d starting to use blk-crypto", err);
- goto fail;
- }
+ err = blk_crypto_start_using_key(blk_key,
+ bdev_get_queue(devs[i]));
+ if (err)
+ break;
}
+ kfree(devs);
+ if (err) {
+ fscrypt_err(inode, "error %d starting to use blk-crypto", err);
+ goto fail;
+ }
+
/*
* Pairs with the smp_load_acquire() in fscrypt_is_key_prepared().
* I.e., here we publish ->blk_key with a RELEASE barrier so that
@@ -215,24 +205,29 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key,
return 0;
fail:
- for (i = 0; i < queue_refs; i++)
- blk_put_queue(blk_key->devs[i]);
kfree_sensitive(blk_key);
return err;
}
-void fscrypt_destroy_inline_crypt_key(struct fscrypt_prepared_key *prep_key)
+void fscrypt_destroy_inline_crypt_key(struct super_block *sb,
+ struct fscrypt_prepared_key *prep_key)
{
- struct fscrypt_blk_crypto_key *blk_key = prep_key->blk_key;
- int i;
+ struct blk_crypto_key *blk_key = prep_key->blk_key;
+ struct block_device **devs;
+ unsigned int num_devs;
+ unsigned int i;
- if (blk_key) {
- for (i = 0; i < blk_key->num_devs; i++) {
- blk_crypto_evict_key(blk_key->devs[i], &blk_key->base);
- blk_put_queue(blk_key->devs[i]);
- }
- kfree_sensitive(blk_key);
+ if (!blk_key)
+ return;
+
+ /* Evict the key from all the filesystem's block devices. */
+ devs = fscrypt_get_devices(sb, &num_devs);
+ if (!IS_ERR(devs)) {
+ for (i = 0; i < num_devs; i++)
+ blk_crypto_evict_key(bdev_get_queue(devs[i]), blk_key);
+ kfree(devs);
}
+ kfree_sensitive(blk_key);
}
bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode)
@@ -282,7 +277,7 @@ void fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
ci = inode->i_crypt_info;
fscrypt_generate_dun(ci, first_lblk, dun);
- bio_crypt_set_ctx(bio, &ci->ci_enc_key.blk_key->base, dun, gfp_mask);
+ bio_crypt_set_ctx(bio, ci->ci_enc_key.blk_key, dun, gfp_mask);
}
EXPORT_SYMBOL_GPL(fscrypt_set_bio_crypt_ctx);
@@ -369,7 +364,7 @@ bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode,
* uses the same pointer. I.e., there's currently no need to support
* merging requests where the keys are the same but the pointers differ.
*/
- if (bc->bc_key != &inode->i_crypt_info->ci_enc_key.blk_key->base)
+ if (bc->bc_key != inode->i_crypt_info->ci_enc_key.blk_key)
return false;
fscrypt_generate_dun(inode->i_crypt_info, next_lblk, next_dun);
@@ -401,46 +396,45 @@ bool fscrypt_mergeable_bio_bh(struct bio *bio,
EXPORT_SYMBOL_GPL(fscrypt_mergeable_bio_bh);
/**
- * fscrypt_dio_supported() - check whether a DIO (direct I/O) request is
- * supported as far as encryption is concerned
- * @iocb: the file and position the I/O is targeting
- * @iter: the I/O data segment(s)
+ * fscrypt_dio_supported() - check whether DIO (direct I/O) is supported on an
+ * inode, as far as encryption is concerned
+ * @inode: the inode in question
*
* Return: %true if there are no encryption constraints that prevent DIO from
* being supported; %false if DIO is unsupported. (Note that in the
* %true case, the filesystem might have other, non-encryption-related
- * constraints that prevent DIO from actually being supported.)
+ * constraints that prevent DIO from actually being supported. Also, on
+ * encrypted files the filesystem is still responsible for only allowing
+ * DIO when requests are filesystem-block-aligned.)
*/
-bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter)
+bool fscrypt_dio_supported(struct inode *inode)
{
- const struct inode *inode = file_inode(iocb->ki_filp);
- const unsigned int blocksize = i_blocksize(inode);
+ int err;
/* If the file is unencrypted, no veto from us. */
if (!fscrypt_needs_contents_encryption(inode))
return true;
- /* We only support DIO with inline crypto, not fs-layer crypto. */
- if (!fscrypt_inode_uses_inline_crypto(inode))
- return false;
-
/*
- * Since the granularity of encryption is filesystem blocks, the file
- * position and total I/O length must be aligned to the filesystem block
- * size -- not just to the block device's logical block size as is
- * traditionally the case for DIO on many filesystems.
+ * We only support DIO with inline crypto, not fs-layer crypto.
*
- * We require that the user-provided memory buffers be filesystem block
- * aligned too. It is simpler to have a single alignment value required
- * for all properties of the I/O, as is normally the case for DIO.
- * Also, allowing less aligned buffers would imply that data units could
- * cross bvecs, which would greatly complicate the I/O stack, which
- * assumes that bios can be split at any bvec boundary.
+ * To determine whether the inode is using inline crypto, we have to set
+ * up the key if it wasn't already done. This is because in the current
+ * design of fscrypt, the decision of whether to use inline crypto or
+ * not isn't made until the inode's encryption key is being set up. In
+ * the DIO read/write case, the key will always be set up already, since
+ * the file will be open. But in the case of statx(), the key might not
+ * be set up yet, as the file might not have been opened yet.
*/
- if (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), blocksize))
+ err = fscrypt_require_key(inode);
+ if (err) {
+ /*
+ * Key unavailable or couldn't be set up. This edge case isn't
+ * worth worrying about; just report that DIO is unsupported.
+ */
return false;
-
- return true;
+ }
+ return fscrypt_inode_uses_inline_crypto(inode);
}
EXPORT_SYMBOL_GPL(fscrypt_dio_supported);
diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c
index caee9f8620dd..2a24b1f0ae68 100644
--- a/fs/crypto/keyring.c
+++ b/fs/crypto/keyring.c
@@ -18,6 +18,7 @@
* information about these ioctls.
*/
+#include <asm/unaligned.h>
#include <crypto/skcipher.h>
#include <linux/key-type.h>
#include <linux/random.h>
@@ -25,6 +26,18 @@
#include "fscrypt_private.h"
+/* The master encryption keys for a filesystem (->s_master_keys) */
+struct fscrypt_keyring {
+ /*
+ * Lock that protects ->key_hashtable. It does *not* protect the
+ * fscrypt_master_key structs themselves.
+ */
+ spinlock_t lock;
+
+ /* Hash table that maps fscrypt_key_specifier to fscrypt_master_key */
+ struct hlist_head key_hashtable[128];
+};
+
static void wipe_master_key_secret(struct fscrypt_master_key_secret *secret)
{
fscrypt_destroy_hkdf(&secret->hkdf);
@@ -38,66 +51,81 @@ static void move_master_key_secret(struct fscrypt_master_key_secret *dst,
memzero_explicit(src, sizeof(*src));
}
-static void free_master_key(struct fscrypt_master_key *mk)
+static void fscrypt_free_master_key(struct rcu_head *head)
{
- size_t i;
-
- wipe_master_key_secret(&mk->mk_secret);
-
- for (i = 0; i <= FSCRYPT_MODE_MAX; i++) {
- fscrypt_destroy_prepared_key(&mk->mk_direct_keys[i]);
- fscrypt_destroy_prepared_key(&mk->mk_iv_ino_lblk_64_keys[i]);
- fscrypt_destroy_prepared_key(&mk->mk_iv_ino_lblk_32_keys[i]);
- }
-
- key_put(mk->mk_users);
+ struct fscrypt_master_key *mk =
+ container_of(head, struct fscrypt_master_key, mk_rcu_head);
+ /*
+ * The master key secret and any embedded subkeys should have already
+ * been wiped when the last active reference to the fscrypt_master_key
+ * struct was dropped; doing it here would be unnecessarily late.
+ * Nevertheless, use kfree_sensitive() in case anything was missed.
+ */
kfree_sensitive(mk);
}
-static inline bool valid_key_spec(const struct fscrypt_key_specifier *spec)
+void fscrypt_put_master_key(struct fscrypt_master_key *mk)
{
- if (spec->__reserved)
- return false;
- return master_key_spec_len(spec) != 0;
+ if (!refcount_dec_and_test(&mk->mk_struct_refs))
+ return;
+ /*
+ * No structural references left, so free ->mk_users, and also free the
+ * fscrypt_master_key struct itself after an RCU grace period ensures
+ * that concurrent keyring lookups can no longer find it.
+ */
+ WARN_ON(refcount_read(&mk->mk_active_refs) != 0);
+ key_put(mk->mk_users);
+ mk->mk_users = NULL;
+ call_rcu(&mk->mk_rcu_head, fscrypt_free_master_key);
}
-static int fscrypt_key_instantiate(struct key *key,
- struct key_preparsed_payload *prep)
+void fscrypt_put_master_key_activeref(struct fscrypt_master_key *mk)
{
- key->payload.data[0] = (struct fscrypt_master_key *)prep->data;
- return 0;
-}
+ struct super_block *sb = mk->mk_sb;
+ struct fscrypt_keyring *keyring = sb->s_master_keys;
+ size_t i;
-static void fscrypt_key_destroy(struct key *key)
-{
- free_master_key(key->payload.data[0]);
-}
+ if (!refcount_dec_and_test(&mk->mk_active_refs))
+ return;
+ /*
+ * No active references left, so complete the full removal of this
+ * fscrypt_master_key struct by removing it from the keyring and
+ * destroying any subkeys embedded in it.
+ */
-static void fscrypt_key_describe(const struct key *key, struct seq_file *m)
-{
- seq_puts(m, key->description);
+ spin_lock(&keyring->lock);
+ hlist_del_rcu(&mk->mk_node);
+ spin_unlock(&keyring->lock);
- if (key_is_positive(key)) {
- const struct fscrypt_master_key *mk = key->payload.data[0];
+ /*
+ * ->mk_active_refs == 0 implies that ->mk_secret is not present and
+ * that ->mk_decrypted_inodes is empty.
+ */
+ WARN_ON(is_master_key_secret_present(&mk->mk_secret));
+ WARN_ON(!list_empty(&mk->mk_decrypted_inodes));
- if (!is_master_key_secret_present(&mk->mk_secret))
- seq_puts(m, ": secret removed");
+ for (i = 0; i <= FSCRYPT_MODE_MAX; i++) {
+ fscrypt_destroy_prepared_key(
+ sb, &mk->mk_direct_keys[i]);
+ fscrypt_destroy_prepared_key(
+ sb, &mk->mk_iv_ino_lblk_64_keys[i]);
+ fscrypt_destroy_prepared_key(
+ sb, &mk->mk_iv_ino_lblk_32_keys[i]);
}
+ memzero_explicit(&mk->mk_ino_hash_key,
+ sizeof(mk->mk_ino_hash_key));
+ mk->mk_ino_hash_key_initialized = false;
+
+ /* Drop the structural ref associated with the active refs. */
+ fscrypt_put_master_key(mk);
}
-/*
- * Type of key in ->s_master_keys. Each key of this type represents a master
- * key which has been added to the filesystem. Its payload is a
- * 'struct fscrypt_master_key'. The "." prefix in the key type name prevents
- * users from adding keys of this type via the keyrings syscalls rather than via
- * the intended method of FS_IOC_ADD_ENCRYPTION_KEY.
- */
-static struct key_type key_type_fscrypt = {
- .name = "._fscrypt",
- .instantiate = fscrypt_key_instantiate,
- .destroy = fscrypt_key_destroy,
- .describe = fscrypt_key_describe,
-};
+static inline bool valid_key_spec(const struct fscrypt_key_specifier *spec)
+{
+ if (spec->__reserved)
+ return false;
+ return master_key_spec_len(spec) != 0;
+}
static int fscrypt_user_key_instantiate(struct key *key,
struct key_preparsed_payload *prep)
@@ -131,32 +159,6 @@ static struct key_type key_type_fscrypt_user = {
.describe = fscrypt_user_key_describe,
};
-/* Search ->s_master_keys or ->mk_users */
-static struct key *search_fscrypt_keyring(struct key *keyring,
- struct key_type *type,
- const char *description)
-{
- /*
- * We need to mark the keyring reference as "possessed" so that we
- * acquire permission to search it, via the KEY_POS_SEARCH permission.
- */
- key_ref_t keyref = make_key_ref(keyring, true /* possessed */);
-
- keyref = keyring_search(keyref, type, description, false);
- if (IS_ERR(keyref)) {
- if (PTR_ERR(keyref) == -EAGAIN || /* not found */
- PTR_ERR(keyref) == -EKEYREVOKED) /* recently invalidated */
- keyref = ERR_PTR(-ENOKEY);
- return ERR_CAST(keyref);
- }
- return key_ref_to_ptr(keyref);
-}
-
-#define FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE \
- (CONST_STRLEN("fscrypt-") + sizeof_field(struct super_block, s_id))
-
-#define FSCRYPT_MK_DESCRIPTION_SIZE (2 * FSCRYPT_KEY_IDENTIFIER_SIZE + 1)
-
#define FSCRYPT_MK_USERS_DESCRIPTION_SIZE \
(CONST_STRLEN("fscrypt-") + 2 * FSCRYPT_KEY_IDENTIFIER_SIZE + \
CONST_STRLEN("-users") + 1)
@@ -164,21 +166,6 @@ static struct key *search_fscrypt_keyring(struct key *keyring,
#define FSCRYPT_MK_USER_DESCRIPTION_SIZE \
(2 * FSCRYPT_KEY_IDENTIFIER_SIZE + CONST_STRLEN(".uid.") + 10 + 1)
-static void format_fs_keyring_description(
- char description[FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE],
- const struct super_block *sb)
-{
- sprintf(description, "fscrypt-%s", sb->s_id);
-}
-
-static void format_mk_description(
- char description[FSCRYPT_MK_DESCRIPTION_SIZE],
- const struct fscrypt_key_specifier *mk_spec)
-{
- sprintf(description, "%*phN",
- master_key_spec_len(mk_spec), (u8 *)&mk_spec->u);
-}
-
static void format_mk_users_keyring_description(
char description[FSCRYPT_MK_USERS_DESCRIPTION_SIZE],
const u8 mk_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE])
@@ -199,20 +186,15 @@ static void format_mk_user_description(
/* Create ->s_master_keys if needed. Synchronized by fscrypt_add_key_mutex. */
static int allocate_filesystem_keyring(struct super_block *sb)
{
- char description[FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE];
- struct key *keyring;
+ struct fscrypt_keyring *keyring;
if (sb->s_master_keys)
return 0;
- format_fs_keyring_description(description, sb);
- keyring = keyring_alloc(description, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
- current_cred(), KEY_POS_SEARCH |
- KEY_USR_SEARCH | KEY_USR_READ | KEY_USR_VIEW,
- KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
- if (IS_ERR(keyring))
- return PTR_ERR(keyring);
-
+ keyring = kzalloc(sizeof(*keyring), GFP_KERNEL);
+ if (!keyring)
+ return -ENOMEM;
+ spin_lock_init(&keyring->lock);
/*
* Pairs with the smp_load_acquire() in fscrypt_find_master_key().
* I.e., here we publish ->s_master_keys with a RELEASE barrier so that
@@ -222,21 +204,80 @@ static int allocate_filesystem_keyring(struct super_block *sb)
return 0;
}
-void fscrypt_sb_free(struct super_block *sb)
+/*
+ * Release all encryption keys that have been added to the filesystem, along
+ * with the keyring that contains them.
+ *
+ * This is called at unmount time. The filesystem's underlying block device(s)
+ * are still available at this time; this is important because after user file
+ * accesses have been allowed, this function may need to evict keys from the
+ * keyslots of an inline crypto engine, which requires the block device(s).
+ *
+ * This is also called when the super_block is being freed. This is needed to
+ * avoid a memory leak if mounting fails after the "test_dummy_encryption"
+ * option was processed, as in that case the unmount-time call isn't made.
+ */
+void fscrypt_destroy_keyring(struct super_block *sb)
{
- key_put(sb->s_master_keys);
+ struct fscrypt_keyring *keyring = sb->s_master_keys;
+ size_t i;
+
+ if (!keyring)
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(keyring->key_hashtable); i++) {
+ struct hlist_head *bucket = &keyring->key_hashtable[i];
+ struct fscrypt_master_key *mk;
+ struct hlist_node *tmp;
+
+ hlist_for_each_entry_safe(mk, tmp, bucket, mk_node) {
+ /*
+ * Since all inodes were already evicted, every key
+ * remaining in the keyring should have an empty inode
+ * list, and should only still be in the keyring due to
+ * the single active ref associated with ->mk_secret.
+ * There should be no structural refs beyond the one
+ * associated with the active ref.
+ */
+ WARN_ON(refcount_read(&mk->mk_active_refs) != 1);
+ WARN_ON(refcount_read(&mk->mk_struct_refs) != 1);
+ WARN_ON(!is_master_key_secret_present(&mk->mk_secret));
+ wipe_master_key_secret(&mk->mk_secret);
+ fscrypt_put_master_key_activeref(mk);
+ }
+ }
+ kfree_sensitive(keyring);
sb->s_master_keys = NULL;
}
+static struct hlist_head *
+fscrypt_mk_hash_bucket(struct fscrypt_keyring *keyring,
+ const struct fscrypt_key_specifier *mk_spec)
+{
+ /*
+ * Since key specifiers should be "random" values, it is sufficient to
+ * use a trivial hash function that just takes the first several bits of
+ * the key specifier.
+ */
+ unsigned long i = get_unaligned((unsigned long *)&mk_spec->u);
+
+ return &keyring->key_hashtable[i % ARRAY_SIZE(keyring->key_hashtable)];
+}
+
/*
- * Find the specified master key in ->s_master_keys.
- * Returns ERR_PTR(-ENOKEY) if not found.
+ * Find the specified master key struct in ->s_master_keys and take a structural
+ * ref to it. The structural ref guarantees that the key struct continues to
+ * exist, but it does *not* guarantee that ->s_master_keys continues to contain
+ * the key struct. The structural ref needs to be dropped by
+ * fscrypt_put_master_key(). Returns NULL if the key struct is not found.
*/
-struct key *fscrypt_find_master_key(struct super_block *sb,
- const struct fscrypt_key_specifier *mk_spec)
+struct fscrypt_master_key *
+fscrypt_find_master_key(struct super_block *sb,
+ const struct fscrypt_key_specifier *mk_spec)
{
- struct key *keyring;
- char description[FSCRYPT_MK_DESCRIPTION_SIZE];
+ struct fscrypt_keyring *keyring;
+ struct hlist_head *bucket;
+ struct fscrypt_master_key *mk;
/*
* Pairs with the smp_store_release() in allocate_filesystem_keyring().
@@ -246,10 +287,38 @@ struct key *fscrypt_find_master_key(struct super_block *sb,
*/
keyring = smp_load_acquire(&sb->s_master_keys);
if (keyring == NULL)
- return ERR_PTR(-ENOKEY); /* No keyring yet, so no keys yet. */
-
- format_mk_description(description, mk_spec);
- return search_fscrypt_keyring(keyring, &key_type_fscrypt, description);
+ return NULL; /* No keyring yet, so no keys yet. */
+
+ bucket = fscrypt_mk_hash_bucket(keyring, mk_spec);
+ rcu_read_lock();
+ switch (mk_spec->type) {
+ case FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR:
+ hlist_for_each_entry_rcu(mk, bucket, mk_node) {
+ if (mk->mk_spec.type ==
+ FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR &&
+ memcmp(mk->mk_spec.u.descriptor,
+ mk_spec->u.descriptor,
+ FSCRYPT_KEY_DESCRIPTOR_SIZE) == 0 &&
+ refcount_inc_not_zero(&mk->mk_struct_refs))
+ goto out;
+ }
+ break;
+ case FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER:
+ hlist_for_each_entry_rcu(mk, bucket, mk_node) {
+ if (mk->mk_spec.type ==
+ FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER &&
+ memcmp(mk->mk_spec.u.identifier,
+ mk_spec->u.identifier,
+ FSCRYPT_KEY_IDENTIFIER_SIZE) == 0 &&
+ refcount_inc_not_zero(&mk->mk_struct_refs))
+ goto out;
+ }
+ break;
+ }
+ mk = NULL;
+out:
+ rcu_read_unlock();
+ return mk;
}
static int allocate_master_key_users_keyring(struct fscrypt_master_key *mk)
@@ -277,17 +346,30 @@ static int allocate_master_key_users_keyring(struct fscrypt_master_key *mk)
static struct key *find_master_key_user(struct fscrypt_master_key *mk)
{
char description[FSCRYPT_MK_USER_DESCRIPTION_SIZE];
+ key_ref_t keyref;
format_mk_user_description(description, mk->mk_spec.u.identifier);
- return search_fscrypt_keyring(mk->mk_users, &key_type_fscrypt_user,
- description);
+
+ /*
+ * We need to mark the keyring reference as "possessed" so that we
+ * acquire permission to search it, via the KEY_POS_SEARCH permission.
+ */
+ keyref = keyring_search(make_key_ref(mk->mk_users, true /*possessed*/),
+ &key_type_fscrypt_user, description, false);
+ if (IS_ERR(keyref)) {
+ if (PTR_ERR(keyref) == -EAGAIN || /* not found */
+ PTR_ERR(keyref) == -EKEYREVOKED) /* recently invalidated */
+ keyref = ERR_PTR(-ENOKEY);
+ return ERR_CAST(keyref);
+ }
+ return key_ref_to_ptr(keyref);
}
/*
* Give the current user a "key" in ->mk_users. This charges the user's quota
* and marks the master key as added by the current user, so that it cannot be
- * removed by another user with the key. Either the master key's key->sem must
- * be held for write, or the master key must be still undergoing initialization.
+ * removed by another user with the key. Either ->mk_sem must be held for
+ * write, or the master key must be still undergoing initialization.
*/
static int add_master_key_user(struct fscrypt_master_key *mk)
{
@@ -309,7 +391,7 @@ static int add_master_key_user(struct fscrypt_master_key *mk)
/*
* Remove the current user's "key" from ->mk_users.
- * The master key's key->sem must be held for write.
+ * ->mk_sem must be held for write.
*
* Returns 0 if removed, -ENOKEY if not found, or another -errno code.
*/
@@ -327,63 +409,49 @@ static int remove_master_key_user(struct fscrypt_master_key *mk)
}
/*
- * Allocate a new fscrypt_master_key which contains the given secret, set it as
- * the payload of a new 'struct key' of type fscrypt, and link the 'struct key'
- * into the given keyring. Synchronized by fscrypt_add_key_mutex.
+ * Allocate a new fscrypt_master_key, transfer the given secret over to it, and
+ * insert it into sb->s_master_keys.
*/
-static int add_new_master_key(struct fscrypt_master_key_secret *secret,
- const struct fscrypt_key_specifier *mk_spec,
- struct key *keyring)
+static int add_new_master_key(struct super_block *sb,
+ struct fscrypt_master_key_secret *secret,
+ const struct fscrypt_key_specifier *mk_spec)
{
+ struct fscrypt_keyring *keyring = sb->s_master_keys;
struct fscrypt_master_key *mk;
- char description[FSCRYPT_MK_DESCRIPTION_SIZE];
- struct key *key;
int err;
mk = kzalloc(sizeof(*mk), GFP_KERNEL);
if (!mk)
return -ENOMEM;
+ mk->mk_sb = sb;
+ init_rwsem(&mk->mk_sem);
+ refcount_set(&mk->mk_struct_refs, 1);
mk->mk_spec = *mk_spec;
- move_master_key_secret(&mk->mk_secret, secret);
-
- refcount_set(&mk->mk_refcount, 1); /* secret is present */
INIT_LIST_HEAD(&mk->mk_decrypted_inodes);
spin_lock_init(&mk->mk_decrypted_inodes_lock);
if (mk_spec->type == FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER) {
err = allocate_master_key_users_keyring(mk);
if (err)
- goto out_free_mk;
+ goto out_put;
err = add_master_key_user(mk);
if (err)
- goto out_free_mk;
+ goto out_put;
}
- /*
- * Note that we don't charge this key to anyone's quota, since when
- * ->mk_users is in use those keys are charged instead, and otherwise
- * (when ->mk_users isn't in use) only root can add these keys.
- */
- format_mk_description(description, mk_spec);
- key = key_alloc(&key_type_fscrypt, description,
- GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
- KEY_POS_SEARCH | KEY_USR_SEARCH | KEY_USR_VIEW,
- KEY_ALLOC_NOT_IN_QUOTA, NULL);
- if (IS_ERR(key)) {
- err = PTR_ERR(key);
- goto out_free_mk;
- }
- err = key_instantiate_and_link(key, mk, sizeof(*mk), keyring, NULL);
- key_put(key);
- if (err)
- goto out_free_mk;
+ move_master_key_secret(&mk->mk_secret, secret);
+ refcount_set(&mk->mk_active_refs, 1); /* ->mk_secret is present */
+ spin_lock(&keyring->lock);
+ hlist_add_head_rcu(&mk->mk_node,
+ fscrypt_mk_hash_bucket(keyring, mk_spec));
+ spin_unlock(&keyring->lock);
return 0;
-out_free_mk:
- free_master_key(mk);
+out_put:
+ fscrypt_put_master_key(mk);
return err;
}
@@ -392,42 +460,34 @@ out_free_mk:
static int add_existing_master_key(struct fscrypt_master_key *mk,
struct fscrypt_master_key_secret *secret)
{
- struct key *mk_user;
- bool rekey;
int err;
/*
* If the current user is already in ->mk_users, then there's nothing to
- * do. (Not applicable for v1 policy keys, which have NULL ->mk_users.)
+ * do. Otherwise, we need to add the user to ->mk_users. (Neither is
+ * applicable for v1 policy keys, which have NULL ->mk_users.)
*/
if (mk->mk_users) {
- mk_user = find_master_key_user(mk);
+ struct key *mk_user = find_master_key_user(mk);
+
if (mk_user != ERR_PTR(-ENOKEY)) {
if (IS_ERR(mk_user))
return PTR_ERR(mk_user);
key_put(mk_user);
return 0;
}
- }
-
- /* If we'll be re-adding ->mk_secret, try to take the reference. */
- rekey = !is_master_key_secret_present(&mk->mk_secret);
- if (rekey && !refcount_inc_not_zero(&mk->mk_refcount))
- return KEY_DEAD;
-
- /* Add the current user to ->mk_users, if applicable. */
- if (mk->mk_users) {
err = add_master_key_user(mk);
- if (err) {
- if (rekey && refcount_dec_and_test(&mk->mk_refcount))
- return KEY_DEAD;
+ if (err)
return err;
- }
}
/* Re-add the secret if needed. */
- if (rekey)
+ if (!is_master_key_secret_present(&mk->mk_secret)) {
+ if (!refcount_inc_not_zero(&mk->mk_active_refs))
+ return KEY_DEAD;
move_master_key_secret(&mk->mk_secret, secret);
+ }
+
return 0;
}
@@ -436,38 +496,36 @@ static int do_add_master_key(struct super_block *sb,
const struct fscrypt_key_specifier *mk_spec)
{
static DEFINE_MUTEX(fscrypt_add_key_mutex);
- struct key *key;
+ struct fscrypt_master_key *mk;
int err;
mutex_lock(&fscrypt_add_key_mutex); /* serialize find + link */
-retry:
- key = fscrypt_find_master_key(sb, mk_spec);
- if (IS_ERR(key)) {
- err = PTR_ERR(key);
- if (err != -ENOKEY)
- goto out_unlock;
+
+ mk = fscrypt_find_master_key(sb, mk_spec);
+ if (!mk) {
/* Didn't find the key in ->s_master_keys. Add it. */
err = allocate_filesystem_keyring(sb);
- if (err)
- goto out_unlock;
- err = add_new_master_key(secret, mk_spec, sb->s_master_keys);
+ if (!err)
+ err = add_new_master_key(sb, secret, mk_spec);
} else {
/*
* Found the key in ->s_master_keys. Re-add the secret if
* needed, and add the user to ->mk_users if needed.
*/
- down_write(&key->sem);
- err = add_existing_master_key(key->payload.data[0], secret);
- up_write(&key->sem);
+ down_write(&mk->mk_sem);
+ err = add_existing_master_key(mk, secret);
+ up_write(&mk->mk_sem);
if (err == KEY_DEAD) {
- /* Key being removed or needs to be removed */
- key_invalidate(key);
- key_put(key);
- goto retry;
+ /*
+ * We found a key struct, but it's already been fully
+ * removed. Ignore the old struct and add a new one.
+ * fscrypt_add_key_mutex means we don't need to worry
+ * about concurrent adds.
+ */
+ err = add_new_master_key(sb, secret, mk_spec);
}
- key_put(key);
+ fscrypt_put_master_key(mk);
}
-out_unlock:
mutex_unlock(&fscrypt_add_key_mutex);
return err;
}
@@ -771,19 +829,19 @@ int fscrypt_verify_key_added(struct super_block *sb,
const u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE])
{
struct fscrypt_key_specifier mk_spec;
- struct key *key, *mk_user;
struct fscrypt_master_key *mk;
+ struct key *mk_user;
int err;
mk_spec.type = FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER;
memcpy(mk_spec.u.identifier, identifier, FSCRYPT_KEY_IDENTIFIER_SIZE);
- key = fscrypt_find_master_key(sb, &mk_spec);
- if (IS_ERR(key)) {
- err = PTR_ERR(key);
+ mk = fscrypt_find_master_key(sb, &mk_spec);
+ if (!mk) {
+ err = -ENOKEY;
goto out;
}
- mk = key->payload.data[0];
+ down_read(&mk->mk_sem);
mk_user = find_master_key_user(mk);
if (IS_ERR(mk_user)) {
err = PTR_ERR(mk_user);
@@ -791,7 +849,8 @@ int fscrypt_verify_key_added(struct super_block *sb,
key_put(mk_user);
err = 0;
}
- key_put(key);
+ up_read(&mk->mk_sem);
+ fscrypt_put_master_key(mk);
out:
if (err == -ENOKEY && capable(CAP_FOWNER))
err = 0;
@@ -953,11 +1012,10 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users)
struct super_block *sb = file_inode(filp)->i_sb;
struct fscrypt_remove_key_arg __user *uarg = _uarg;
struct fscrypt_remove_key_arg arg;
- struct key *key;
struct fscrypt_master_key *mk;
u32 status_flags = 0;
int err;
- bool dead;
+ bool inodes_remain;
if (copy_from_user(&arg, uarg, sizeof(arg)))
return -EFAULT;
@@ -977,12 +1035,10 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users)
return -EACCES;
/* Find the key being removed. */
- key = fscrypt_find_master_key(sb, &arg.key_spec);
- if (IS_ERR(key))
- return PTR_ERR(key);
- mk = key->payload.data[0];
-
- down_write(&key->sem);
+ mk = fscrypt_find_master_key(sb, &arg.key_spec);
+ if (!mk)
+ return -ENOKEY;
+ down_write(&mk->mk_sem);
/* If relevant, remove current user's (or all users) claim to the key */
if (mk->mk_users && mk->mk_users->keys.nr_leaves_on_tree != 0) {
@@ -991,7 +1047,7 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users)
else
err = remove_master_key_user(mk);
if (err) {
- up_write(&key->sem);
+ up_write(&mk->mk_sem);
goto out_put_key;
}
if (mk->mk_users->keys.nr_leaves_on_tree != 0) {
@@ -1003,26 +1059,22 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users)
status_flags |=
FSCRYPT_KEY_REMOVAL_STATUS_FLAG_OTHER_USERS;
err = 0;
- up_write(&key->sem);
+ up_write(&mk->mk_sem);
goto out_put_key;
}
}
/* No user claims remaining. Go ahead and wipe the secret. */
- dead = false;
+ err = -ENOKEY;
if (is_master_key_secret_present(&mk->mk_secret)) {
wipe_master_key_secret(&mk->mk_secret);
- dead = refcount_dec_and_test(&mk->mk_refcount);
- }
- up_write(&key->sem);
- if (dead) {
- /*
- * No inodes reference the key, and we wiped the secret, so the
- * key object is free to be removed from the keyring.
- */
- key_invalidate(key);
+ fscrypt_put_master_key_activeref(mk);
err = 0;
- } else {
+ }
+ inodes_remain = refcount_read(&mk->mk_active_refs) > 0;
+ up_write(&mk->mk_sem);
+
+ if (inodes_remain) {
/* Some inodes still reference this key; try to evict them. */
err = try_to_lock_encrypted_files(sb, mk);
if (err == -EBUSY) {
@@ -1038,7 +1090,7 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users)
* has been fully removed including all files locked.
*/
out_put_key:
- key_put(key);
+ fscrypt_put_master_key(mk);
if (err == 0)
err = put_user(status_flags, &uarg->removal_status_flags);
return err;
@@ -1085,7 +1137,6 @@ int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg)
{
struct super_block *sb = file_inode(filp)->i_sb;
struct fscrypt_get_key_status_arg arg;
- struct key *key;
struct fscrypt_master_key *mk;
int err;
@@ -1102,19 +1153,18 @@ int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg)
arg.user_count = 0;
memset(arg.__out_reserved, 0, sizeof(arg.__out_reserved));
- key = fscrypt_find_master_key(sb, &arg.key_spec);
- if (IS_ERR(key)) {
- if (key != ERR_PTR(-ENOKEY))
- return PTR_ERR(key);
+ mk = fscrypt_find_master_key(sb, &arg.key_spec);
+ if (!mk) {
arg.status = FSCRYPT_KEY_STATUS_ABSENT;
err = 0;
goto out;
}
- mk = key->payload.data[0];
- down_read(&key->sem);
+ down_read(&mk->mk_sem);
if (!is_master_key_secret_present(&mk->mk_secret)) {
- arg.status = FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED;
+ arg.status = refcount_read(&mk->mk_active_refs) > 0 ?
+ FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED :
+ FSCRYPT_KEY_STATUS_ABSENT /* raced with full removal */;
err = 0;
goto out_release_key;
}
@@ -1136,8 +1186,8 @@ int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg)
}
err = 0;
out_release_key:
- up_read(&key->sem);
- key_put(key);
+ up_read(&mk->mk_sem);
+ fscrypt_put_master_key(mk);
out:
if (!err && copy_to_user(uarg, &arg, sizeof(arg)))
err = -EFAULT;
@@ -1149,13 +1199,9 @@ int __init fscrypt_init_keyring(void)
{
int err;
- err = register_key_type(&key_type_fscrypt);
- if (err)
- return err;
-
err = register_key_type(&key_type_fscrypt_user);
if (err)
- goto err_unregister_fscrypt;
+ return err;
err = register_key_type(&key_type_fscrypt_provisioning);
if (err)
@@ -1165,7 +1211,5 @@ int __init fscrypt_init_keyring(void)
err_unregister_fscrypt_user:
unregister_key_type(&key_type_fscrypt_user);
-err_unregister_fscrypt:
- unregister_key_type(&key_type_fscrypt);
return err;
}
diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c
index fbc71abdabe3..f7407071a952 100644
--- a/fs/crypto/keysetup.c
+++ b/fs/crypto/keysetup.c
@@ -9,7 +9,6 @@
*/
#include <crypto/skcipher.h>
-#include <linux/key.h>
#include <linux/random.h>
#include "fscrypt_private.h"
@@ -155,10 +154,12 @@ int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key,
}
/* Destroy a crypto transform object and/or blk-crypto key. */
-void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key)
+void fscrypt_destroy_prepared_key(struct super_block *sb,
+ struct fscrypt_prepared_key *prep_key)
{
crypto_free_skcipher(prep_key->tfm);
- fscrypt_destroy_inline_crypt_key(prep_key);
+ fscrypt_destroy_inline_crypt_key(sb, prep_key);
+ memzero_explicit(prep_key, sizeof(*prep_key));
}
/* Given a per-file encryption key, set up the file's crypto transform object */
@@ -412,20 +413,18 @@ static bool fscrypt_valid_master_key_size(const struct fscrypt_master_key *mk,
/*
* Find the master key, then set up the inode's actual encryption key.
*
- * If the master key is found in the filesystem-level keyring, then the
- * corresponding 'struct key' is returned in *master_key_ret with its semaphore
- * read-locked. This is needed to ensure that only one task links the
- * fscrypt_info into ->mk_decrypted_inodes (as multiple tasks may race to create
- * an fscrypt_info for the same inode), and to synchronize the master key being
- * removed with a new inode starting to use it.
+ * If the master key is found in the filesystem-level keyring, then it is
+ * returned in *mk_ret with its semaphore read-locked. This is needed to ensure
+ * that only one task links the fscrypt_info into ->mk_decrypted_inodes (as
+ * multiple tasks may race to create an fscrypt_info for the same inode), and to
+ * synchronize the master key being removed with a new inode starting to use it.
*/
static int setup_file_encryption_key(struct fscrypt_info *ci,
bool need_dirhash_key,
- struct key **master_key_ret)
+ struct fscrypt_master_key **mk_ret)
{
- struct key *key;
- struct fscrypt_master_key *mk = NULL;
struct fscrypt_key_specifier mk_spec;
+ struct fscrypt_master_key *mk;
int err;
err = fscrypt_select_encryption_impl(ci);
@@ -436,11 +435,10 @@ static int setup_file_encryption_key(struct fscrypt_info *ci,
if (err)
return err;
- key = fscrypt_find_master_key(ci->ci_inode->i_sb, &mk_spec);
- if (IS_ERR(key)) {
- if (key != ERR_PTR(-ENOKEY) ||
- ci->ci_policy.version != FSCRYPT_POLICY_V1)
- return PTR_ERR(key);
+ mk = fscrypt_find_master_key(ci->ci_inode->i_sb, &mk_spec);
+ if (!mk) {
+ if (ci->ci_policy.version != FSCRYPT_POLICY_V1)
+ return -ENOKEY;
/*
* As a legacy fallback for v1 policies, search for the key in
@@ -450,9 +448,7 @@ static int setup_file_encryption_key(struct fscrypt_info *ci,
*/
return fscrypt_setup_v1_file_key_via_subscribed_keyrings(ci);
}
-
- mk = key->payload.data[0];
- down_read(&key->sem);
+ down_read(&mk->mk_sem);
/* Has the secret been removed (via FS_IOC_REMOVE_ENCRYPTION_KEY)? */
if (!is_master_key_secret_present(&mk->mk_secret)) {
@@ -480,18 +476,18 @@ static int setup_file_encryption_key(struct fscrypt_info *ci,
if (err)
goto out_release_key;
- *master_key_ret = key;
+ *mk_ret = mk;
return 0;
out_release_key:
- up_read(&key->sem);
- key_put(key);
+ up_read(&mk->mk_sem);
+ fscrypt_put_master_key(mk);
return err;
}
static void put_crypt_info(struct fscrypt_info *ci)
{
- struct key *key;
+ struct fscrypt_master_key *mk;
if (!ci)
return;
@@ -499,26 +495,21 @@ static void put_crypt_info(struct fscrypt_info *ci)
if (ci->ci_direct_key)
fscrypt_put_direct_key(ci->ci_direct_key);
else if (ci->ci_owns_key)
- fscrypt_destroy_prepared_key(&ci->ci_enc_key);
-
- key = ci->ci_master_key;
- if (key) {
- struct fscrypt_master_key *mk = key->payload.data[0];
+ fscrypt_destroy_prepared_key(ci->ci_inode->i_sb,
+ &ci->ci_enc_key);
+ mk = ci->ci_master_key;
+ if (mk) {
/*
* Remove this inode from the list of inodes that were unlocked
- * with the master key.
- *
- * In addition, if we're removing the last inode from a key that
- * already had its secret removed, invalidate the key so that it
- * gets removed from ->s_master_keys.
+ * with the master key. In addition, if we're removing the last
+ * inode from a master key struct that already had its secret
+ * removed, then complete the full removal of the struct.
*/
spin_lock(&mk->mk_decrypted_inodes_lock);
list_del(&ci->ci_master_key_link);
spin_unlock(&mk->mk_decrypted_inodes_lock);
- if (refcount_dec_and_test(&mk->mk_refcount))
- key_invalidate(key);
- key_put(key);
+ fscrypt_put_master_key_activeref(mk);
}
memzero_explicit(ci, sizeof(*ci));
kmem_cache_free(fscrypt_info_cachep, ci);
@@ -532,7 +523,7 @@ fscrypt_setup_encryption_info(struct inode *inode,
{
struct fscrypt_info *crypt_info;
struct fscrypt_mode *mode;
- struct key *master_key = NULL;
+ struct fscrypt_master_key *mk = NULL;
int res;
res = fscrypt_initialize(inode->i_sb->s_cop->flags);
@@ -555,8 +546,7 @@ fscrypt_setup_encryption_info(struct inode *inode,
WARN_ON(mode->ivsize > FSCRYPT_MAX_IV_SIZE);
crypt_info->ci_mode = mode;
- res = setup_file_encryption_key(crypt_info, need_dirhash_key,
- &master_key);
+ res = setup_file_encryption_key(crypt_info, need_dirhash_key, &mk);
if (res)
goto out;
@@ -571,12 +561,9 @@ fscrypt_setup_encryption_info(struct inode *inode,
* We won the race and set ->i_crypt_info to our crypt_info.
* Now link it into the master key's inode list.
*/
- if (master_key) {
- struct fscrypt_master_key *mk =
- master_key->payload.data[0];
-
- refcount_inc(&mk->mk_refcount);
- crypt_info->ci_master_key = key_get(master_key);
+ if (mk) {
+ crypt_info->ci_master_key = mk;
+ refcount_inc(&mk->mk_active_refs);
spin_lock(&mk->mk_decrypted_inodes_lock);
list_add(&crypt_info->ci_master_key_link,
&mk->mk_decrypted_inodes);
@@ -586,9 +573,9 @@ fscrypt_setup_encryption_info(struct inode *inode,
}
res = 0;
out:
- if (master_key) {
- up_read(&master_key->sem);
- key_put(master_key);
+ if (mk) {
+ up_read(&mk->mk_sem);
+ fscrypt_put_master_key(mk);
}
put_crypt_info(crypt_info);
return res;
@@ -753,7 +740,6 @@ EXPORT_SYMBOL(fscrypt_free_inode);
int fscrypt_drop_inode(struct inode *inode)
{
const struct fscrypt_info *ci = fscrypt_get_info(inode);
- const struct fscrypt_master_key *mk;
/*
* If ci is NULL, then the inode doesn't have an encryption key set up
@@ -763,7 +749,6 @@ int fscrypt_drop_inode(struct inode *inode)
*/
if (!ci || !ci->ci_master_key)
return 0;
- mk = ci->ci_master_key->payload.data[0];
/*
* With proper, non-racy use of FS_IOC_REMOVE_ENCRYPTION_KEY, all inodes
@@ -782,6 +767,6 @@ int fscrypt_drop_inode(struct inode *inode)
* then the thread removing the key will either evict the inode itself
* or will correctly detect that it wasn't evicted due to the race.
*/
- return !is_master_key_secret_present(&mk->mk_secret);
+ return !is_master_key_secret_present(&ci->ci_master_key->mk_secret);
}
EXPORT_SYMBOL_GPL(fscrypt_drop_inode);
diff --git a/fs/crypto/keysetup_v1.c b/fs/crypto/keysetup_v1.c
index 2762c5350432..75dabd9b27f9 100644
--- a/fs/crypto/keysetup_v1.c
+++ b/fs/crypto/keysetup_v1.c
@@ -143,6 +143,7 @@ invalid:
/* Master key referenced by DIRECT_KEY policy */
struct fscrypt_direct_key {
+ struct super_block *dk_sb;
struct hlist_node dk_node;
refcount_t dk_refcount;
const struct fscrypt_mode *dk_mode;
@@ -154,7 +155,7 @@ struct fscrypt_direct_key {
static void free_direct_key(struct fscrypt_direct_key *dk)
{
if (dk) {
- fscrypt_destroy_prepared_key(&dk->dk_key);
+ fscrypt_destroy_prepared_key(dk->dk_sb, &dk->dk_key);
kfree_sensitive(dk);
}
}
@@ -231,6 +232,7 @@ fscrypt_get_direct_key(const struct fscrypt_info *ci, const u8 *raw_key)
dk = kzalloc(sizeof(*dk), GFP_KERNEL);
if (!dk)
return ERR_PTR(-ENOMEM);
+ dk->dk_sb = ci->ci_inode->i_sb;
refcount_set(&dk->dk_refcount, 1);
dk->dk_mode = ci->ci_mode;
err = fscrypt_prepare_key(&dk->dk_key, raw_key, ci);
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 80b8ca0f340b..46757c3052ef 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -744,12 +744,8 @@ int fscrypt_set_context(struct inode *inode, void *fs_data)
* delayed key setup that requires the inode number.
*/
if (ci->ci_policy.version == FSCRYPT_POLICY_V2 &&
- (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) {
- const struct fscrypt_master_key *mk =
- ci->ci_master_key->payload.data[0];
-
- fscrypt_hash_inode_number(ci, mk);
- }
+ (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32))
+ fscrypt_hash_inode_number(ci, ci->ci_master_key);
return inode->i_sb->s_cop->set_context(inode, &ctx, ctxsize, fs_data);
}
@@ -833,19 +829,6 @@ bool fscrypt_dummy_policies_equal(const struct fscrypt_dummy_policy *p1,
}
EXPORT_SYMBOL_GPL(fscrypt_dummy_policies_equal);
-/* Deprecated, do not use */
-int fscrypt_set_test_dummy_encryption(struct super_block *sb, const char *arg,
- struct fscrypt_dummy_policy *dummy_policy)
-{
- struct fs_parameter param = {
- .type = fs_value_is_string,
- .string = arg ? (char *)arg : "",
- };
- return fscrypt_parse_test_dummy_encryption(&param, dummy_policy) ?:
- fscrypt_add_test_dummy_key(sb, dummy_policy);
-}
-EXPORT_SYMBOL_GPL(fscrypt_set_test_dummy_encryption);
-
/**
* fscrypt_show_test_dummy_encryption() - show '-o test_dummy_encryption'
* @seq: the seq_file to print the option to
diff --git a/fs/d_path.c b/fs/d_path.c
index e4e0ebad1f15..56a6ee4c6331 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -34,7 +34,7 @@ static bool prepend_char(struct prepend_buffer *p, unsigned char c)
}
/*
- * The source of the prepend data can be an optimistoc load
+ * The source of the prepend data can be an optimistic load
* of a dentry name and length. And because we don't hold any
* locks, the length and the pointer to the name may not be
* in sync if a concurrent rename happens, and the kernel
@@ -297,8 +297,7 @@ EXPORT_SYMBOL(d_path);
/*
* Helper function for dentry_operations.d_dname() members
*/
-char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
- const char *fmt, ...)
+char *dynamic_dname(char *buffer, int buflen, const char *fmt, ...)
{
va_list args;
char temp[64];
diff --git a/fs/dcache.c b/fs/dcache.c
index bb0c4d0038db..52e6d5fdab6b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2597,15 +2597,7 @@ EXPORT_SYMBOL(d_rehash);
static inline unsigned start_dir_add(struct inode *dir)
{
- /*
- * The caller holds a spinlock (dentry::d_lock). On !PREEMPT_RT
- * kernels spin_lock() implicitly disables preemption, but not on
- * PREEMPT_RT. So for RT it has to be done explicitly to protect
- * the sequence count write side critical section against a reader
- * or another writer preempting, which would result in a live lock.
- */
- if (IS_ENABLED(CONFIG_PREEMPT_RT))
- preempt_disable();
+ preempt_disable_nested();
for (;;) {
unsigned n = dir->i_dir_seq;
if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
@@ -2618,8 +2610,7 @@ static inline void end_dir_add(struct inode *dir, unsigned int n,
wait_queue_head_t *d_wait)
{
smp_store_release(&dir->i_dir_seq, n + 2);
- if (IS_ENABLED(CONFIG_PREEMPT_RT))
- preempt_enable();
+ preempt_enable_nested();
wake_up_all(d_wait);
}
@@ -3258,8 +3249,10 @@ void d_genocide(struct dentry *parent)
EXPORT_SYMBOL(d_genocide);
-void d_tmpfile(struct dentry *dentry, struct inode *inode)
+void d_tmpfile(struct file *file, struct inode *inode)
{
+ struct dentry *dentry = file->f_path.dentry;
+
inode_dec_link_count(inode);
BUG_ON(dentry->d_name.name != dentry->d_iname ||
!hlist_unhashed(&dentry->d_u.d_alias) ||
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 950c63fa4d0b..ddb3fc258df9 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -1121,7 +1121,7 @@ void debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs,
}
EXPORT_SYMBOL_GPL(debugfs_print_regs32);
-static int debugfs_show_regset32(struct seq_file *s, void *data)
+static int debugfs_regset32_show(struct seq_file *s, void *data)
{
struct debugfs_regset32 *regset = s->private;
@@ -1136,17 +1136,7 @@ static int debugfs_show_regset32(struct seq_file *s, void *data)
return 0;
}
-static int debugfs_open_regset32(struct inode *inode, struct file *file)
-{
- return single_open(file, debugfs_show_regset32, inode->i_private);
-}
-
-static const struct file_operations fops_regset32 = {
- .open = debugfs_open_regset32,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(debugfs_regset32);
/**
* debugfs_create_regset32 - create a debugfs file that returns register values
@@ -1167,7 +1157,7 @@ void debugfs_create_regset32(const char *name, umode_t mode,
struct dentry *parent,
struct debugfs_regset32 *regset)
{
- debugfs_create_file(name, mode, parent, regset, &fops_regset32);
+ debugfs_create_file(name, mode, parent, regset, &debugfs_regset32_fops);
}
EXPORT_SYMBOL_GPL(debugfs_create_regset32);
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 232cfdf095ae..2e8e112b1993 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -82,6 +82,8 @@ struct debugfs_mount_opts {
kuid_t uid;
kgid_t gid;
umode_t mode;
+ /* Opt_* bitfield. */
+ unsigned int opts;
};
enum {
@@ -111,6 +113,7 @@ static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts)
kgid_t gid;
char *p;
+ opts->opts = 0;
opts->mode = DEBUGFS_DEFAULT_MODE;
while ((p = strsep(&data, ",")) != NULL) {
@@ -145,24 +148,44 @@ static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts)
* but traditionally debugfs has ignored all mount options
*/
}
+
+ opts->opts |= BIT(token);
}
return 0;
}
-static int debugfs_apply_options(struct super_block *sb)
+static void _debugfs_apply_options(struct super_block *sb, bool remount)
{
struct debugfs_fs_info *fsi = sb->s_fs_info;
struct inode *inode = d_inode(sb->s_root);
struct debugfs_mount_opts *opts = &fsi->mount_opts;
- inode->i_mode &= ~S_IALLUGO;
- inode->i_mode |= opts->mode;
+ /*
+ * On remount, only reset mode/uid/gid if they were provided as mount
+ * options.
+ */
- inode->i_uid = opts->uid;
- inode->i_gid = opts->gid;
+ if (!remount || opts->opts & BIT(Opt_mode)) {
+ inode->i_mode &= ~S_IALLUGO;
+ inode->i_mode |= opts->mode;
+ }
- return 0;
+ if (!remount || opts->opts & BIT(Opt_uid))
+ inode->i_uid = opts->uid;
+
+ if (!remount || opts->opts & BIT(Opt_gid))
+ inode->i_gid = opts->gid;
+}
+
+static void debugfs_apply_options(struct super_block *sb)
+{
+ _debugfs_apply_options(sb, false);
+}
+
+static void debugfs_apply_options_remount(struct super_block *sb)
+{
+ _debugfs_apply_options(sb, true);
}
static int debugfs_remount(struct super_block *sb, int *flags, char *data)
@@ -175,7 +198,7 @@ static int debugfs_remount(struct super_block *sb, int *flags, char *data)
if (err)
goto fail;
- debugfs_apply_options(sb);
+ debugfs_apply_options_remount(sb);
fail:
return err;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index f669163d5860..03d381377ae1 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -421,8 +421,6 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
unsigned long flags;
bio->bi_private = dio;
- /* don't account direct I/O as memory stall */
- bio_clear_flag(bio, BIO_WORKINGSET);
spin_lock_irqsave(&dio->bio_lock, flags);
dio->refcount++;
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index 19ef136f9e4f..d60a8d8f109d 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -200,13 +200,13 @@ void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
if (!prev_seq) {
kref_get(&lkb->lkb_ref);
+ mutex_lock(&ls->ls_cb_mutex);
if (test_bit(LSFL_CB_DELAY, &ls->ls_flags)) {
- mutex_lock(&ls->ls_cb_mutex);
list_add(&lkb->lkb_cb_list, &ls->ls_cb_delay);
- mutex_unlock(&ls->ls_cb_mutex);
} else {
queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
}
+ mutex_unlock(&ls->ls_cb_mutex);
}
out:
mutex_unlock(&lkb->lkb_cb_mutex);
@@ -288,10 +288,13 @@ void dlm_callback_stop(struct dlm_ls *ls)
void dlm_callback_suspend(struct dlm_ls *ls)
{
- set_bit(LSFL_CB_DELAY, &ls->ls_flags);
+ if (ls->ls_callback_wq) {
+ mutex_lock(&ls->ls_cb_mutex);
+ set_bit(LSFL_CB_DELAY, &ls->ls_flags);
+ mutex_unlock(&ls->ls_cb_mutex);
- if (ls->ls_callback_wq)
flush_workqueue(ls->ls_callback_wq);
+ }
}
#define MAX_CB_QUEUE 25
@@ -302,11 +305,11 @@ void dlm_callback_resume(struct dlm_ls *ls)
int count = 0, sum = 0;
bool empty;
- clear_bit(LSFL_CB_DELAY, &ls->ls_flags);
-
if (!ls->ls_callback_wq)
return;
+ clear_bit(LSFL_CB_DELAY, &ls->ls_flags);
+
more:
mutex_lock(&ls->ls_cb_mutex);
list_for_each_entry_safe(lkb, safe, &ls->ls_cb_delay, lkb_cb_list) {
diff --git a/fs/dlm/ast.h b/fs/dlm/ast.h
index 181ad7d20c4d..e5e05fcc5813 100644
--- a/fs/dlm/ast.h
+++ b/fs/dlm/ast.h
@@ -11,7 +11,6 @@
#ifndef __ASTD_DOT_H__
#define __ASTD_DOT_H__
-void dlm_del_ast(struct dlm_lkb *lkb);
int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
int status, uint32_t sbflags, uint64_t seq);
int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 8aca8085d24e..e34c3d2639a5 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -661,7 +661,7 @@ struct dlm_ls {
spinlock_t ls_recover_idr_lock;
wait_queue_head_t ls_wait_general;
wait_queue_head_t ls_recover_lock_wait;
- struct mutex ls_clear_proc_locks;
+ spinlock_t ls_clear_proc_locks;
struct list_head ls_root_list; /* root resources */
struct rw_semaphore ls_root_sem; /* protect root_list */
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index dac7eb75dba9..94a72ede5764 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -401,7 +401,7 @@ static int pre_rsb_struct(struct dlm_ls *ls)
unlock any spinlocks, go back and call pre_rsb_struct again.
Otherwise, take an rsb off the list and return it. */
-static int get_rsb_struct(struct dlm_ls *ls, char *name, int len,
+static int get_rsb_struct(struct dlm_ls *ls, const void *name, int len,
struct dlm_rsb **r_ret)
{
struct dlm_rsb *r;
@@ -412,7 +412,8 @@ static int get_rsb_struct(struct dlm_ls *ls, char *name, int len,
count = ls->ls_new_rsb_count;
spin_unlock(&ls->ls_new_rsb_spin);
log_debug(ls, "find_rsb retry %d %d %s",
- count, dlm_config.ci_new_rsb_count, name);
+ count, dlm_config.ci_new_rsb_count,
+ (const char *)name);
return -EAGAIN;
}
@@ -448,7 +449,7 @@ static int rsb_cmp(struct dlm_rsb *r, const char *name, int nlen)
return memcmp(r->res_name, maxname, DLM_RESNAME_MAXLEN);
}
-int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len,
+int dlm_search_rsb_tree(struct rb_root *tree, const void *name, int len,
struct dlm_rsb **r_ret)
{
struct rb_node *node = tree->rb_node;
@@ -546,7 +547,7 @@ static int rsb_insert(struct dlm_rsb *rsb, struct rb_root *tree)
* while that rsb has a potentially stale master.)
*/
-static int find_rsb_dir(struct dlm_ls *ls, char *name, int len,
+static int find_rsb_dir(struct dlm_ls *ls, const void *name, int len,
uint32_t hash, uint32_t b,
int dir_nodeid, int from_nodeid,
unsigned int flags, struct dlm_rsb **r_ret)
@@ -724,7 +725,7 @@ static int find_rsb_dir(struct dlm_ls *ls, char *name, int len,
dlm_recover_locks) before we've made ourself master (in
dlm_recover_masters). */
-static int find_rsb_nodir(struct dlm_ls *ls, char *name, int len,
+static int find_rsb_nodir(struct dlm_ls *ls, const void *name, int len,
uint32_t hash, uint32_t b,
int dir_nodeid, int from_nodeid,
unsigned int flags, struct dlm_rsb **r_ret)
@@ -818,8 +819,9 @@ static int find_rsb_nodir(struct dlm_ls *ls, char *name, int len,
return error;
}
-static int find_rsb(struct dlm_ls *ls, char *name, int len, int from_nodeid,
- unsigned int flags, struct dlm_rsb **r_ret)
+static int find_rsb(struct dlm_ls *ls, const void *name, int len,
+ int from_nodeid, unsigned int flags,
+ struct dlm_rsb **r_ret)
{
uint32_t hash, b;
int dir_nodeid;
@@ -2864,17 +2866,9 @@ static int set_unlock_args(uint32_t flags, void *astarg, struct dlm_args *args)
static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
struct dlm_args *args)
{
- int rv = -EINVAL;
+ int rv = -EBUSY;
if (args->flags & DLM_LKF_CONVERT) {
- if (lkb->lkb_flags & DLM_IFL_MSTCPY)
- goto out;
-
- if (args->flags & DLM_LKF_QUECVT &&
- !__quecvt_compat_matrix[lkb->lkb_grmode+1][args->mode+1])
- goto out;
-
- rv = -EBUSY;
if (lkb->lkb_status != DLM_LKSTS_GRANTED)
goto out;
@@ -2884,6 +2878,14 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
if (is_overlap(lkb))
goto out;
+
+ rv = -EINVAL;
+ if (lkb->lkb_flags & DLM_IFL_MSTCPY)
+ goto out;
+
+ if (args->flags & DLM_LKF_QUECVT &&
+ !__quecvt_compat_matrix[lkb->lkb_grmode+1][args->mode+1])
+ goto out;
}
lkb->lkb_exflags = args->flags;
@@ -2900,11 +2902,25 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
#endif
rv = 0;
out:
- if (rv)
- log_debug(ls, "validate_lock_args %d %x %x %x %d %d %s",
+ switch (rv) {
+ case 0:
+ break;
+ case -EINVAL:
+ /* annoy the user because dlm usage is wrong */
+ WARN_ON(1);
+ log_error(ls, "%s %d %x %x %x %d %d %s", __func__,
+ rv, lkb->lkb_id, lkb->lkb_flags, args->flags,
+ lkb->lkb_status, lkb->lkb_wait_type,
+ lkb->lkb_resource->res_name);
+ break;
+ default:
+ log_debug(ls, "%s %d %x %x %x %d %d %s", __func__,
rv, lkb->lkb_id, lkb->lkb_flags, args->flags,
lkb->lkb_status, lkb->lkb_wait_type,
lkb->lkb_resource->res_name);
+ break;
+ }
+
return rv;
}
@@ -2918,23 +2934,12 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
{
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
- int rv = -EINVAL;
-
- if (lkb->lkb_flags & DLM_IFL_MSTCPY) {
- log_error(ls, "unlock on MSTCPY %x", lkb->lkb_id);
- dlm_print_lkb(lkb);
- goto out;
- }
-
- /* an lkb may still exist even though the lock is EOL'ed due to a
- cancel, unlock or failed noqueue request; an app can't use these
- locks; return same error as if the lkid had not been found at all */
+ int rv = -EBUSY;
- if (lkb->lkb_flags & DLM_IFL_ENDOFLIFE) {
- log_debug(ls, "unlock on ENDOFLIFE %x", lkb->lkb_id);
- rv = -ENOENT;
+ /* normal unlock not allowed if there's any op in progress */
+ if (!(args->flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)) &&
+ (lkb->lkb_wait_type || lkb->lkb_wait_count))
goto out;
- }
/* an lkb may be waiting for an rsb lookup to complete where the
lookup was initiated by another lock */
@@ -2949,7 +2954,24 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
unhold_lkb(lkb); /* undoes create_lkb() */
}
/* caller changes -EBUSY to 0 for CANCEL and FORCEUNLOCK */
- rv = -EBUSY;
+ goto out;
+ }
+
+ rv = -EINVAL;
+ if (lkb->lkb_flags & DLM_IFL_MSTCPY) {
+ log_error(ls, "unlock on MSTCPY %x", lkb->lkb_id);
+ dlm_print_lkb(lkb);
+ goto out;
+ }
+
+ /* an lkb may still exist even though the lock is EOL'ed due to a
+ * cancel, unlock or failed noqueue request; an app can't use these
+ * locks; return same error as if the lkid had not been found at all
+ */
+
+ if (lkb->lkb_flags & DLM_IFL_ENDOFLIFE) {
+ log_debug(ls, "unlock on ENDOFLIFE %x", lkb->lkb_id);
+ rv = -ENOENT;
goto out;
}
@@ -3022,14 +3044,8 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
goto out;
}
/* add_to_waiters() will set OVERLAP_UNLOCK */
- goto out_ok;
}
- /* normal unlock not allowed if there's any op in progress */
- rv = -EBUSY;
- if (lkb->lkb_wait_type || lkb->lkb_wait_count)
- goto out;
-
out_ok:
/* an overlapping op shouldn't blow away exflags from other op */
lkb->lkb_exflags |= args->flags;
@@ -3037,11 +3053,25 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
lkb->lkb_astparam = args->astparam;
rv = 0;
out:
- if (rv)
- log_debug(ls, "validate_unlock_args %d %x %x %x %x %d %s", rv,
+ switch (rv) {
+ case 0:
+ break;
+ case -EINVAL:
+ /* annoy the user because dlm usage is wrong */
+ WARN_ON(1);
+ log_error(ls, "%s %d %x %x %x %x %d %s", __func__, rv,
+ lkb->lkb_id, lkb->lkb_flags, lkb->lkb_exflags,
+ args->flags, lkb->lkb_wait_type,
+ lkb->lkb_resource->res_name);
+ break;
+ default:
+ log_debug(ls, "%s %d %x %x %x %x %d %s", __func__, rv,
lkb->lkb_id, lkb->lkb_flags, lkb->lkb_exflags,
args->flags, lkb->lkb_wait_type,
lkb->lkb_resource->res_name);
+ break;
+ }
+
return rv;
}
@@ -3292,8 +3322,9 @@ static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
* request_lock(), convert_lock(), unlock_lock(), cancel_lock()
*/
-static int request_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, char *name,
- int len, struct dlm_args *args)
+static int request_lock(struct dlm_ls *ls, struct dlm_lkb *lkb,
+ const void *name, int len,
+ struct dlm_args *args)
{
struct dlm_rsb *r;
int error;
@@ -3392,7 +3423,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
int mode,
struct dlm_lksb *lksb,
uint32_t flags,
- void *name,
+ const void *name,
unsigned int namelen,
uint32_t parent_lkid,
void (*ast) (void *astarg),
@@ -3438,7 +3469,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
if (error == -EINPROGRESS)
error = 0;
out_put:
- trace_dlm_lock_end(ls, lkb, name, namelen, mode, flags, error);
+ trace_dlm_lock_end(ls, lkb, name, namelen, mode, flags, error, true);
if (convert || error)
__put_lkb(ls, lkb);
@@ -3623,7 +3654,7 @@ static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb,
case cpu_to_le32(DLM_MSG_REQUEST_REPLY):
case cpu_to_le32(DLM_MSG_CONVERT_REPLY):
case cpu_to_le32(DLM_MSG_GRANT):
- if (!lkb->lkb_lvbptr)
+ if (!lkb->lkb_lvbptr || !(lkb->lkb_exflags & DLM_LKF_VALBLK))
break;
memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
break;
@@ -5080,8 +5111,11 @@ void dlm_receive_buffer(union dlm_packet *p, int nodeid)
down_read(&ls->ls_recv_active);
if (hd->h_cmd == DLM_MSG)
dlm_receive_message(ls, &p->message, nodeid);
- else
+ else if (hd->h_cmd == DLM_RCOM)
dlm_receive_rcom(ls, &p->rcom, nodeid);
+ else
+ log_error(ls, "invalid h_cmd %d from %d lockspace %x",
+ hd->h_cmd, nodeid, le32_to_cpu(hd->u.h_lockspace));
up_read(&ls->ls_recv_active);
dlm_put_lockspace(ls);
@@ -5801,6 +5835,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
{
struct dlm_lkb *lkb;
struct dlm_args args;
+ bool do_put = true;
int error;
dlm_lock_recovery(ls);
@@ -5811,13 +5846,14 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
goto out;
}
+ trace_dlm_lock_start(ls, lkb, name, namelen, mode, flags);
+
if (flags & DLM_LKF_VALBLK) {
ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_NOFS);
if (!ua->lksb.sb_lvbptr) {
kfree(ua);
- __put_lkb(ls, lkb);
error = -ENOMEM;
- goto out;
+ goto out_put;
}
}
#ifdef CONFIG_DLM_DEPRECATED_API
@@ -5831,8 +5867,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
kfree(ua->lksb.sb_lvbptr);
ua->lksb.sb_lvbptr = NULL;
kfree(ua);
- __put_lkb(ls, lkb);
- goto out;
+ goto out_put;
}
/* After ua is attached to lkb it will be freed by dlm_free_lkb().
@@ -5851,8 +5886,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
error = 0;
fallthrough;
default:
- __put_lkb(ls, lkb);
- goto out;
+ goto out_put;
}
/* add this new lkb to the per-process list of locks */
@@ -5860,6 +5894,11 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
hold_lkb(lkb);
list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks);
spin_unlock(&ua->proc->locks_spin);
+ do_put = false;
+ out_put:
+ trace_dlm_lock_end(ls, lkb, name, namelen, mode, flags, error, false);
+ if (do_put)
+ __put_lkb(ls, lkb);
out:
dlm_unlock_recovery(ls);
return error;
@@ -5885,6 +5924,8 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
if (error)
goto out;
+ trace_dlm_lock_start(ls, lkb, NULL, 0, mode, flags);
+
/* user can change the params on its lock when it converts it, or
add an lvb that didn't exist before */
@@ -5922,6 +5963,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
if (error == -EINPROGRESS || error == -EAGAIN || error == -EDEADLK)
error = 0;
out_put:
+ trace_dlm_lock_end(ls, lkb, NULL, 0, mode, flags, error, false);
dlm_put_lkb(lkb);
out:
dlm_unlock_recovery(ls);
@@ -6014,6 +6056,8 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
if (error)
goto out;
+ trace_dlm_unlock_start(ls, lkb, flags);
+
ua = lkb->lkb_ua;
if (lvb_in && ua->lksb.sb_lvbptr)
@@ -6042,6 +6086,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
spin_unlock(&ua->proc->locks_spin);
out_put:
+ trace_dlm_unlock_end(ls, lkb, flags, error);
dlm_put_lkb(lkb);
out:
dlm_unlock_recovery(ls);
@@ -6063,6 +6108,8 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
if (error)
goto out;
+ trace_dlm_unlock_start(ls, lkb, flags);
+
ua = lkb->lkb_ua;
if (ua_tmp->castparam)
ua->castparam = ua_tmp->castparam;
@@ -6080,6 +6127,7 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
if (error == -EBUSY)
error = 0;
out_put:
+ trace_dlm_unlock_end(ls, lkb, flags, error);
dlm_put_lkb(lkb);
out:
dlm_unlock_recovery(ls);
@@ -6101,6 +6149,8 @@ int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid)
if (error)
goto out;
+ trace_dlm_unlock_start(ls, lkb, flags);
+
ua = lkb->lkb_ua;
error = set_unlock_args(flags, ua, &args);
@@ -6129,6 +6179,7 @@ int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid)
if (error == -EBUSY)
error = 0;
out_put:
+ trace_dlm_unlock_end(ls, lkb, flags, error);
dlm_put_lkb(lkb);
out:
dlm_unlock_recovery(ls);
@@ -6184,7 +6235,7 @@ static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls,
{
struct dlm_lkb *lkb = NULL;
- mutex_lock(&ls->ls_clear_proc_locks);
+ spin_lock(&ls->ls_clear_proc_locks);
if (list_empty(&proc->locks))
goto out;
@@ -6196,7 +6247,7 @@ static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls,
else
lkb->lkb_flags |= DLM_IFL_DEAD;
out:
- mutex_unlock(&ls->ls_clear_proc_locks);
+ spin_unlock(&ls->ls_clear_proc_locks);
return lkb;
}
@@ -6233,7 +6284,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
dlm_put_lkb(lkb);
}
- mutex_lock(&ls->ls_clear_proc_locks);
+ spin_lock(&ls->ls_clear_proc_locks);
/* in-progress unlocks */
list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) {
@@ -6249,7 +6300,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
dlm_put_lkb(lkb);
}
- mutex_unlock(&ls->ls_clear_proc_locks);
+ spin_unlock(&ls->ls_clear_proc_locks);
dlm_unlock_recovery(ls);
}
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index a7b6474f009d..40c76b5544da 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -36,7 +36,7 @@ static inline void dlm_adjust_timeouts(struct dlm_ls *ls) { }
int dlm_master_lookup(struct dlm_ls *ls, int nodeid, char *name, int len,
unsigned int flags, int *r_nodeid, int *result);
-int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len,
+int dlm_search_rsb_tree(struct rb_root *tree, const void *name, int len,
struct dlm_rsb **r_ret);
void dlm_recover_purge(struct dlm_ls *ls);
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 3972f4d86c75..bae050df7abf 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -416,7 +416,7 @@ static int new_lockspace(const char *name, const char *cluster,
if (namelen > DLM_LOCKSPACE_LEN || namelen == 0)
return -EINVAL;
- if (!lvblen || (lvblen % 8))
+ if (lvblen % 8)
return -EINVAL;
if (!try_module_get(THIS_MODULE))
@@ -584,7 +584,7 @@ static int new_lockspace(const char *name, const char *cluster,
atomic_set(&ls->ls_requestqueue_cnt, 0);
init_waitqueue_head(&ls->ls_requestqueue_wait);
mutex_init(&ls->ls_requestqueue_mutex);
- mutex_init(&ls->ls_clear_proc_locks);
+ spin_lock_init(&ls->ls_clear_proc_locks);
/* Due backwards compatibility with 3.1 we need to use maximum
* possible dlm message size to be sure the message will fit and
@@ -703,10 +703,11 @@ static int new_lockspace(const char *name, const char *cluster,
return error;
}
-int dlm_new_lockspace(const char *name, const char *cluster,
- uint32_t flags, int lvblen,
- const struct dlm_lockspace_ops *ops, void *ops_arg,
- int *ops_result, dlm_lockspace_t **lockspace)
+static int __dlm_new_lockspace(const char *name, const char *cluster,
+ uint32_t flags, int lvblen,
+ const struct dlm_lockspace_ops *ops,
+ void *ops_arg, int *ops_result,
+ dlm_lockspace_t **lockspace)
{
int error = 0;
@@ -732,6 +733,25 @@ int dlm_new_lockspace(const char *name, const char *cluster,
return error;
}
+int dlm_new_lockspace(const char *name, const char *cluster, uint32_t flags,
+ int lvblen, const struct dlm_lockspace_ops *ops,
+ void *ops_arg, int *ops_result,
+ dlm_lockspace_t **lockspace)
+{
+ return __dlm_new_lockspace(name, cluster, flags | DLM_LSFL_FS, lvblen,
+ ops, ops_arg, ops_result, lockspace);
+}
+
+int dlm_new_user_lockspace(const char *name, const char *cluster,
+ uint32_t flags, int lvblen,
+ const struct dlm_lockspace_ops *ops,
+ void *ops_arg, int *ops_result,
+ dlm_lockspace_t **lockspace)
+{
+ return __dlm_new_lockspace(name, cluster, flags, lvblen, ops,
+ ops_arg, ops_result, lockspace);
+}
+
static int lkb_idr_is_local(int id, void *p, void *data)
{
struct dlm_lkb *lkb = p;
diff --git a/fs/dlm/lockspace.h b/fs/dlm/lockspace.h
index 306fc4f4ea15..03f4a4a3a871 100644
--- a/fs/dlm/lockspace.h
+++ b/fs/dlm/lockspace.h
@@ -12,6 +12,14 @@
#ifndef __LOCKSPACE_DOT_H__
#define __LOCKSPACE_DOT_H__
+/* DLM_LSFL_FS
+ * The lockspace user is in the kernel (i.e. filesystem). Enables
+ * direct bast/cast callbacks.
+ *
+ * internal lockspace flag - will be removed in future
+ */
+#define DLM_LSFL_FS 0x00000004
+
int dlm_lockspace_init(void);
void dlm_lockspace_exit(void);
struct dlm_ls *dlm_find_lockspace_global(uint32_t id);
@@ -20,6 +28,11 @@ struct dlm_ls *dlm_find_lockspace_device(int minor);
void dlm_put_lockspace(struct dlm_ls *ls);
void dlm_stop_lockspaces(void);
void dlm_stop_lockspaces_check(void);
+int dlm_new_user_lockspace(const char *name, const char *cluster,
+ uint32_t flags, int lvblen,
+ const struct dlm_lockspace_ops *ops,
+ void *ops_arg, int *ops_result,
+ dlm_lockspace_t **lockspace);
#endif /* __LOCKSPACE_DOT_H__ */
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index a4e84e8d94c8..59f64c596233 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1336,6 +1336,8 @@ struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation,
return NULL;
}
+ /* for dlm_lowcomms_commit_msg() */
+ kref_get(&msg->ref);
/* we assume if successful commit must called */
msg->idx = idx;
return msg;
@@ -1375,6 +1377,8 @@ void dlm_lowcomms_commit_msg(struct dlm_msg *msg)
{
_dlm_lowcomms_commit_msg(msg);
srcu_read_unlock(&connections_srcu, msg->idx);
+ /* because dlm_lowcomms_new_msg() */
+ kref_put(&msg->ref, dlm_msg_release);
}
#endif
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index 67f68d48d60c..4de4b8651c6c 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -75,6 +75,7 @@ static struct genl_family family __ro_after_init = {
.version = DLM_GENL_VERSION,
.small_ops = dlm_nl_ops,
.n_small_ops = ARRAY_SIZE(dlm_nl_ops),
+ .resv_start_op = DLM_CMD_HELLO + 1,
.module = THIS_MODULE,
};
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 99e8f0744513..c5d27bccc3dc 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -16,6 +16,8 @@
#include <linux/slab.h>
#include <linux/sched/signal.h>
+#include <trace/events/dlm.h>
+
#include "dlm_internal.h"
#include "lockspace.h"
#include "lock.h"
@@ -184,7 +186,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode,
return;
ls = lkb->lkb_resource->res_ls;
- mutex_lock(&ls->ls_clear_proc_locks);
+ spin_lock(&ls->ls_clear_proc_locks);
/* If ORPHAN/DEAD flag is set, it means the process is dead so an ast
can't be delivered. For ORPHAN's, dlm_clear_proc_locks() freed
@@ -230,7 +232,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode,
spin_unlock(&proc->locks_spin);
}
out:
- mutex_unlock(&ls->ls_clear_proc_locks);
+ spin_unlock(&ls->ls_clear_proc_locks);
}
static int device_user_lock(struct dlm_user_proc *proc,
@@ -421,9 +423,9 @@ static int device_create_lockspace(struct dlm_lspace_params *params)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- error = dlm_new_lockspace(params->name, dlm_config.ci_cluster_name, params->flags,
- DLM_USER_LVB_LEN, NULL, NULL, NULL,
- &lockspace);
+ error = dlm_new_user_lockspace(params->name, dlm_config.ci_cluster_name,
+ params->flags, DLM_USER_LVB_LEN, NULL,
+ NULL, NULL, &lockspace);
if (error)
return error;
@@ -882,7 +884,9 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
goto try_another;
}
- if (cb.flags & DLM_CB_CAST) {
+ if (cb.flags & DLM_CB_BAST) {
+ trace_dlm_bast(lkb->lkb_resource->res_ls, lkb, cb.mode);
+ } else if (cb.flags & DLM_CB_CAST) {
new_mode = cb.mode;
if (!cb.sb_status && lkb->lkb_lksb->sb_lvbptr &&
@@ -891,6 +895,7 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
lkb->lkb_lksb->sb_status = cb.sb_status;
lkb->lkb_lksb->sb_flags = cb.sb_flags;
+ trace_dlm_ast(lkb->lkb_resource->res_ls, lkb);
}
rv = copy_result_to_user(lkb->lkb_ua,
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 5f2b49e13731..f2ed0c0266cb 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -506,7 +506,7 @@ ecryptfs_dentry_to_lower(struct dentry *dentry)
return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry;
}
-static inline struct path *
+static inline const struct path *
ecryptfs_dentry_to_lower_path(struct dentry *dentry)
{
return &((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path;
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 18d5b91cb573..268b74499c28 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -33,7 +33,7 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
struct iov_iter *to)
{
ssize_t rc;
- struct path *path;
+ const struct path *path;
struct file *file = iocb->ki_filp;
rc = generic_file_read_iter(iocb, to);
@@ -53,7 +53,7 @@ struct ecryptfs_getdents_callback {
};
/* Inspired by generic filldir in fs/readdir.c */
-static int
+static bool
ecryptfs_filldir(struct dir_context *ctx, const char *lower_name,
int lower_namelen, loff_t offset, u64 ino, unsigned int d_type)
{
@@ -61,18 +61,19 @@ ecryptfs_filldir(struct dir_context *ctx, const char *lower_name,
container_of(ctx, struct ecryptfs_getdents_callback, ctx);
size_t name_size;
char *name;
- int rc;
+ int err;
+ bool res;
buf->filldir_called++;
- rc = ecryptfs_decode_and_decrypt_filename(&name, &name_size,
- buf->sb, lower_name,
- lower_namelen);
- if (rc) {
- if (rc != -EINVAL) {
+ err = ecryptfs_decode_and_decrypt_filename(&name, &name_size,
+ buf->sb, lower_name,
+ lower_namelen);
+ if (err) {
+ if (err != -EINVAL) {
ecryptfs_printk(KERN_DEBUG,
"%s: Error attempting to decode and decrypt filename [%s]; rc = [%d]\n",
- __func__, lower_name, rc);
- return rc;
+ __func__, lower_name, err);
+ return false;
}
/* Mask -EINVAL errors as these are most likely due a plaintext
@@ -81,16 +82,15 @@ ecryptfs_filldir(struct dir_context *ctx, const char *lower_name,
* the "lost+found" dentry in the root directory of an Ext4
* filesystem.
*/
- return 0;
+ return true;
}
buf->caller->pos = buf->ctx.pos;
- rc = !dir_emit(buf->caller, name, name_size, ino, d_type);
+ res = dir_emit(buf->caller, name, name_size, ino, d_type);
kfree(name);
- if (!rc)
+ if (res)
buf->entries_written++;
-
- return rc;
+ return res;
}
/**
@@ -111,14 +111,8 @@ static int ecryptfs_readdir(struct file *file, struct dir_context *ctx)
lower_file = ecryptfs_file_to_lower(file);
rc = iterate_dir(lower_file, &buf.ctx);
ctx->pos = buf.ctx.pos;
- if (rc < 0)
- goto out;
- if (buf.filldir_called && !buf.entries_written)
- goto out;
- if (rc >= 0)
- fsstack_copy_attr_atime(inode,
- file_inode(lower_file));
-out:
+ if (rc >= 0 && (buf.entries_written || !buf.filldir_called))
+ fsstack_copy_attr_atime(inode, file_inode(lower_file));
return rc;
}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 16d50dface59..c214fe0981bd 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -317,7 +317,7 @@ static int ecryptfs_i_size_read(struct dentry *dentry, struct inode *inode)
static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry,
struct dentry *lower_dentry)
{
- struct path *path = ecryptfs_dentry_to_lower_path(dentry->d_parent);
+ const struct path *path = ecryptfs_dentry_to_lower_path(dentry->d_parent);
struct inode *inode, *lower_inode;
struct ecryptfs_dentry_info *dentry_info;
int rc = 0;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 2dd23a82e0de..2dc927ba067f 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -105,7 +105,7 @@ static int ecryptfs_init_lower_file(struct dentry *dentry,
struct file **lower_file)
{
const struct cred *cred = current_cred();
- struct path *path = ecryptfs_dentry_to_lower_path(dentry);
+ const struct path *path = ecryptfs_dentry_to_lower_path(dentry);
int rc;
rc = ecryptfs_privileged_open(lower_file, path->dentry, path->mnt,
diff --git a/fs/efivarfs/vars.c b/fs/efivarfs/vars.c
index a0ef63cfcecb..9e4f47808bd5 100644
--- a/fs/efivarfs/vars.c
+++ b/fs/efivarfs/vars.c
@@ -651,22 +651,6 @@ int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes,
if (err)
return err;
- /*
- * Ensure that the available space hasn't shrunk below the safe level
- */
- status = check_var_size(attributes, *size + ucs2_strsize(name, 1024));
- if (status != EFI_SUCCESS) {
- if (status != EFI_UNSUPPORTED) {
- err = efi_status_to_err(status);
- goto out;
- }
-
- if (*size > 65536) {
- err = -ENOSPC;
- goto out;
- }
- }
-
status = efivar_set_variable_locked(name, vendor, attributes, *size,
data, false);
if (status != EFI_SUCCESS) {
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 2d55569f96ac..51b7ac7166d9 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -317,52 +317,61 @@ dstmap_out:
return ret;
}
-static int z_erofs_shifted_transform(struct z_erofs_decompress_req *rq,
- struct page **pagepool)
+static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
+ struct page **pagepool)
{
- const unsigned int nrpages_out =
+ const unsigned int inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
+ const unsigned int outpages =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
const unsigned int righthalf = min_t(unsigned int, rq->outputsize,
PAGE_SIZE - rq->pageofs_out);
const unsigned int lefthalf = rq->outputsize - righthalf;
+ const unsigned int interlaced_offset =
+ rq->alg == Z_EROFS_COMPRESSION_SHIFTED ? 0 : rq->pageofs_out;
unsigned char *src, *dst;
- if (nrpages_out > 2) {
+ if (outpages > 2 && rq->alg == Z_EROFS_COMPRESSION_SHIFTED) {
DBG_BUGON(1);
- return -EIO;
+ return -EFSCORRUPTED;
}
if (rq->out[0] == *rq->in) {
- DBG_BUGON(nrpages_out != 1);
+ DBG_BUGON(rq->pageofs_out);
return 0;
}
- src = kmap_atomic(*rq->in) + rq->pageofs_in;
+ src = kmap_local_page(rq->in[inpages - 1]) + rq->pageofs_in;
if (rq->out[0]) {
- dst = kmap_atomic(rq->out[0]);
- memcpy(dst + rq->pageofs_out, src, righthalf);
- kunmap_atomic(dst);
+ dst = kmap_local_page(rq->out[0]);
+ memcpy(dst + rq->pageofs_out, src + interlaced_offset,
+ righthalf);
+ kunmap_local(dst);
}
- if (nrpages_out == 2) {
- DBG_BUGON(!rq->out[1]);
- if (rq->out[1] == *rq->in) {
+ if (outpages > inpages) {
+ DBG_BUGON(!rq->out[outpages - 1]);
+ if (rq->out[outpages - 1] != rq->in[inpages - 1]) {
+ dst = kmap_local_page(rq->out[outpages - 1]);
+ memcpy(dst, interlaced_offset ? src :
+ (src + righthalf), lefthalf);
+ kunmap_local(dst);
+ } else if (!interlaced_offset) {
memmove(src, src + righthalf, lefthalf);
- } else {
- dst = kmap_atomic(rq->out[1]);
- memcpy(dst, src + righthalf, lefthalf);
- kunmap_atomic(dst);
}
}
- kunmap_atomic(src);
+ kunmap_local(src);
return 0;
}
static struct z_erofs_decompressor decompressors[] = {
[Z_EROFS_COMPRESSION_SHIFTED] = {
- .decompress = z_erofs_shifted_transform,
+ .decompress = z_erofs_transform_plain,
.name = "shifted"
},
+ [Z_EROFS_COMPRESSION_INTERLACED] = {
+ .decompress = z_erofs_transform_plain,
+ .name = "interlaced"
+ },
[Z_EROFS_COMPRESSION_LZ4] = {
.decompress = z_erofs_lz4_decompress,
.name = "lz4"
diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c
index 5e59b3f523eb..091fd5adf818 100644
--- a/fs/erofs/decompressor_lzma.c
+++ b/fs/erofs/decompressor_lzma.c
@@ -217,6 +217,9 @@ again:
strm->buf.out_size = min_t(u32, outlen,
PAGE_SIZE - pageofs);
outlen -= strm->buf.out_size;
+ if (!rq->out[no] && rq->fillgaps) /* deduped */
+ rq->out[no] = erofs_allocpage(pagepool,
+ GFP_KERNEL | __GFP_NOFAIL);
if (rq->out[no])
strm->buf.out = kmap(rq->out[no]) + pageofs;
pageofs = 0;
diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
index 2b48373f690b..dbcd24371002 100644
--- a/fs/erofs/erofs_fs.h
+++ b/fs/erofs/erofs_fs.h
@@ -25,6 +25,8 @@
#define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE 0x00000008
#define EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 0x00000008
#define EROFS_FEATURE_INCOMPAT_ZTAILPACKING 0x00000010
+#define EROFS_FEATURE_INCOMPAT_FRAGMENTS 0x00000020
+#define EROFS_FEATURE_INCOMPAT_DEDUPE 0x00000020
#define EROFS_ALL_FEATURE_INCOMPAT \
(EROFS_FEATURE_INCOMPAT_ZERO_PADDING | \
EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
@@ -32,7 +34,9 @@
EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \
EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \
EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 | \
- EROFS_FEATURE_INCOMPAT_ZTAILPACKING)
+ EROFS_FEATURE_INCOMPAT_ZTAILPACKING | \
+ EROFS_FEATURE_INCOMPAT_FRAGMENTS | \
+ EROFS_FEATURE_INCOMPAT_DEDUPE)
#define EROFS_SB_EXTSLOT_SIZE 16
@@ -71,7 +75,9 @@ struct erofs_super_block {
} __packed u1;
__le16 extra_devices; /* # of devices besides the primary device */
__le16 devt_slotoff; /* startoff = devt_slotoff * devt_slotsize */
- __u8 reserved2[38];
+ __u8 reserved[6];
+ __le64 packed_nid; /* nid of the special packed inode */
+ __u8 reserved2[24];
};
/*
@@ -295,16 +301,27 @@ struct z_erofs_lzma_cfgs {
* bit 1 : HEAD1 big pcluster (0 - off; 1 - on)
* bit 2 : HEAD2 big pcluster (0 - off; 1 - on)
* bit 3 : tailpacking inline pcluster (0 - off; 1 - on)
+ * bit 4 : interlaced plain pcluster (0 - off; 1 - on)
+ * bit 5 : fragment pcluster (0 - off; 1 - on)
*/
#define Z_EROFS_ADVISE_COMPACTED_2B 0x0001
#define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002
#define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004
#define Z_EROFS_ADVISE_INLINE_PCLUSTER 0x0008
+#define Z_EROFS_ADVISE_INTERLACED_PCLUSTER 0x0010
+#define Z_EROFS_ADVISE_FRAGMENT_PCLUSTER 0x0020
+#define Z_EROFS_FRAGMENT_INODE_BIT 7
struct z_erofs_map_header {
- __le16 h_reserved1;
- /* indicates the encoded size of tailpacking data */
- __le16 h_idata_size;
+ union {
+ /* fragment data offset in the packed inode */
+ __le32 h_fragmentoff;
+ struct {
+ __le16 h_reserved1;
+ /* indicates the encoded size of tailpacking data */
+ __le16 h_idata_size;
+ };
+ };
__le16 h_advise;
/*
* bit 0-3 : algorithm type of head 1 (logical cluster type 01);
@@ -313,7 +330,8 @@ struct z_erofs_map_header {
__u8 h_algorithmtype;
/*
* bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096;
- * bit 3-7 : reserved.
+ * bit 3-6 : reserved;
+ * bit 7 : move the whole file into packed inode or not.
*/
__u8 h_clusterbits;
};
@@ -355,6 +373,9 @@ enum {
#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS 2
#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT 0
+/* (noncompact only, HEAD) This pcluster refers to partial decompressed data */
+#define Z_EROFS_VLE_DI_PARTIAL_REF (1 << 15)
+
/*
* D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the
* compressed block count of a compressed extent (in logical clusters, aka.
@@ -402,6 +423,10 @@ struct erofs_dirent {
/* check the EROFS on-disk layout strictly at compile time */
static inline void erofs_check_ondisk_layout_definitions(void)
{
+ const __le64 fmh = *(__le64 *)&(struct z_erofs_map_header) {
+ .h_clusterbits = 1 << Z_EROFS_FRAGMENT_INODE_BIT
+ };
+
BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128);
BUILD_BUG_ON(sizeof(struct erofs_inode_compact) != 32);
BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64);
@@ -419,6 +444,9 @@ static inline void erofs_check_ondisk_layout_definitions(void)
BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) <
Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1);
+ /* exclude old compiler versions like gcc 7.5.0 */
+ BUILD_BUG_ON(__builtin_constant_p(fmh) ?
+ fmh != cpu_to_le64(1ULL << 63) : 0);
}
#endif
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index b5fd9d71e67f..fe05bc51f9f2 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -1,10 +1,16 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2022, Alibaba Cloud
+ * Copyright (C) 2022, Bytedance Inc. All rights reserved.
*/
#include <linux/fscache.h>
#include "internal.h"
+static DEFINE_MUTEX(erofs_domain_list_lock);
+static DEFINE_MUTEX(erofs_domain_cookies_lock);
+static LIST_HEAD(erofs_domain_list);
+static struct vfsmount *erofs_pseudo_mnt;
+
static struct netfs_io_request *erofs_fscache_alloc_request(struct address_space *mapping,
loff_t start, size_t len)
{
@@ -234,113 +240,111 @@ out:
return ret;
}
-static int erofs_fscache_read_folio_inline(struct folio *folio,
- struct erofs_map_blocks *map)
-{
- struct super_block *sb = folio_mapping(folio)->host->i_sb;
- struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
- erofs_blk_t blknr;
- size_t offset, len;
- void *src, *dst;
-
- /* For tail packing layout, the offset may be non-zero. */
- offset = erofs_blkoff(map->m_pa);
- blknr = erofs_blknr(map->m_pa);
- len = map->m_llen;
-
- src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
- if (IS_ERR(src))
- return PTR_ERR(src);
-
- dst = kmap_local_folio(folio, 0);
- memcpy(dst, src + offset, len);
- memset(dst + len, 0, PAGE_SIZE - len);
- kunmap_local(dst);
-
- erofs_put_metabuf(&buf);
- return 0;
-}
-
-static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
+/*
+ * Read into page cache in the range described by (@pos, @len).
+ *
+ * On return, the caller is responsible for page unlocking if the output @unlock
+ * is true, or the callee will take this responsibility through netfs_io_request
+ * interface.
+ *
+ * The return value is the number of bytes successfully handled, or negative
+ * error code on failure. The only exception is that, the length of the range
+ * instead of the error code is returned on failure after netfs_io_request is
+ * allocated, so that .readahead() could advance rac accordingly.
+ */
+static int erofs_fscache_data_read(struct address_space *mapping,
+ loff_t pos, size_t len, bool *unlock)
{
- struct inode *inode = folio_mapping(folio)->host;
+ struct inode *inode = mapping->host;
struct super_block *sb = inode->i_sb;
+ struct netfs_io_request *rreq;
struct erofs_map_blocks map;
struct erofs_map_dev mdev;
- struct netfs_io_request *rreq;
- erofs_off_t pos;
- loff_t pstart;
+ struct iov_iter iter;
+ size_t count;
int ret;
- DBG_BUGON(folio_size(folio) != EROFS_BLKSIZ);
+ *unlock = true;
- pos = folio_pos(folio);
map.m_la = pos;
-
ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
if (ret)
- goto out_unlock;
+ return ret;
- if (!(map.m_flags & EROFS_MAP_MAPPED)) {
- folio_zero_range(folio, 0, folio_size(folio));
- goto out_uptodate;
+ if (map.m_flags & EROFS_MAP_META) {
+ struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
+ erofs_blk_t blknr;
+ size_t offset, size;
+ void *src;
+
+ /* For tail packing layout, the offset may be non-zero. */
+ offset = erofs_blkoff(map.m_pa);
+ blknr = erofs_blknr(map.m_pa);
+ size = map.m_llen;
+
+ src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
+ if (IS_ERR(src))
+ return PTR_ERR(src);
+
+ iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, PAGE_SIZE);
+ if (copy_to_iter(src + offset, size, &iter) != size)
+ return -EFAULT;
+ iov_iter_zero(PAGE_SIZE - size, &iter);
+ erofs_put_metabuf(&buf);
+ return PAGE_SIZE;
}
- if (map.m_flags & EROFS_MAP_META) {
- ret = erofs_fscache_read_folio_inline(folio, &map);
- goto out_uptodate;
+ count = min_t(size_t, map.m_llen - (pos - map.m_la), len);
+ DBG_BUGON(!count || count % PAGE_SIZE);
+
+ if (!(map.m_flags & EROFS_MAP_MAPPED)) {
+ iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, count);
+ iov_iter_zero(count, &iter);
+ return count;
}
mdev = (struct erofs_map_dev) {
.m_deviceid = map.m_deviceid,
.m_pa = map.m_pa,
};
-
ret = erofs_map_dev(sb, &mdev);
if (ret)
- goto out_unlock;
-
+ return ret;
- rreq = erofs_fscache_alloc_request(folio_mapping(folio),
- folio_pos(folio), folio_size(folio));
- if (IS_ERR(rreq)) {
- ret = PTR_ERR(rreq);
- goto out_unlock;
- }
+ rreq = erofs_fscache_alloc_request(mapping, pos, count);
+ if (IS_ERR(rreq))
+ return PTR_ERR(rreq);
- pstart = mdev.m_pa + (pos - map.m_la);
- return erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
- rreq, pstart);
-
-out_uptodate:
- if (!ret)
- folio_mark_uptodate(folio);
-out_unlock:
- folio_unlock(folio);
- return ret;
+ *unlock = false;
+ erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
+ rreq, mdev.m_pa + (pos - map.m_la));
+ return count;
}
-static void erofs_fscache_advance_folios(struct readahead_control *rac,
- size_t len, bool unlock)
+static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
- while (len) {
- struct folio *folio = readahead_folio(rac);
- len -= folio_size(folio);
- if (unlock) {
+ bool unlock;
+ int ret;
+
+ DBG_BUGON(folio_size(folio) != EROFS_BLKSIZ);
+
+ ret = erofs_fscache_data_read(folio_mapping(folio), folio_pos(folio),
+ folio_size(folio), &unlock);
+ if (unlock) {
+ if (ret > 0)
folio_mark_uptodate(folio);
- folio_unlock(folio);
- }
+ folio_unlock(folio);
}
+ return ret < 0 ? ret : 0;
}
static void erofs_fscache_readahead(struct readahead_control *rac)
{
- struct inode *inode = rac->mapping->host;
- struct super_block *sb = inode->i_sb;
- size_t len, count, done = 0;
- erofs_off_t pos;
- loff_t start, offset;
- int ret;
+ struct folio *folio;
+ size_t len, done = 0;
+ loff_t start, pos;
+ bool unlock;
+ int ret, size;
if (!readahead_count(rac))
return;
@@ -349,67 +353,22 @@ static void erofs_fscache_readahead(struct readahead_control *rac)
len = readahead_length(rac);
do {
- struct erofs_map_blocks map;
- struct erofs_map_dev mdev;
- struct netfs_io_request *rreq;
-
pos = start + done;
- map.m_la = pos;
-
- ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
- if (ret)
+ ret = erofs_fscache_data_read(rac->mapping, pos,
+ len - done, &unlock);
+ if (ret <= 0)
return;
- offset = start + done;
- count = min_t(size_t, map.m_llen - (pos - map.m_la),
- len - done);
-
- if (!(map.m_flags & EROFS_MAP_MAPPED)) {
- struct iov_iter iter;
-
- iov_iter_xarray(&iter, READ, &rac->mapping->i_pages,
- offset, count);
- iov_iter_zero(count, &iter);
-
- erofs_fscache_advance_folios(rac, count, true);
- ret = count;
- continue;
- }
-
- if (map.m_flags & EROFS_MAP_META) {
- struct folio *folio = readahead_folio(rac);
-
- ret = erofs_fscache_read_folio_inline(folio, &map);
- if (!ret) {
+ size = ret;
+ while (size) {
+ folio = readahead_folio(rac);
+ size -= folio_size(folio);
+ if (unlock) {
folio_mark_uptodate(folio);
- ret = folio_size(folio);
+ folio_unlock(folio);
}
-
- folio_unlock(folio);
- continue;
}
-
- mdev = (struct erofs_map_dev) {
- .m_deviceid = map.m_deviceid,
- .m_pa = map.m_pa,
- };
- ret = erofs_map_dev(sb, &mdev);
- if (ret)
- return;
-
- rreq = erofs_fscache_alloc_request(rac->mapping, offset, count);
- if (IS_ERR(rreq))
- return;
- /*
- * Drop the ref of folios here. Unlock them in
- * rreq_unlock_folios() when rreq complete.
- */
- erofs_fscache_advance_folios(rac, count, false);
- ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
- rreq, mdev.m_pa + (pos - map.m_la));
- if (!ret)
- ret = count;
- } while (ret > 0 && ((done += ret) < len));
+ } while ((done += ret) < len);
}
static const struct address_space_operations erofs_fscache_meta_aops = {
@@ -421,9 +380,114 @@ const struct address_space_operations erofs_fscache_access_aops = {
.readahead = erofs_fscache_readahead,
};
-int erofs_fscache_register_cookie(struct super_block *sb,
- struct erofs_fscache **fscache,
- char *name, bool need_inode)
+static void erofs_fscache_domain_put(struct erofs_domain *domain)
+{
+ if (!domain)
+ return;
+ mutex_lock(&erofs_domain_list_lock);
+ if (refcount_dec_and_test(&domain->ref)) {
+ list_del(&domain->list);
+ if (list_empty(&erofs_domain_list)) {
+ kern_unmount(erofs_pseudo_mnt);
+ erofs_pseudo_mnt = NULL;
+ }
+ mutex_unlock(&erofs_domain_list_lock);
+ fscache_relinquish_volume(domain->volume, NULL, false);
+ kfree(domain->domain_id);
+ kfree(domain);
+ return;
+ }
+ mutex_unlock(&erofs_domain_list_lock);
+}
+
+static int erofs_fscache_register_volume(struct super_block *sb)
+{
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
+ char *domain_id = sbi->opt.domain_id;
+ struct fscache_volume *volume;
+ char *name;
+ int ret = 0;
+
+ name = kasprintf(GFP_KERNEL, "erofs,%s",
+ domain_id ? domain_id : sbi->opt.fsid);
+ if (!name)
+ return -ENOMEM;
+
+ volume = fscache_acquire_volume(name, NULL, NULL, 0);
+ if (IS_ERR_OR_NULL(volume)) {
+ erofs_err(sb, "failed to register volume for %s", name);
+ ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
+ volume = NULL;
+ }
+
+ sbi->volume = volume;
+ kfree(name);
+ return ret;
+}
+
+static int erofs_fscache_init_domain(struct super_block *sb)
+{
+ int err;
+ struct erofs_domain *domain;
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
+
+ domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
+ if (!domain)
+ return -ENOMEM;
+
+ domain->domain_id = kstrdup(sbi->opt.domain_id, GFP_KERNEL);
+ if (!domain->domain_id) {
+ kfree(domain);
+ return -ENOMEM;
+ }
+
+ err = erofs_fscache_register_volume(sb);
+ if (err)
+ goto out;
+
+ if (!erofs_pseudo_mnt) {
+ erofs_pseudo_mnt = kern_mount(&erofs_fs_type);
+ if (IS_ERR(erofs_pseudo_mnt)) {
+ err = PTR_ERR(erofs_pseudo_mnt);
+ goto out;
+ }
+ }
+
+ domain->volume = sbi->volume;
+ refcount_set(&domain->ref, 1);
+ list_add(&domain->list, &erofs_domain_list);
+ sbi->domain = domain;
+ return 0;
+out:
+ kfree(domain->domain_id);
+ kfree(domain);
+ return err;
+}
+
+static int erofs_fscache_register_domain(struct super_block *sb)
+{
+ int err;
+ struct erofs_domain *domain;
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
+
+ mutex_lock(&erofs_domain_list_lock);
+ list_for_each_entry(domain, &erofs_domain_list, list) {
+ if (!strcmp(domain->domain_id, sbi->opt.domain_id)) {
+ sbi->domain = domain;
+ sbi->volume = domain->volume;
+ refcount_inc(&domain->ref);
+ mutex_unlock(&erofs_domain_list_lock);
+ return 0;
+ }
+ }
+ err = erofs_fscache_init_domain(sb);
+ mutex_unlock(&erofs_domain_list_lock);
+ return err;
+}
+
+static
+struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
+ char *name, bool need_inode)
{
struct fscache_volume *volume = EROFS_SB(sb)->volume;
struct erofs_fscache *ctx;
@@ -432,7 +496,7 @@ int erofs_fscache_register_cookie(struct super_block *sb,
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
name, strlen(name), NULL, 0, 0);
@@ -462,63 +526,149 @@ int erofs_fscache_register_cookie(struct super_block *sb,
ctx->inode = inode;
}
- *fscache = ctx;
- return 0;
+ return ctx;
err_cookie:
fscache_unuse_cookie(ctx->cookie, NULL, NULL);
fscache_relinquish_cookie(ctx->cookie, false);
- ctx->cookie = NULL;
err:
kfree(ctx);
- return ret;
+ return ERR_PTR(ret);
}
-void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache)
+static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
{
- struct erofs_fscache *ctx = *fscache;
-
- if (!ctx)
- return;
-
fscache_unuse_cookie(ctx->cookie, NULL, NULL);
fscache_relinquish_cookie(ctx->cookie, false);
- ctx->cookie = NULL;
-
iput(ctx->inode);
- ctx->inode = NULL;
-
+ kfree(ctx->name);
kfree(ctx);
- *fscache = NULL;
+}
+
+static
+struct erofs_fscache *erofs_fscache_domain_init_cookie(struct super_block *sb,
+ char *name, bool need_inode)
+{
+ int err;
+ struct inode *inode;
+ struct erofs_fscache *ctx;
+ struct erofs_domain *domain = EROFS_SB(sb)->domain;
+
+ ctx = erofs_fscache_acquire_cookie(sb, name, need_inode);
+ if (IS_ERR(ctx))
+ return ctx;
+
+ ctx->name = kstrdup(name, GFP_KERNEL);
+ if (!ctx->name) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ inode = new_inode(erofs_pseudo_mnt->mnt_sb);
+ if (!inode) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ ctx->domain = domain;
+ ctx->anon_inode = inode;
+ inode->i_private = ctx;
+ refcount_inc(&domain->ref);
+ return ctx;
+out:
+ erofs_fscache_relinquish_cookie(ctx);
+ return ERR_PTR(err);
+}
+
+static
+struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
+ char *name, bool need_inode)
+{
+ struct inode *inode;
+ struct erofs_fscache *ctx;
+ struct erofs_domain *domain = EROFS_SB(sb)->domain;
+ struct super_block *psb = erofs_pseudo_mnt->mnt_sb;
+
+ mutex_lock(&erofs_domain_cookies_lock);
+ spin_lock(&psb->s_inode_list_lock);
+ list_for_each_entry(inode, &psb->s_inodes, i_sb_list) {
+ ctx = inode->i_private;
+ if (!ctx || ctx->domain != domain || strcmp(ctx->name, name))
+ continue;
+ igrab(inode);
+ spin_unlock(&psb->s_inode_list_lock);
+ mutex_unlock(&erofs_domain_cookies_lock);
+ return ctx;
+ }
+ spin_unlock(&psb->s_inode_list_lock);
+ ctx = erofs_fscache_domain_init_cookie(sb, name, need_inode);
+ mutex_unlock(&erofs_domain_cookies_lock);
+ return ctx;
+}
+
+struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
+ char *name, bool need_inode)
+{
+ if (EROFS_SB(sb)->opt.domain_id)
+ return erofs_domain_register_cookie(sb, name, need_inode);
+ return erofs_fscache_acquire_cookie(sb, name, need_inode);
+}
+
+void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
+{
+ bool drop;
+ struct erofs_domain *domain;
+
+ if (!ctx)
+ return;
+ domain = ctx->domain;
+ if (domain) {
+ mutex_lock(&erofs_domain_cookies_lock);
+ drop = atomic_read(&ctx->anon_inode->i_count) == 1;
+ iput(ctx->anon_inode);
+ mutex_unlock(&erofs_domain_cookies_lock);
+ if (!drop)
+ return;
+ }
+
+ erofs_fscache_relinquish_cookie(ctx);
+ erofs_fscache_domain_put(domain);
}
int erofs_fscache_register_fs(struct super_block *sb)
{
+ int ret;
struct erofs_sb_info *sbi = EROFS_SB(sb);
- struct fscache_volume *volume;
- char *name;
- int ret = 0;
+ struct erofs_fscache *fscache;
- name = kasprintf(GFP_KERNEL, "erofs,%s", sbi->opt.fsid);
- if (!name)
- return -ENOMEM;
+ if (sbi->opt.domain_id)
+ ret = erofs_fscache_register_domain(sb);
+ else
+ ret = erofs_fscache_register_volume(sb);
+ if (ret)
+ return ret;
- volume = fscache_acquire_volume(name, NULL, NULL, 0);
- if (IS_ERR_OR_NULL(volume)) {
- erofs_err(sb, "failed to register volume for %s", name);
- ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
- volume = NULL;
- }
+ /* acquired domain/volume will be relinquished in kill_sb() on error */
+ fscache = erofs_fscache_register_cookie(sb, sbi->opt.fsid, true);
+ if (IS_ERR(fscache))
+ return PTR_ERR(fscache);
- sbi->volume = volume;
- kfree(name);
- return ret;
+ sbi->s_fscache = fscache;
+ return 0;
}
void erofs_fscache_unregister_fs(struct super_block *sb)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
- fscache_relinquish_volume(sbi->volume, NULL, false);
+ erofs_fscache_unregister_cookie(sbi->s_fscache);
+
+ if (sbi->domain)
+ erofs_fscache_domain_put(sbi->domain);
+ else
+ fscache_relinquish_volume(sbi->volume, NULL, false);
+
+ sbi->s_fscache = NULL;
sbi->volume = NULL;
+ sbi->domain = NULL;
}
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index 95a403720e8c..ad2a82f2eb4c 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -214,7 +214,7 @@ static int erofs_fill_symlink(struct inode *inode, void *kaddr,
/* if it cannot be handled with fast symlink scheme */
if (vi->datalayout != EROFS_INODE_FLAT_INLINE ||
- inode->i_size >= EROFS_BLKSIZ) {
+ inode->i_size >= EROFS_BLKSIZ || inode->i_size < 0) {
inode->i_op = &erofs_symlink_iops;
return 0;
}
@@ -241,7 +241,7 @@ static int erofs_fill_symlink(struct inode *inode, void *kaddr,
return 0;
}
-static int erofs_fill_inode(struct inode *inode, int isdir)
+static int erofs_fill_inode(struct inode *inode)
{
struct erofs_inode *vi = EROFS_I(inode);
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
@@ -249,7 +249,7 @@ static int erofs_fill_inode(struct inode *inode, int isdir)
unsigned int ofs;
int err = 0;
- trace_erofs_fill_inode(inode, isdir);
+ trace_erofs_fill_inode(inode);
/* read inode base data from disk */
kaddr = erofs_read_inode(&buf, inode, &ofs);
@@ -324,21 +324,13 @@ static int erofs_iget_set_actor(struct inode *inode, void *opaque)
return 0;
}
-static inline struct inode *erofs_iget_locked(struct super_block *sb,
- erofs_nid_t nid)
+struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid)
{
const unsigned long hashval = erofs_inode_hash(nid);
+ struct inode *inode;
- return iget5_locked(sb, hashval, erofs_ilookup_test_actor,
+ inode = iget5_locked(sb, hashval, erofs_ilookup_test_actor,
erofs_iget_set_actor, &nid);
-}
-
-struct inode *erofs_iget(struct super_block *sb,
- erofs_nid_t nid,
- bool isdir)
-{
- struct inode *inode = erofs_iget_locked(sb, nid);
-
if (!inode)
return ERR_PTR(-ENOMEM);
@@ -348,10 +340,10 @@ struct inode *erofs_iget(struct super_block *sb,
vi->nid = nid;
- err = erofs_fill_inode(inode, isdir);
- if (!err)
+ err = erofs_fill_inode(inode);
+ if (!err) {
unlock_new_inode(inode);
- else {
+ } else {
iget_failed(inode);
inode = ERR_PTR(err);
}
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index a01cc82795a2..1701df48c446 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -76,6 +76,7 @@ struct erofs_mount_opts {
#endif
unsigned int mount_opt;
char *fsid;
+ char *domain_id;
};
struct erofs_dev_context {
@@ -98,9 +99,19 @@ struct erofs_sb_lz4_info {
u16 max_pclusterblks;
};
+struct erofs_domain {
+ refcount_t ref;
+ struct list_head list;
+ struct fscache_volume *volume;
+ char *domain_id;
+};
+
struct erofs_fscache {
struct fscache_cookie *cookie;
struct inode *inode;
+ struct inode *anon_inode;
+ struct erofs_domain *domain;
+ char *name;
};
struct erofs_sb_info {
@@ -120,6 +131,7 @@ struct erofs_sb_info {
struct inode *managed_cache;
struct erofs_sb_lz4_info lz4;
+ struct inode *packed_inode;
#endif /* CONFIG_EROFS_FS_ZIP */
struct erofs_dev_context *devs;
struct dax_device *dax_dev;
@@ -157,6 +169,7 @@ struct erofs_sb_info {
/* fscache support */
struct fscache_volume *volume;
struct erofs_fscache *s_fscache;
+ struct erofs_domain *domain;
};
#define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info)
@@ -183,7 +196,6 @@ enum {
EROFS_ZIP_CACHE_READAROUND
};
-#ifdef CONFIG_EROFS_FS_ZIP
#define EROFS_LOCKED_MAGIC (INT_MIN | 0xE0F510CCL)
/* basic unit of the workstation of a super_block */
@@ -223,7 +235,6 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
return atomic_cond_read_relaxed(&grp->refcount,
VAL != EROFS_LOCKED_MAGIC);
}
-#endif /* !CONFIG_EROFS_FS_ZIP */
/* we strictly follow PAGE_SIZE and no buffer head yet */
#define LOG_BLOCK_SIZE PAGE_SHIFT
@@ -277,6 +288,8 @@ EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE)
EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE)
EROFS_FEATURE_FUNCS(compr_head2, incompat, INCOMPAT_COMPR_HEAD2)
EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING)
+EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS)
+EROFS_FEATURE_FUNCS(dedupe, incompat, INCOMPAT_DEDUPE)
EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
/* atomic flag definitions */
@@ -312,8 +325,13 @@ struct erofs_inode {
unsigned char z_algorithmtype[2];
unsigned char z_logical_clusterbits;
unsigned long z_tailextent_headlcn;
- erofs_off_t z_idataoff;
- unsigned short z_idata_size;
+ union {
+ struct {
+ erofs_off_t z_idataoff;
+ unsigned short z_idata_size;
+ };
+ erofs_off_t z_fragmentoff;
+ };
};
#endif /* CONFIG_EROFS_FS_ZIP */
};
@@ -364,6 +382,7 @@ struct page *erofs_grab_cache_page_nowait(struct address_space *mapping,
}
extern const struct super_operations erofs_sops;
+extern struct file_system_type erofs_fs_type;
extern const struct address_space_operations erofs_raw_access_aops;
extern const struct address_space_operations z_erofs_aops;
@@ -371,6 +390,8 @@ extern const struct address_space_operations z_erofs_aops;
enum {
BH_Encoded = BH_PrivateStart,
BH_FullMapped,
+ BH_Fragment,
+ BH_Partialref,
};
/* Has a disk mapping */
@@ -381,6 +402,10 @@ enum {
#define EROFS_MAP_ENCODED (1 << BH_Encoded)
/* The length of extent is full */
#define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped)
+/* Located in the special packed inode */
+#define EROFS_MAP_FRAGMENT (1 << BH_Fragment)
+/* The extent refers to partial decompressed data */
+#define EROFS_MAP_PARTIAL_REF (1 << BH_Partialref)
struct erofs_map_blocks {
struct erofs_buf buf;
@@ -402,11 +427,12 @@ struct erofs_map_blocks {
#define EROFS_GET_BLOCKS_FIEMAP 0x0002
/* Used to map the whole extent if non-negligible data is requested for LZMA */
#define EROFS_GET_BLOCKS_READMORE 0x0004
-/* Used to map tail extent for tailpacking inline pcluster */
+/* Used to map tail extent for tailpacking inline or fragment pcluster */
#define EROFS_GET_BLOCKS_FINDTAIL 0x0008
enum {
Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
+ Z_EROFS_COMPRESSION_INTERLACED,
Z_EROFS_COMPRESSION_RUNTIME_MAX
};
@@ -466,7 +492,7 @@ extern const struct inode_operations erofs_generic_iops;
extern const struct inode_operations erofs_symlink_iops;
extern const struct inode_operations erofs_fast_symlink_iops;
-struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid, bool dir);
+struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid);
int erofs_getattr(struct user_namespace *mnt_userns, const struct path *path,
struct kstat *stat, u32 request_mask,
unsigned int query_flags);
@@ -581,27 +607,26 @@ static inline int z_erofs_load_lzma_config(struct super_block *sb,
int erofs_fscache_register_fs(struct super_block *sb);
void erofs_fscache_unregister_fs(struct super_block *sb);
-int erofs_fscache_register_cookie(struct super_block *sb,
- struct erofs_fscache **fscache,
- char *name, bool need_inode);
-void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache);
+struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
+ char *name, bool need_inode);
+void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache);
extern const struct address_space_operations erofs_fscache_access_aops;
#else
static inline int erofs_fscache_register_fs(struct super_block *sb)
{
- return 0;
+ return -EOPNOTSUPP;
}
static inline void erofs_fscache_unregister_fs(struct super_block *sb) {}
-static inline int erofs_fscache_register_cookie(struct super_block *sb,
- struct erofs_fscache **fscache,
- char *name, bool need_inode)
+static inline
+struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
+ char *name, bool need_inode)
{
- return -EOPNOTSUPP;
+ return ERR_PTR(-EOPNOTSUPP);
}
-static inline void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache)
+static inline void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache)
{
}
#endif
diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c
index fd75506799c4..0dc34721080c 100644
--- a/fs/erofs/namei.c
+++ b/fs/erofs/namei.c
@@ -185,7 +185,6 @@ int erofs_namei(struct inode *dir, const struct qstr *name, erofs_nid_t *nid,
if (IS_ERR(de))
return PTR_ERR(de);
- /* the target page has been mapped */
if (ndirents)
de = find_target_dirent(&qn, (u8 *)de, EROFS_BLKSIZ, ndirents);
@@ -197,9 +196,7 @@ int erofs_namei(struct inode *dir, const struct qstr *name, erofs_nid_t *nid,
return PTR_ERR_OR_ZERO(de);
}
-/* NOTE: i_mutex is already held by vfs */
-static struct dentry *erofs_lookup(struct inode *dir,
- struct dentry *dentry,
+static struct dentry *erofs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
int err;
@@ -207,17 +204,11 @@ static struct dentry *erofs_lookup(struct inode *dir,
unsigned int d_type;
struct inode *inode;
- DBG_BUGON(!d_really_is_negative(dentry));
- /* dentry must be unhashed in lookup, no need to worry about */
- DBG_BUGON(!d_unhashed(dentry));
-
trace_erofs_lookup(dir, dentry, flags);
- /* file name exceeds fs limit */
if (dentry->d_name.len > EROFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
- /* false uninitialized warnings on gcc 4.8.x */
err = erofs_namei(dir, &dentry->d_name, &nid, &d_type);
if (err == -ENOENT) {
@@ -228,7 +219,7 @@ static struct dentry *erofs_lookup(struct inode *dir,
} else {
erofs_dbg("%s, %pd (nid %llu) found, d_type %u", __func__,
dentry, nid, d_type);
- inode = erofs_iget(dir->i_sb, nid, d_type == FT_DIR);
+ inode = erofs_iget(dir->i_sb, nid);
}
return d_splice_alias(inode, dentry);
}
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 3173debeaa5a..2cf96ce1c32e 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -224,10 +224,10 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
struct erofs_device_info *dif, erofs_off_t *pos)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
+ struct erofs_fscache *fscache;
struct erofs_deviceslot *dis;
struct block_device *bdev;
void *ptr;
- int ret;
ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*pos), EROFS_KMAP);
if (IS_ERR(ptr))
@@ -245,10 +245,10 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
}
if (erofs_is_fscache_mode(sb)) {
- ret = erofs_fscache_register_cookie(sb, &dif->fscache,
- dif->path, false);
- if (ret)
- return ret;
+ fscache = erofs_fscache_register_cookie(sb, dif->path, false);
+ if (IS_ERR(fscache))
+ return PTR_ERR(fscache);
+ dif->fscache = fscache;
} else {
bdev = blkdev_get_by_path(dif->path, FMODE_READ | FMODE_EXCL,
sb->s_type);
@@ -381,6 +381,17 @@ static int erofs_read_superblock(struct super_block *sb)
#endif
sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact));
sbi->root_nid = le16_to_cpu(dsb->root_nid);
+#ifdef CONFIG_EROFS_FS_ZIP
+ sbi->packed_inode = NULL;
+ if (erofs_sb_has_fragments(sbi) && dsb->packed_nid) {
+ sbi->packed_inode =
+ erofs_iget(sb, le64_to_cpu(dsb->packed_nid));
+ if (IS_ERR(sbi->packed_inode)) {
+ ret = PTR_ERR(sbi->packed_inode);
+ goto out;
+ }
+ }
+#endif
sbi->inos = le64_to_cpu(dsb->inos);
sbi->build_time = le64_to_cpu(dsb->build_time);
@@ -411,6 +422,10 @@ static int erofs_read_superblock(struct super_block *sb)
erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!");
if (erofs_is_fscache_mode(sb))
erofs_info(sb, "EXPERIMENTAL fscache-based on-demand read feature in use. Use at your own risk!");
+ if (erofs_sb_has_fragments(sbi))
+ erofs_info(sb, "EXPERIMENTAL compressed fragments feature in use. Use at your own risk!");
+ if (erofs_sb_has_dedupe(sbi))
+ erofs_info(sb, "EXPERIMENTAL global deduplication feature in use. Use at your own risk!");
out:
erofs_put_metabuf(&buf);
return ret;
@@ -440,6 +455,7 @@ enum {
Opt_dax_enum,
Opt_device,
Opt_fsid,
+ Opt_domain_id,
Opt_err
};
@@ -465,6 +481,7 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = {
fsparam_enum("dax", Opt_dax_enum, erofs_dax_param_enums),
fsparam_string("device", Opt_device),
fsparam_string("fsid", Opt_fsid),
+ fsparam_string("domain_id", Opt_domain_id),
{}
};
@@ -570,6 +587,16 @@ static int erofs_fc_parse_param(struct fs_context *fc,
errorfc(fc, "fsid option not supported");
#endif
break;
+ case Opt_domain_id:
+#ifdef CONFIG_EROFS_FS_ONDEMAND
+ kfree(ctx->opt.domain_id);
+ ctx->opt.domain_id = kstrdup(param->string, GFP_KERNEL);
+ if (!ctx->opt.domain_id)
+ return -ENOMEM;
+#else
+ errorfc(fc, "domain_id option not supported");
+#endif
+ break;
default:
return -ENOPARAM;
}
@@ -641,7 +668,7 @@ static int erofs_init_managed_cache(struct super_block *sb) { return 0; }
static struct inode *erofs_nfs_get_inode(struct super_block *sb,
u64 ino, u32 generation)
{
- return erofs_iget(sb, ino, false);
+ return erofs_iget(sb, ino);
}
static struct dentry *erofs_fh_to_dentry(struct super_block *sb,
@@ -667,7 +694,7 @@ static struct dentry *erofs_get_parent(struct dentry *child)
err = erofs_namei(d_inode(child), &dotdot_name, &nid, &d_type);
if (err)
return ERR_PTR(err);
- return d_obtain_alias(erofs_iget(child->d_sb, nid, d_type == FT_DIR));
+ return d_obtain_alias(erofs_iget(child->d_sb, nid));
}
static const struct export_operations erofs_export_ops = {
@@ -676,6 +703,13 @@ static const struct export_operations erofs_export_ops = {
.get_parent = erofs_get_parent,
};
+static int erofs_fc_fill_pseudo_super(struct super_block *sb, struct fs_context *fc)
+{
+ static const struct tree_descr empty_descr = {""};
+
+ return simple_fill_super(sb, EROFS_SUPER_MAGIC, &empty_descr);
+}
+
static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct inode *inode;
@@ -695,6 +729,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_fs_info = sbi;
sbi->opt = ctx->opt;
ctx->opt.fsid = NULL;
+ ctx->opt.domain_id = NULL;
sbi->devs = ctx->devs;
ctx->devs = NULL;
@@ -706,11 +741,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
if (err)
return err;
- err = erofs_fscache_register_cookie(sb, &sbi->s_fscache,
- sbi->opt.fsid, true);
- if (err)
- return err;
-
err = super_setup_bdi(sb);
if (err)
return err;
@@ -752,7 +782,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
#endif
/* get the root inode */
- inode = erofs_iget(sb, ROOT_NID(sbi), true);
+ inode = erofs_iget(sb, ROOT_NID(sbi));
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -781,6 +811,11 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
return 0;
}
+static int erofs_fc_anon_get_tree(struct fs_context *fc)
+{
+ return get_tree_nodev(fc, erofs_fc_fill_pseudo_super);
+}
+
static int erofs_fc_get_tree(struct fs_context *fc)
{
struct erofs_fs_context *ctx = fc->fs_private;
@@ -817,7 +852,8 @@ static int erofs_release_device_info(int id, void *ptr, void *data)
fs_put_dax(dif->dax_dev, NULL);
if (dif->bdev)
blkdev_put(dif->bdev, FMODE_READ | FMODE_EXCL);
- erofs_fscache_unregister_cookie(&dif->fscache);
+ erofs_fscache_unregister_cookie(dif->fscache);
+ dif->fscache = NULL;
kfree(dif->path);
kfree(dif);
return 0;
@@ -838,6 +874,7 @@ static void erofs_fc_free(struct fs_context *fc)
erofs_free_dev_context(ctx->devs);
kfree(ctx->opt.fsid);
+ kfree(ctx->opt.domain_id);
kfree(ctx);
}
@@ -848,10 +885,21 @@ static const struct fs_context_operations erofs_context_ops = {
.free = erofs_fc_free,
};
+static const struct fs_context_operations erofs_anon_context_ops = {
+ .get_tree = erofs_fc_anon_get_tree,
+};
+
static int erofs_init_fs_context(struct fs_context *fc)
{
- struct erofs_fs_context *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ struct erofs_fs_context *ctx;
+
+ /* pseudo mount for anon inodes */
+ if (fc->sb_flags & SB_KERNMOUNT) {
+ fc->ops = &erofs_anon_context_ops;
+ return 0;
+ }
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
ctx->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL);
@@ -878,8 +926,14 @@ static void erofs_kill_sb(struct super_block *sb)
WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC);
+ /* pseudo mount for anon inodes */
+ if (sb->s_flags & SB_KERNMOUNT) {
+ kill_anon_super(sb);
+ return;
+ }
+
if (erofs_is_fscache_mode(sb))
- generic_shutdown_super(sb);
+ kill_anon_super(sb);
else
kill_block_super(sb);
@@ -889,9 +943,9 @@ static void erofs_kill_sb(struct super_block *sb)
erofs_free_dev_context(sbi->devs);
fs_put_dax(sbi->dax_dev, NULL);
- erofs_fscache_unregister_cookie(&sbi->s_fscache);
erofs_fscache_unregister_fs(sb);
kfree(sbi->opt.fsid);
+ kfree(sbi->opt.domain_id);
kfree(sbi);
sb->s_fs_info = NULL;
}
@@ -908,11 +962,13 @@ static void erofs_put_super(struct super_block *sb)
#ifdef CONFIG_EROFS_FS_ZIP
iput(sbi->managed_cache);
sbi->managed_cache = NULL;
+ iput(sbi->packed_inode);
+ sbi->packed_inode = NULL;
#endif
- erofs_fscache_unregister_cookie(&sbi->s_fscache);
+ erofs_fscache_unregister_fs(sb);
}
-static struct file_system_type erofs_fs_type = {
+struct file_system_type erofs_fs_type = {
.owner = THIS_MODULE,
.name = "erofs",
.init_fs_context = erofs_init_fs_context,
@@ -1044,6 +1100,8 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root)
#ifdef CONFIG_EROFS_FS_ONDEMAND
if (opt->fsid)
seq_printf(seq, ",fsid=%s", opt->fsid);
+ if (opt->domain_id)
+ seq_printf(seq, ",domain_id=%s", opt->domain_id);
#endif
return 0;
}
diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c
index c1383e508bbe..783bb7b21b51 100644
--- a/fs/erofs/sysfs.c
+++ b/fs/erofs/sysfs.c
@@ -76,6 +76,8 @@ EROFS_ATTR_FEATURE(device_table);
EROFS_ATTR_FEATURE(compr_head2);
EROFS_ATTR_FEATURE(sb_chksum);
EROFS_ATTR_FEATURE(ztailpacking);
+EROFS_ATTR_FEATURE(fragments);
+EROFS_ATTR_FEATURE(dedupe);
static struct attribute *erofs_feat_attrs[] = {
ATTR_LIST(zero_padding),
@@ -86,6 +88,8 @@ static struct attribute *erofs_feat_attrs[] = {
ATTR_LIST(compr_head2),
ATTR_LIST(sb_chksum),
ATTR_LIST(ztailpacking),
+ ATTR_LIST(fragments),
+ ATTR_LIST(dedupe),
NULL,
};
ATTRIBUTE_GROUPS(erofs_feat);
@@ -201,12 +205,27 @@ static struct kobject erofs_feat = {
int erofs_register_sysfs(struct super_block *sb)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
+ char *name;
+ char *str = NULL;
int err;
+ if (erofs_is_fscache_mode(sb)) {
+ if (sbi->opt.domain_id) {
+ str = kasprintf(GFP_KERNEL, "%s,%s", sbi->opt.domain_id,
+ sbi->opt.fsid);
+ if (!str)
+ return -ENOMEM;
+ name = str;
+ } else {
+ name = sbi->opt.fsid;
+ }
+ } else {
+ name = sb->s_id;
+ }
sbi->s_kobj.kset = &erofs_root;
init_completion(&sbi->s_kobj_unregister);
- err = kobject_init_and_add(&sbi->s_kobj, &erofs_sb_ktype, NULL, "%s",
- erofs_is_fscache_mode(sb) ? sbi->opt.fsid : sb->s_id);
+ err = kobject_init_and_add(&sbi->s_kobj, &erofs_sb_ktype, NULL, "%s", name);
+ kfree(str);
if (err)
goto put_sb_kobj;
return 0;
diff --git a/fs/erofs/xattr.h b/fs/erofs/xattr.h
index 332462c59f11..0a43c9ee9f8f 100644
--- a/fs/erofs/xattr.h
+++ b/fs/erofs/xattr.h
@@ -39,9 +39,7 @@ static inline unsigned int xattrblock_offset(struct erofs_sb_info *sbi,
#ifdef CONFIG_EROFS_FS_XATTR
extern const struct xattr_handler erofs_xattr_user_handler;
extern const struct xattr_handler erofs_xattr_trusted_handler;
-#ifdef CONFIG_EROFS_FS_SECURITY
extern const struct xattr_handler erofs_xattr_security_handler;
-#endif
static inline const struct xattr_handler *erofs_xattr_handler(unsigned int idx)
{
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 5792ca9e0d5e..c7f24fc7efd5 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -7,6 +7,7 @@
#include "zdata.h"
#include "compress.h"
#include <linux/prefetch.h>
+#include <linux/psi.h>
#include <trace/events/erofs.h>
@@ -650,6 +651,35 @@ static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
la < fe->headoffset;
}
+static int z_erofs_read_fragment(struct inode *inode, erofs_off_t pos,
+ struct page *page, unsigned int pageofs,
+ unsigned int len)
+{
+ struct inode *packed_inode = EROFS_I_SB(inode)->packed_inode;
+ struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
+ u8 *src, *dst;
+ unsigned int i, cnt;
+
+ pos += EROFS_I(inode)->z_fragmentoff;
+ for (i = 0; i < len; i += cnt) {
+ cnt = min_t(unsigned int, len - i,
+ EROFS_BLKSIZ - erofs_blkoff(pos));
+ src = erofs_bread(&buf, packed_inode,
+ erofs_blknr(pos), EROFS_KMAP);
+ if (IS_ERR(src)) {
+ erofs_put_metabuf(&buf);
+ return PTR_ERR(src);
+ }
+
+ dst = kmap_local_page(page);
+ memcpy(dst + pageofs + i, src + erofs_blkoff(pos), cnt);
+ kunmap_local(dst);
+ pos += cnt;
+ }
+ erofs_put_metabuf(&buf);
+ return 0;
+}
+
static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
struct page *page, struct page **pagepool)
{
@@ -688,7 +718,8 @@ repeat:
/* didn't get a valid pcluster previously (very rare) */
}
- if (!(map->m_flags & EROFS_MAP_MAPPED))
+ if (!(map->m_flags & EROFS_MAP_MAPPED) ||
+ map->m_flags & EROFS_MAP_FRAGMENT)
goto hitted;
err = z_erofs_collector_begin(fe);
@@ -735,6 +766,24 @@ hitted:
zero_user_segment(page, cur, end);
goto next_part;
}
+ if (map->m_flags & EROFS_MAP_FRAGMENT) {
+ unsigned int pageofs, skip, len;
+
+ if (offset > map->m_la) {
+ pageofs = 0;
+ skip = offset - map->m_la;
+ } else {
+ pageofs = map->m_la & ~PAGE_MASK;
+ skip = 0;
+ }
+ len = min_t(unsigned int, map->m_llen - skip, end - cur);
+ err = z_erofs_read_fragment(inode, skip, page, pageofs, len);
+ if (err)
+ goto out;
+ ++spiltted;
+ tight = false;
+ goto next_part;
+ }
exclusive = (!cur && (!spiltted || tight));
if (cur)
@@ -764,14 +813,14 @@ retry:
++spiltted;
if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK))
fe->pcl->multibases = true;
-
- if ((map->m_flags & EROFS_MAP_FULL_MAPPED) &&
- fe->pcl->length == map->m_llen)
- fe->pcl->partial = false;
if (fe->pcl->length < offset + end - map->m_la) {
fe->pcl->length = offset + end - map->m_la;
fe->pcl->pageofs_out = map->m_la & ~PAGE_MASK;
}
+ if ((map->m_flags & EROFS_MAP_FULL_MAPPED) &&
+ !(map->m_flags & EROFS_MAP_PARTIAL_REF) &&
+ fe->pcl->length == map->m_llen)
+ fe->pcl->partial = false;
next_part:
/* shorten the remaining extent to update progress */
map->m_llen = offset + cur - map->m_la;
@@ -838,15 +887,13 @@ static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be,
if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK)) {
unsigned int pgnr;
- struct page *oldpage;
pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT;
DBG_BUGON(pgnr >= be->nr_pages);
- oldpage = be->decompressed_pages[pgnr];
- be->decompressed_pages[pgnr] = bvec->page;
-
- if (!oldpage)
+ if (!be->decompressed_pages[pgnr]) {
+ be->decompressed_pages[pgnr] = bvec->page;
return;
+ }
}
/* (cold path) one pcluster is requested multiple times */
@@ -1365,6 +1412,8 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
struct block_device *last_bdev;
unsigned int nr_bios = 0;
struct bio *bio = NULL;
+ /* initialize to 1 to make skip psi_memstall_leave unless needed */
+ unsigned long pflags = 1;
bi_private = jobqueueset_init(sb, q, fgq, force_fg);
qtail[JQ_BYPASS] = &q[JQ_BYPASS]->head;
@@ -1414,10 +1463,15 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
if (bio && (cur != last_index + 1 ||
last_bdev != mdev.m_bdev)) {
submit_bio_retry:
+ if (!pflags)
+ psi_memstall_leave(&pflags);
submit_bio(bio);
bio = NULL;
}
+ if (unlikely(PageWorkingset(page)))
+ psi_memstall_enter(&pflags);
+
if (!bio) {
bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS,
REQ_OP_READ, GFP_NOIO);
@@ -1445,8 +1499,11 @@ submit_bio_retry:
move_to_bypass_jobqueue(pcl, qtail, owned_head);
} while (owned_head != Z_EROFS_PCLUSTER_TAIL);
- if (bio)
+ if (bio) {
+ if (!pflags)
+ psi_memstall_leave(&pflags);
submit_bio(bio);
+ }
/*
* although background is preferred, no one is pending for submission.
diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h
index e7f04c4fbb81..d98c95212985 100644
--- a/fs/erofs/zdata.h
+++ b/fs/erofs/zdata.h
@@ -126,10 +126,10 @@ static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
}
/*
- * bit 31: I/O error occurred on this page
- * bit 0 - 30: remaining parts to complete this page
+ * bit 30: I/O error occurred on this page
+ * bit 0 - 29: remaining parts to complete this page
*/
-#define Z_EROFS_PAGE_EIO (1 << 31)
+#define Z_EROFS_PAGE_EIO (1 << 30)
static inline void z_erofs_onlinepage_init(struct page *page)
{
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index d58549ca1df9..0bb66927e3d0 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -17,7 +17,7 @@ int z_erofs_fill_inode(struct inode *inode)
struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
if (!erofs_sb_has_big_pcluster(sbi) &&
- !erofs_sb_has_ztailpacking(sbi) &&
+ !erofs_sb_has_ztailpacking(sbi) && !erofs_sb_has_fragments(sbi) &&
vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
vi->z_advise = 0;
vi->z_algorithmtype[0] = 0;
@@ -55,20 +55,25 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags))
goto out_unlock;
- DBG_BUGON(!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
- !erofs_sb_has_ztailpacking(EROFS_SB(sb)) &&
- vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY);
-
pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
vi->xattr_isize, 8);
- kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos),
- EROFS_KMAP_ATOMIC);
+ kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP);
if (IS_ERR(kaddr)) {
err = PTR_ERR(kaddr);
goto out_unlock;
}
h = kaddr + erofs_blkoff(pos);
+ /*
+ * if the highest bit of the 8-byte map header is set, the whole file
+ * is stored in the packed inode. The rest bits keeps z_fragmentoff.
+ */
+ if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) {
+ vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
+ vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63);
+ vi->z_tailextent_headlcn = 0;
+ goto done;
+ }
vi->z_advise = le16_to_cpu(h->h_advise);
vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
@@ -79,7 +84,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
erofs_err(sb, "unknown HEAD%u format %u for nid %llu, please upgrade kernel",
headnr + 1, vi->z_algorithmtype[headnr], vi->nid);
err = -EOPNOTSUPP;
- goto unmap_done;
+ goto out_put_metabuf;
}
vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7);
@@ -89,7 +94,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu",
vi->nid);
err = -EFSCORRUPTED;
- goto unmap_done;
+ goto out_put_metabuf;
}
if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION &&
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^
@@ -97,12 +102,8 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu",
vi->nid);
err = -EFSCORRUPTED;
- goto unmap_done;
+ goto out_put_metabuf;
}
-unmap_done:
- erofs_put_metabuf(&buf);
- if (err)
- goto out_unlock;
if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) {
struct erofs_map_blocks map = {
@@ -121,11 +122,28 @@ unmap_done:
err = -EFSCORRUPTED;
}
if (err < 0)
- goto out_unlock;
+ goto out_put_metabuf;
+ }
+
+ if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER &&
+ !(h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT)) {
+ struct erofs_map_blocks map = {
+ .buf = __EROFS_BUF_INITIALIZER
+ };
+
+ vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff);
+ err = z_erofs_do_map_blocks(inode, &map,
+ EROFS_GET_BLOCKS_FINDTAIL);
+ erofs_put_metabuf(&map.buf);
+ if (err < 0)
+ goto out_put_metabuf;
}
+done:
/* paired with smp_mb() at the beginning of the function */
smp_mb();
set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
+out_put_metabuf:
+ erofs_put_metabuf(&buf);
out_unlock:
clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags);
return err;
@@ -143,20 +161,9 @@ struct z_erofs_maprecorder {
u16 delta[2];
erofs_blk_t pblk, compressedblks;
erofs_off_t nextpackoff;
+ bool partialref;
};
-static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m,
- erofs_blk_t eblk)
-{
- struct super_block *const sb = m->inode->i_sb;
-
- m->kaddr = erofs_read_metabuf(&m->map->buf, sb, eblk,
- EROFS_KMAP_ATOMIC);
- if (IS_ERR(m->kaddr))
- return PTR_ERR(m->kaddr);
- return 0;
-}
-
static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
unsigned long lcn)
{
@@ -169,11 +176,11 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
lcn * sizeof(struct z_erofs_vle_decompressed_index);
struct z_erofs_vle_decompressed_index *di;
unsigned int advise, type;
- int err;
- err = z_erofs_reload_indexes(m, erofs_blknr(pos));
- if (err)
- return err;
+ m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb,
+ erofs_blknr(pos), EROFS_KMAP_ATOMIC);
+ if (IS_ERR(m->kaddr))
+ return PTR_ERR(m->kaddr);
m->nextpackoff = pos + sizeof(struct z_erofs_vle_decompressed_index);
m->lcn = lcn;
@@ -201,6 +208,8 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1:
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2:
+ if (advise & Z_EROFS_VLE_DI_PARTIAL_REF)
+ m->partialref = true;
m->clusterofs = le16_to_cpu(di->di_clusterofs);
m->pblk = le32_to_cpu(di->di_u.blkaddr);
break;
@@ -370,7 +379,6 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
unsigned int compacted_4b_initial, compacted_2b;
unsigned int amortizedshift;
erofs_off_t pos;
- int err;
if (lclusterbits != 12)
return -EOPNOTSUPP;
@@ -407,9 +415,10 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
amortizedshift = 2;
out:
pos += lcn * (1 << amortizedshift);
- err = z_erofs_reload_indexes(m, erofs_blknr(pos));
- if (err)
- return err;
+ m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb,
+ erofs_blknr(pos), EROFS_KMAP_ATOMIC);
+ if (IS_ERR(m->kaddr))
+ return PTR_ERR(m->kaddr);
return unpack_compacted_index(m, amortizedshift, pos, lookahead);
}
@@ -598,6 +607,7 @@ static int z_erofs_do_map_blocks(struct inode *inode,
{
struct erofs_inode *const vi = EROFS_I(inode);
bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER;
+ bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
struct z_erofs_maprecorder m = {
.inode = inode,
.map = map,
@@ -663,15 +673,23 @@ static int z_erofs_do_map_blocks(struct inode *inode,
err = -EOPNOTSUPP;
goto unmap_out;
}
-
+ if (m.partialref)
+ map->m_flags |= EROFS_MAP_PARTIAL_REF;
map->m_llen = end - map->m_la;
- if (flags & EROFS_GET_BLOCKS_FINDTAIL)
+ if (flags & EROFS_GET_BLOCKS_FINDTAIL) {
vi->z_tailextent_headlcn = m.lcn;
+ /* for non-compact indexes, fragmentoff is 64 bits */
+ if (fragment &&
+ vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY)
+ vi->z_fragmentoff |= (u64)m.pblk << 32;
+ }
if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) {
map->m_flags |= EROFS_MAP_META;
map->m_pa = vi->z_idataoff;
map->m_plen = vi->z_idata_size;
+ } else if (fragment && m.lcn == vi->z_tailextent_headlcn) {
+ map->m_flags |= EROFS_MAP_FRAGMENT;
} else {
map->m_pa = blknr_to_addr(m.pblk);
err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
@@ -679,12 +697,18 @@ static int z_erofs_do_map_blocks(struct inode *inode,
goto out;
}
- if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN)
- map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
- else if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2)
+ if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN) {
+ if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER)
+ map->m_algorithmformat =
+ Z_EROFS_COMPRESSION_INTERLACED;
+ else
+ map->m_algorithmformat =
+ Z_EROFS_COMPRESSION_SHIFTED;
+ } else if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) {
map->m_algorithmformat = vi->z_algorithmtype[1];
- else
+ } else {
map->m_algorithmformat = vi->z_algorithmtype[0];
+ }
if ((flags & EROFS_GET_BLOCKS_FIEMAP) ||
((flags & EROFS_GET_BLOCKS_READMORE) &&
@@ -705,10 +729,10 @@ out:
return err;
}
-int z_erofs_map_blocks_iter(struct inode *inode,
- struct erofs_map_blocks *map,
+int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
int flags)
{
+ struct erofs_inode *const vi = EROFS_I(inode);
int err = 0;
trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
@@ -725,6 +749,15 @@ int z_erofs_map_blocks_iter(struct inode *inode,
if (err)
goto out;
+ if ((vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) &&
+ !vi->z_tailextent_headlcn) {
+ map->m_la = 0;
+ map->m_llen = inode->i_size;
+ map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED |
+ EROFS_MAP_FRAGMENT;
+ goto out;
+ }
+
err = z_erofs_do_map_blocks(inode, map, flags);
out:
trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);
@@ -751,7 +784,8 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset,
iomap->length = map.m_llen;
if (map.m_flags & EROFS_MAP_MAPPED) {
iomap->type = IOMAP_MAPPED;
- iomap->addr = map.m_pa;
+ iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ?
+ IOMAP_NULL_ADDR : map.m_pa;
} else {
iomap->type = IOMAP_HOLE;
iomap->addr = IOMAP_NULL_ADDR;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 3627dd7d25db..c0ffee99ad23 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -69,17 +69,17 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
* it returns false, the eventfd_signal() call should be deferred to a
* safe context.
*/
- if (WARN_ON_ONCE(current->in_eventfd_signal))
+ if (WARN_ON_ONCE(current->in_eventfd))
return 0;
spin_lock_irqsave(&ctx->wqh.lock, flags);
- current->in_eventfd_signal = 1;
+ current->in_eventfd = 1;
if (ULLONG_MAX - ctx->count < n)
n = ULLONG_MAX - ctx->count;
ctx->count += n;
if (waitqueue_active(&ctx->wqh))
wake_up_locked_poll(&ctx->wqh, EPOLLIN);
- current->in_eventfd_signal = 0;
+ current->in_eventfd = 0;
spin_unlock_irqrestore(&ctx->wqh.lock, flags);
return n;
@@ -253,8 +253,10 @@ static ssize_t eventfd_read(struct kiocb *iocb, struct iov_iter *to)
__set_current_state(TASK_RUNNING);
}
eventfd_ctx_do_read(ctx, &ucnt);
+ current->in_eventfd = 1;
if (waitqueue_active(&ctx->wqh))
wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
+ current->in_eventfd = 0;
spin_unlock_irq(&ctx->wqh.lock);
if (unlikely(copy_to_iter(&ucnt, sizeof(ucnt), to) != sizeof(ucnt)))
return -EFAULT;
@@ -301,8 +303,10 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c
}
if (likely(res > 0)) {
ctx->count += ucnt;
+ current->in_eventfd = 1;
if (waitqueue_active(&ctx->wqh))
wake_up_locked_poll(&ctx->wqh, EPOLLIN);
+ current->in_eventfd = 0;
}
spin_unlock_irq(&ctx->wqh.lock);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 8b56b94e2f56..52954d4637b5 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1065,7 +1065,7 @@ static inline bool list_add_tail_lockless(struct list_head *new,
* added to the list from another CPU: the winner observes
* new->next == new.
*/
- if (cmpxchg(&new->next, new, head) != new)
+ if (!try_cmpxchg(&new->next, &new, head))
return false;
/*
diff --git a/fs/exec.c b/fs/exec.c
index d046dbb9cbd0..a0b1f0337a62 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -28,7 +28,6 @@
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/mm.h>
-#include <linux/vmacache.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/swap.h>
@@ -683,6 +682,8 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
unsigned long length = old_end - old_start;
unsigned long new_start = old_start - shift;
unsigned long new_end = old_end - shift;
+ VMA_ITERATOR(vmi, mm, new_start);
+ struct vm_area_struct *next;
struct mmu_gather tlb;
BUG_ON(new_start > new_end);
@@ -691,7 +692,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
* ensure there are no vmas between where we want to go
* and where we are
*/
- if (vma != find_vma(mm, new_start))
+ if (vma != vma_next(&vmi))
return -EFAULT;
/*
@@ -710,12 +711,13 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
lru_add_drain();
tlb_gather_mmu(&tlb, mm);
+ next = vma_next(&vmi);
if (new_end > old_start) {
/*
* when the old and new regions overlap clear from new_end.
*/
free_pgd_range(&tlb, new_end, old_end, new_end,
- vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
+ next ? next->vm_start : USER_PGTABLES_CEILING);
} else {
/*
* otherwise, clean from old_start; this is done to not touch
@@ -724,7 +726,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
* for the others its just a little faster.
*/
free_pgd_range(&tlb, old_start, old_end, new_end,
- vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
+ next ? next->vm_start : USER_PGTABLES_CEILING);
}
tlb_finish_mmu(&tlb);
@@ -957,8 +959,7 @@ struct file *open_exec(const char *name)
}
EXPORT_SYMBOL(open_exec);
-#if defined(CONFIG_HAVE_AOUT) || defined(CONFIG_BINFMT_FLAT) || \
- defined(CONFIG_BINFMT_ELF_FDPIC)
+#if defined(CONFIG_BINFMT_FLAT) || defined(CONFIG_BINFMT_ELF_FDPIC)
ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
{
ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
@@ -1023,9 +1024,9 @@ static int exec_mmap(struct mm_struct *mm)
activate_mm(active_mm, mm);
if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
local_irq_enable();
- tsk->mm->vmacache_seqnum = 0;
- vmacache_flush(tsk);
+ lru_gen_add_mm(mm);
task_unlock(tsk);
+ lru_gen_use_mm(mm);
if (old_mm) {
mmap_read_unlock(old_mm);
BUG_ON(active_mm != old_mm);
@@ -1196,11 +1197,11 @@ static int unshare_sighand(struct task_struct *me)
return -ENOMEM;
refcount_set(&newsighand->count, 1);
- memcpy(newsighand->action, oldsighand->action,
- sizeof(newsighand->action));
write_lock_irq(&tasklist_lock);
spin_lock(&oldsighand->siglock);
+ memcpy(newsighand->action, oldsighand->action,
+ sizeof(newsighand->action));
rcu_assign_pointer(me->sighand, newsighand);
spin_unlock(&oldsighand->siglock);
write_unlock_irq(&tasklist_lock);
@@ -1588,7 +1589,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
{
/* Handle suid and sgid on files */
struct user_namespace *mnt_userns;
- struct inode *inode;
+ struct inode *inode = file_inode(file);
unsigned int mode;
kuid_t uid;
kgid_t gid;
@@ -1599,7 +1600,6 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
if (task_no_new_privs(current))
return;
- inode = file->f_path.dentry->d_inode;
mode = READ_ONCE(inode->i_mode);
if (!(mode & (S_ISUID|S_ISGID)))
return;
@@ -1881,7 +1881,7 @@ static int do_execveat_common(int fd, struct filename *filename,
* whether NPROC limit is still exceeded.
*/
if ((current->flags & PF_NPROC_EXCEEDED) &&
- is_ucounts_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
+ is_rlimit_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
retval = -EAGAIN;
goto out_ret;
}
diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index a27b55ec060a..0fc08fdcba73 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -212,9 +212,9 @@ static void exfat_free_namebuf(struct exfat_dentry_namebuf *nb)
/* skip iterating emit_dots when dir is empty */
#define ITER_POS_FILLED_DOTS (2)
-static int exfat_iterate(struct file *filp, struct dir_context *ctx)
+static int exfat_iterate(struct file *file, struct dir_context *ctx)
{
- struct inode *inode = filp->f_path.dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
struct inode *tmp;
struct exfat_dir_entry de;
@@ -228,7 +228,7 @@ static int exfat_iterate(struct file *filp, struct dir_context *ctx)
mutex_lock(&EXFAT_SB(sb)->s_lock);
cpos = ctx->pos;
- if (!dir_emit_dots(filp, ctx))
+ if (!dir_emit_dots(file, ctx))
goto unlock;
if (ctx->pos == ITER_POS_FILLED_DOTS) {
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index a795437b86d0..5590a1e83126 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -552,7 +552,7 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info)
inode->i_uid = sbi->options.fs_uid;
inode->i_gid = sbi->options.fs_gid;
inode_inc_iversion(inode);
- inode->i_generation = prandom_u32();
+ inode->i_generation = get_random_u32();
if (info->attr & ATTR_SUBDIR) { /* directory */
inode->i_generation &= ~1;
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 3ef80d000e13..c648a493faf2 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -248,21 +248,20 @@ struct getdents_callback {
* A rather strange filldir function to capture
* the name matching the specified inode number.
*/
-static int filldir_one(struct dir_context *ctx, const char *name, int len,
+static bool filldir_one(struct dir_context *ctx, const char *name, int len,
loff_t pos, u64 ino, unsigned int d_type)
{
struct getdents_callback *buf =
container_of(ctx, struct getdents_callback, ctx);
- int result = 0;
buf->sequence++;
if (buf->ino == ino && len <= NAME_MAX) {
memcpy(buf->name, name, len);
buf->name[len] = '\0';
buf->found = 1;
- result = -1;
+ return false; // no more
}
- return result;
+ return true;
}
/**
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index c17ccc19b938..5dc0a31f4a08 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -126,6 +126,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
struct ext2_group_desc * desc;
struct buffer_head * bh = NULL;
ext2_fsblk_t bitmap_blk;
+ int ret;
desc = ext2_get_group_desc(sb, block_group, NULL);
if (!desc)
@@ -139,10 +140,10 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
block_group, le32_to_cpu(desc->bg_block_bitmap));
return NULL;
}
- if (likely(bh_uptodate_or_lock(bh)))
+ ret = bh_read(bh, 0);
+ if (ret > 0)
return bh;
-
- if (bh_submit_read(bh) < 0) {
+ if (ret < 0) {
brelse(bh);
ext2_error(sb, __func__,
"Cannot read block bitmap - "
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 998dd2ac8008..f4944c4dee60 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -277,8 +277,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
int best_ndir = inodes_per_group;
int best_group = -1;
- group = prandom_u32();
- parent_group = (unsigned)group % ngroups;
+ parent_group = prandom_u32_max(ngroups);
for (i = 0; i < ngroups; i++) {
group = (parent_group + i) % ngroups;
desc = ext2_get_group_desc (sb, group, NULL);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 5fd9a22d2b70..9125eab85146 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -120,7 +120,7 @@ static int ext2_create (struct user_namespace * mnt_userns,
}
static int ext2_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
- struct dentry *dentry, umode_t mode)
+ struct file *file, umode_t mode)
{
struct inode *inode = ext2_new_inode(dir, mode, NULL);
if (IS_ERR(inode))
@@ -128,9 +128,9 @@ static int ext2_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
ext2_set_file_ops(inode);
mark_inode_dirty(inode);
- d_tmpfile(dentry, inode);
+ d_tmpfile(file, inode);
unlock_new_inode(inode);
- return 0;
+ return finish_open_simple(file, 0);
}
static int ext2_mknod (struct user_namespace * mnt_userns, struct inode * dir,
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 252c742379cf..03f2af98b1b4 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -163,7 +163,7 @@ static void ext2_put_super (struct super_block * sb)
db_count = sbi->s_gdb_count;
for (i = 0; i < db_count; i++)
brelse(sbi->s_group_desc[i]);
- kfree(sbi->s_group_desc);
+ kvfree(sbi->s_group_desc);
kfree(sbi->s_debts);
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
@@ -1052,6 +1052,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_blocks_per_group);
goto failed_mount;
}
+ /* At least inode table, bitmaps, and sb have to fit in one group */
+ if (sbi->s_blocks_per_group <= sbi->s_itb_per_group + 3) {
+ ext2_msg(sb, KERN_ERR,
+ "error: #blocks per group smaller than metadata size: %lu <= %lu",
+ sbi->s_blocks_per_group, sbi->s_inodes_per_group + 3);
+ goto failed_mount;
+ }
if (sbi->s_frags_per_group > sb->s_blocksize * 8) {
ext2_msg(sb, KERN_ERR,
"error: #fragments per group too big: %lu",
@@ -1065,9 +1072,14 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_inodes_per_group);
goto failed_mount;
}
+ if (sb_bdev_nr_blocks(sb) < le32_to_cpu(es->s_blocks_count)) {
+ ext2_msg(sb, KERN_ERR,
+ "bad geometry: block count %u exceeds size of device (%u blocks)",
+ le32_to_cpu(es->s_blocks_count),
+ (unsigned)sb_bdev_nr_blocks(sb));
+ goto failed_mount;
+ }
- if (EXT2_BLOCKS_PER_GROUP(sb) == 0)
- goto cantfind_ext2;
sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
le32_to_cpu(es->s_first_data_block) - 1)
/ EXT2_BLOCKS_PER_GROUP(sb)) + 1;
@@ -1080,7 +1092,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
}
db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) /
EXT2_DESC_PER_BLOCK(sb);
- sbi->s_group_desc = kmalloc_array(db_count,
+ sbi->s_group_desc = kvmalloc_array(db_count,
sizeof(struct buffer_head *),
GFP_KERNEL);
if (sbi->s_group_desc == NULL) {
@@ -1206,7 +1218,7 @@ failed_mount2:
for (i = 0; i < db_count; i++)
brelse(sbi->s_group_desc[i]);
failed_mount_group_desc:
- kfree(sbi->s_group_desc);
+ kvfree(sbi->s_group_desc);
kfree(sbi->s_debts);
failed_mount:
brelse(bh);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3bf9a6926798..8d5453852f98 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2977,6 +2977,7 @@ extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
extern int ext4_write_inode(struct inode *, struct writeback_control *);
extern int ext4_setattr(struct user_namespace *, struct dentry *,
struct iattr *);
+extern u32 ext4_dio_alignment(struct inode *inode);
extern int ext4_getattr(struct user_namespace *, const struct path *,
struct kstat *, u32, unsigned int);
extern void ext4_evict_inode(struct inode *);
@@ -3591,9 +3592,6 @@ extern bool empty_inline_dir(struct inode *dir, int *has_inline_data);
extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
struct ext4_dir_entry_2 **parent_de,
int *retval);
-extern int ext4_inline_data_fiemap(struct inode *inode,
- struct fiemap_extent_info *fieinfo,
- int *has_inline, __u64 start, __u64 len);
extern void *ext4_read_inline_link(struct inode *inode);
struct iomap;
@@ -3712,7 +3710,7 @@ extern int ext4_ext_insert_extent(handle_t *, struct inode *,
extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t,
struct ext4_ext_path **,
int flags);
-extern void ext4_ext_drop_refs(struct ext4_ext_path *);
+extern void ext4_free_ext_path(struct ext4_ext_path *);
extern int ext4_ext_check_inode(struct inode *inode);
extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 5235974126bd..f1956288307f 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -106,6 +106,25 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
return 0;
}
+static void ext4_ext_drop_refs(struct ext4_ext_path *path)
+{
+ int depth, i;
+
+ if (!path)
+ return;
+ depth = path->p_depth;
+ for (i = 0; i <= depth; i++, path++) {
+ brelse(path->p_bh);
+ path->p_bh = NULL;
+ }
+}
+
+void ext4_free_ext_path(struct ext4_ext_path *path)
+{
+ ext4_ext_drop_refs(path);
+ kfree(path);
+}
+
/*
* Make sure 'handle' has at least 'check_cred' credits. If not, restart
* transaction with 'restart_cred' credits. The function drops i_data_sem
@@ -636,8 +655,7 @@ int ext4_ext_precache(struct inode *inode)
ext4_set_inode_state(inode, EXT4_STATE_EXT_PRECACHED);
out:
up_read(&ei->i_data_sem);
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
return ret;
}
@@ -724,19 +742,6 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
#define ext4_ext_show_move(inode, path, newblock, level)
#endif
-void ext4_ext_drop_refs(struct ext4_ext_path *path)
-{
- int depth, i;
-
- if (!path)
- return;
- depth = path->p_depth;
- for (i = 0; i <= depth; i++, path++) {
- brelse(path->p_bh);
- path->p_bh = NULL;
- }
-}
-
/*
* ext4_ext_binsearch_idx:
* binary search for the closest index of the given block
@@ -955,8 +960,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block,
return path;
err:
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
if (orig_path)
*orig_path = NULL;
return ERR_PTR(ret);
@@ -2174,8 +2178,7 @@ merge:
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
cleanup:
- ext4_ext_drop_refs(npath);
- kfree(npath);
+ ext4_free_ext_path(npath);
return err;
}
@@ -3061,8 +3064,7 @@ again:
}
}
out:
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
path = NULL;
if (err == -EAGAIN)
goto again;
@@ -4375,8 +4377,7 @@ got_allocated_blocks:
allocated = map->m_len;
ext4_ext_show_leaf(inode, path);
out:
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
trace_ext4_ext_map_blocks_exit(inode, flags, map,
err ? err : allocated);
@@ -5245,8 +5246,7 @@ again:
break;
}
out:
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
return ret;
}
@@ -5538,15 +5538,13 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
EXT4_GET_BLOCKS_METADATA_NOFAIL);
}
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
if (ret < 0) {
up_write(&EXT4_I(inode)->i_data_sem);
goto out_stop;
}
} else {
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
}
ret = ext4_es_remove_extent(inode, offset_lblk,
@@ -5766,10 +5764,8 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
count -= len;
repeat:
- ext4_ext_drop_refs(path1);
- kfree(path1);
- ext4_ext_drop_refs(path2);
- kfree(path2);
+ ext4_free_ext_path(path1);
+ ext4_free_ext_path(path2);
path1 = path2 = NULL;
}
return replaced_count;
@@ -5848,8 +5844,7 @@ int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu)
}
out:
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
return err ? err : mapped;
}
@@ -5916,8 +5911,7 @@ int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start,
ret = ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
up_write(&EXT4_I(inode)->i_data_sem);
out:
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
ext4_mark_inode_dirty(NULL, inode);
return ret;
}
@@ -5935,8 +5929,7 @@ void ext4_ext_replay_shrink_inode(struct inode *inode, ext4_lblk_t end)
return;
ex = path[path->p_depth].p_ext;
if (!ex) {
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
ext4_mark_inode_dirty(NULL, inode);
return;
}
@@ -5949,8 +5942,7 @@ void ext4_ext_replay_shrink_inode(struct inode *inode, ext4_lblk_t end)
ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
up_write(&EXT4_I(inode)->i_data_sem);
ext4_mark_inode_dirty(NULL, inode);
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
}
}
@@ -5989,13 +5981,11 @@ int ext4_ext_replay_set_iblocks(struct inode *inode)
return PTR_ERR(path);
ex = path[path->p_depth].p_ext;
if (!ex) {
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
goto out;
}
end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
/* Count the number of data blocks */
cur = 0;
@@ -6025,30 +6015,26 @@ int ext4_ext_replay_set_iblocks(struct inode *inode)
if (IS_ERR(path))
goto out;
numblks += path->p_depth;
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
while (cur < end) {
path = ext4_find_extent(inode, cur, NULL, 0);
if (IS_ERR(path))
break;
ex = path[path->p_depth].p_ext;
if (!ex) {
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
return 0;
}
cur = max(cur + 1, le32_to_cpu(ex->ee_block) +
ext4_ext_get_actual_len(ex));
ret = skip_hole(inode, &cur);
if (ret < 0) {
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
break;
}
path2 = ext4_find_extent(inode, cur, NULL, 0);
if (IS_ERR(path2)) {
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
break;
}
for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) {
@@ -6062,10 +6048,8 @@ int ext4_ext_replay_set_iblocks(struct inode *inode)
if (cmp1 != cmp2 && cmp2 != 0)
numblks++;
}
- ext4_ext_drop_refs(path);
- ext4_ext_drop_refs(path2);
- kfree(path);
- kfree(path2);
+ ext4_free_ext_path(path);
+ ext4_free_ext_path(path2);
}
out:
@@ -6092,13 +6076,11 @@ int ext4_ext_clear_bb(struct inode *inode)
return PTR_ERR(path);
ex = path[path->p_depth].p_ext;
if (!ex) {
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
return 0;
}
end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
cur = 0;
while (cur < end) {
@@ -6117,8 +6099,7 @@ int ext4_ext_clear_bb(struct inode *inode)
ext4_fc_record_regions(inode->i_sb, inode->i_ino,
0, path[j].p_block, 1, 1);
}
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
}
ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
ext4_fc_record_regions(inode->i_sb, inode->i_ino,
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 23167efda95e..cd0a861853e3 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -667,8 +667,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
}
}
out:
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
}
static void ext4_es_insert_extent_ind_check(struct inode *inode,
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 2af962cbb835..0f6d0a80467d 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -229,6 +229,12 @@ __releases(&EXT4_SB(inode->i_sb)->s_fc_lock)
finish_wait(wq, &wait.wq_entry);
}
+static bool ext4_fc_disabled(struct super_block *sb)
+{
+ return (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
+ (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY));
+}
+
/*
* Inform Ext4's fast about start of an inode update
*
@@ -240,8 +246,7 @@ void ext4_fc_start_update(struct inode *inode)
{
struct ext4_inode_info *ei = EXT4_I(inode);
- if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
- (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
+ if (ext4_fc_disabled(inode->i_sb))
return;
restart:
@@ -265,8 +270,7 @@ void ext4_fc_stop_update(struct inode *inode)
{
struct ext4_inode_info *ei = EXT4_I(inode);
- if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
- (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
+ if (ext4_fc_disabled(inode->i_sb))
return;
if (atomic_dec_and_test(&ei->i_fc_updates))
@@ -283,8 +287,7 @@ void ext4_fc_del(struct inode *inode)
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_fc_dentry_update *fc_dentry;
- if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
- (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
+ if (ext4_fc_disabled(inode->i_sb))
return;
restart:
@@ -337,8 +340,7 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handl
struct ext4_sb_info *sbi = EXT4_SB(sb);
tid_t tid;
- if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
- (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
+ if (ext4_fc_disabled(sb))
return;
ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
@@ -493,10 +495,8 @@ void __ext4_fc_track_unlink(handle_t *handle,
void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
- (sbi->s_mount_state & EXT4_FC_REPLAY))
+ if (ext4_fc_disabled(inode->i_sb))
return;
if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
@@ -522,10 +522,8 @@ void __ext4_fc_track_link(handle_t *handle,
void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
- (sbi->s_mount_state & EXT4_FC_REPLAY))
+ if (ext4_fc_disabled(inode->i_sb))
return;
if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
@@ -551,10 +549,8 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
- (sbi->s_mount_state & EXT4_FC_REPLAY))
+ if (ext4_fc_disabled(inode->i_sb))
return;
if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
@@ -576,22 +572,20 @@ static int __track_inode(struct inode *inode, void *arg, bool update)
void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
{
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
int ret;
if (S_ISDIR(inode->i_mode))
return;
+ if (ext4_fc_disabled(inode->i_sb))
+ return;
+
if (ext4_should_journal_data(inode)) {
ext4_fc_mark_ineligible(inode->i_sb,
EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
return;
}
- if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
- (sbi->s_mount_state & EXT4_FC_REPLAY))
- return;
-
if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
return;
@@ -634,15 +628,13 @@ static int __track_range(struct inode *inode, void *arg, bool update)
void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
ext4_lblk_t end)
{
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct __track_range_args args;
int ret;
if (S_ISDIR(inode->i_mode))
return;
- if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
- (sbi->s_mount_state & EXT4_FC_REPLAY))
+ if (ext4_fc_disabled(inode->i_sb))
return;
if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
@@ -710,10 +702,10 @@ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
* After allocating len, we should have space at least for a 0 byte
* padding.
*/
- if (len + sizeof(struct ext4_fc_tl) > bsize)
+ if (len + EXT4_FC_TAG_BASE_LEN > bsize)
return NULL;
- if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) {
+ if (bsize - off - 1 > len + EXT4_FC_TAG_BASE_LEN) {
/*
* Only allocate from current buffer if we have enough space for
* this request AND we have space to add a zero byte padding.
@@ -730,10 +722,10 @@ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
/* Need to add PAD tag */
tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off);
tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
- pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl);
+ pad_len = bsize - off - 1 - EXT4_FC_TAG_BASE_LEN;
tl->fc_len = cpu_to_le16(pad_len);
if (crc)
- *crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl));
+ *crc = ext4_chksum(sbi, *crc, tl, EXT4_FC_TAG_BASE_LEN);
if (pad_len > 0)
ext4_fc_memzero(sb, tl + 1, pad_len, crc);
ext4_fc_submit_bh(sb, false);
@@ -775,7 +767,7 @@ static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
* ext4_fc_reserve_space takes care of allocating an extra block if
* there's no enough space on this block for accommodating this tail.
*/
- dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc);
+ dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + sizeof(tail), &crc);
if (!dst)
return -ENOSPC;
@@ -785,8 +777,8 @@ static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize);
- ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc);
- dst += sizeof(tl);
+ ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, &crc);
+ dst += EXT4_FC_TAG_BASE_LEN;
tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid);
ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc);
dst += sizeof(tail.fc_tid);
@@ -808,15 +800,15 @@ static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val,
struct ext4_fc_tl tl;
u8 *dst;
- dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc);
+ dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + len, crc);
if (!dst)
return false;
tl.fc_tag = cpu_to_le16(tag);
tl.fc_len = cpu_to_le16(len);
- ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
- ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc);
+ ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc);
+ ext4_fc_memcpy(sb, dst + EXT4_FC_TAG_BASE_LEN, val, len, crc);
return true;
}
@@ -828,8 +820,8 @@ static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc,
struct ext4_fc_dentry_info fcd;
struct ext4_fc_tl tl;
int dlen = fc_dentry->fcd_name.len;
- u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen,
- crc);
+ u8 *dst = ext4_fc_reserve_space(sb,
+ EXT4_FC_TAG_BASE_LEN + sizeof(fcd) + dlen, crc);
if (!dst)
return false;
@@ -838,8 +830,8 @@ static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc,
fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino);
tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op);
tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
- ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
- dst += sizeof(tl);
+ ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc);
+ dst += EXT4_FC_TAG_BASE_LEN;
ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc);
dst += sizeof(fcd);
ext4_fc_memcpy(sb, dst, fc_dentry->fcd_name.name, dlen, crc);
@@ -874,22 +866,25 @@ static int ext4_fc_write_inode(struct inode *inode, u32 *crc)
tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE);
tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino));
+ ret = -ECANCELED;
dst = ext4_fc_reserve_space(inode->i_sb,
- sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc);
+ EXT4_FC_TAG_BASE_LEN + inode_len + sizeof(fc_inode.fc_ino), crc);
if (!dst)
- return -ECANCELED;
+ goto err;
- if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc))
- return -ECANCELED;
- dst += sizeof(tl);
+ if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc))
+ goto err;
+ dst += EXT4_FC_TAG_BASE_LEN;
if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc))
- return -ECANCELED;
+ goto err;
dst += sizeof(fc_inode);
if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc),
inode_len, crc))
- return -ECANCELED;
-
- return 0;
+ goto err;
+ ret = 0;
+err:
+ brelse(iloc.bh);
+ return ret;
}
/*
@@ -1343,7 +1338,7 @@ struct dentry_info_args {
};
static inline void tl_to_darg(struct dentry_info_args *darg,
- struct ext4_fc_tl *tl, u8 *val)
+ struct ext4_fc_tl *tl, u8 *val)
{
struct ext4_fc_dentry_info fcd;
@@ -1352,8 +1347,14 @@ static inline void tl_to_darg(struct dentry_info_args *darg,
darg->parent_ino = le32_to_cpu(fcd.fc_parent_ino);
darg->ino = le32_to_cpu(fcd.fc_ino);
darg->dname = val + offsetof(struct ext4_fc_dentry_info, fc_dname);
- darg->dname_len = le16_to_cpu(tl->fc_len) -
- sizeof(struct ext4_fc_dentry_info);
+ darg->dname_len = tl->fc_len - sizeof(struct ext4_fc_dentry_info);
+}
+
+static inline void ext4_fc_get_tl(struct ext4_fc_tl *tl, u8 *val)
+{
+ memcpy(tl, val, EXT4_FC_TAG_BASE_LEN);
+ tl->fc_len = le16_to_cpu(tl->fc_len);
+ tl->fc_tag = le16_to_cpu(tl->fc_tag);
}
/* Unlink replay function */
@@ -1491,13 +1492,15 @@ static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
if (state->fc_modified_inodes[i] == ino)
return 0;
if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) {
- state->fc_modified_inodes = krealloc(
- state->fc_modified_inodes,
+ int *fc_modified_inodes;
+
+ fc_modified_inodes = krealloc(state->fc_modified_inodes,
sizeof(int) * (state->fc_modified_inodes_size +
EXT4_FC_REPLAY_REALLOC_INCREMENT),
GFP_KERNEL);
- if (!state->fc_modified_inodes)
+ if (!fc_modified_inodes)
return -ENOMEM;
+ state->fc_modified_inodes = fc_modified_inodes;
state->fc_modified_inodes_size +=
EXT4_FC_REPLAY_REALLOC_INCREMENT;
}
@@ -1516,8 +1519,9 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
struct ext4_inode *raw_fc_inode;
struct inode *inode = NULL;
struct ext4_iloc iloc;
- int inode_len, ino, ret, tag = le16_to_cpu(tl->fc_tag);
+ int inode_len, ino, ret, tag = tl->fc_tag;
struct ext4_extent_header *eh;
+ size_t off_gen = offsetof(struct ext4_inode, i_generation);
memcpy(&fc_inode, val, sizeof(fc_inode));
@@ -1541,12 +1545,12 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
if (ret)
goto out;
- inode_len = le16_to_cpu(tl->fc_len) - sizeof(struct ext4_fc_inode);
+ inode_len = tl->fc_len - sizeof(struct ext4_fc_inode);
raw_inode = ext4_raw_inode(&iloc);
memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block));
- memcpy(&raw_inode->i_generation, &raw_fc_inode->i_generation,
- inode_len - offsetof(struct ext4_inode, i_generation));
+ memcpy((u8 *)raw_inode + off_gen, (u8 *)raw_fc_inode + off_gen,
+ inode_len - off_gen);
if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) {
eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]);
if (eh->eh_magic != EXT4_EXT_MAGIC) {
@@ -1682,15 +1686,18 @@ int ext4_fc_record_regions(struct super_block *sb, int ino,
if (replay && state->fc_regions_used != state->fc_regions_valid)
state->fc_regions_used = state->fc_regions_valid;
if (state->fc_regions_used == state->fc_regions_size) {
+ struct ext4_fc_alloc_region *fc_regions;
+
+ fc_regions = krealloc(state->fc_regions,
+ sizeof(struct ext4_fc_alloc_region) *
+ (state->fc_regions_size +
+ EXT4_FC_REPLAY_REALLOC_INCREMENT),
+ GFP_KERNEL);
+ if (!fc_regions)
+ return -ENOMEM;
state->fc_regions_size +=
EXT4_FC_REPLAY_REALLOC_INCREMENT;
- state->fc_regions = krealloc(
- state->fc_regions,
- state->fc_regions_size *
- sizeof(struct ext4_fc_alloc_region),
- GFP_KERNEL);
- if (!state->fc_regions)
- return -ENOMEM;
+ state->fc_regions = fc_regions;
}
region = &state->fc_regions[state->fc_regions_used++];
region->ino = ino;
@@ -1770,8 +1777,7 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
ret = ext4_ext_insert_extent(
NULL, inode, &path, &newex, 0);
up_write((&EXT4_I(inode)->i_data_sem));
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
if (ret)
goto out;
goto next;
@@ -1926,8 +1932,7 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
for (j = 0; j < path->p_depth; j++)
ext4_mb_mark_bb(inode->i_sb,
path[j].p_block, 1, 1);
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
}
cur += ret;
ext4_mb_mark_bb(inode->i_sb, map.m_pblk,
@@ -1972,6 +1977,34 @@ void ext4_fc_replay_cleanup(struct super_block *sb)
kfree(sbi->s_fc_replay_state.fc_modified_inodes);
}
+static inline bool ext4_fc_tag_len_isvalid(struct ext4_fc_tl *tl,
+ u8 *val, u8 *end)
+{
+ if (val + tl->fc_len > end)
+ return false;
+
+ /* Here only check ADD_RANGE/TAIL/HEAD which will read data when do
+ * journal rescan before do CRC check. Other tags length check will
+ * rely on CRC check.
+ */
+ switch (tl->fc_tag) {
+ case EXT4_FC_TAG_ADD_RANGE:
+ return (sizeof(struct ext4_fc_add_range) == tl->fc_len);
+ case EXT4_FC_TAG_TAIL:
+ return (sizeof(struct ext4_fc_tail) <= tl->fc_len);
+ case EXT4_FC_TAG_HEAD:
+ return (sizeof(struct ext4_fc_head) == tl->fc_len);
+ case EXT4_FC_TAG_DEL_RANGE:
+ case EXT4_FC_TAG_LINK:
+ case EXT4_FC_TAG_UNLINK:
+ case EXT4_FC_TAG_CREAT:
+ case EXT4_FC_TAG_INODE:
+ case EXT4_FC_TAG_PAD:
+ default:
+ return true;
+ }
+}
+
/*
* Recovery Scan phase handler
*
@@ -2028,12 +2061,18 @@ static int ext4_fc_replay_scan(journal_t *journal,
}
state->fc_replay_expected_off++;
- for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) {
- memcpy(&tl, cur, sizeof(tl));
- val = cur + sizeof(tl);
+ for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN;
+ cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) {
+ ext4_fc_get_tl(&tl, cur);
+ val = cur + EXT4_FC_TAG_BASE_LEN;
+ if (!ext4_fc_tag_len_isvalid(&tl, val, end)) {
+ ret = state->fc_replay_num_tags ?
+ JBD2_FC_REPLAY_STOP : -ECANCELED;
+ goto out_err;
+ }
ext4_debug("Scan phase, tag:%s, blk %lld\n",
- tag2str(le16_to_cpu(tl.fc_tag)), bh->b_blocknr);
- switch (le16_to_cpu(tl.fc_tag)) {
+ tag2str(tl.fc_tag), bh->b_blocknr);
+ switch (tl.fc_tag) {
case EXT4_FC_TAG_ADD_RANGE:
memcpy(&ext, val, sizeof(ext));
ex = (struct ext4_extent *)&ext.fc_ex;
@@ -2053,13 +2092,13 @@ static int ext4_fc_replay_scan(journal_t *journal,
case EXT4_FC_TAG_PAD:
state->fc_cur_tag++;
state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
- sizeof(tl) + le16_to_cpu(tl.fc_len));
+ EXT4_FC_TAG_BASE_LEN + tl.fc_len);
break;
case EXT4_FC_TAG_TAIL:
state->fc_cur_tag++;
memcpy(&tail, val, sizeof(tail));
state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
- sizeof(tl) +
+ EXT4_FC_TAG_BASE_LEN +
offsetof(struct ext4_fc_tail,
fc_crc));
if (le32_to_cpu(tail.fc_tid) == expected_tid &&
@@ -2086,7 +2125,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
}
state->fc_cur_tag++;
state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
- sizeof(tl) + le16_to_cpu(tl.fc_len));
+ EXT4_FC_TAG_BASE_LEN + tl.fc_len);
break;
default:
ret = state->fc_replay_num_tags ?
@@ -2141,19 +2180,20 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
start = (u8 *)bh->b_data;
end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
- for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) {
- memcpy(&tl, cur, sizeof(tl));
- val = cur + sizeof(tl);
+ for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN;
+ cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) {
+ ext4_fc_get_tl(&tl, cur);
+ val = cur + EXT4_FC_TAG_BASE_LEN;
if (state->fc_replay_num_tags == 0) {
ret = JBD2_FC_REPLAY_STOP;
ext4_fc_set_bitmaps_and_counters(sb);
break;
}
- ext4_debug("Replay phase, tag:%s\n",
- tag2str(le16_to_cpu(tl.fc_tag)));
+
+ ext4_debug("Replay phase, tag:%s\n", tag2str(tl.fc_tag));
state->fc_replay_num_tags--;
- switch (le16_to_cpu(tl.fc_tag)) {
+ switch (tl.fc_tag) {
case EXT4_FC_TAG_LINK:
ret = ext4_fc_replay_link(sb, &tl, val);
break;
@@ -2174,19 +2214,18 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
break;
case EXT4_FC_TAG_PAD:
trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0,
- le16_to_cpu(tl.fc_len), 0);
+ tl.fc_len, 0);
break;
case EXT4_FC_TAG_TAIL:
- trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 0,
- le16_to_cpu(tl.fc_len), 0);
+ trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL,
+ 0, tl.fc_len, 0);
memcpy(&tail, val, sizeof(tail));
WARN_ON(le32_to_cpu(tail.fc_tid) != expected_tid);
break;
case EXT4_FC_TAG_HEAD:
break;
default:
- trace_ext4_fc_replay(sb, le16_to_cpu(tl.fc_tag), 0,
- le16_to_cpu(tl.fc_len), 0);
+ trace_ext4_fc_replay(sb, tl.fc_tag, 0, tl.fc_len, 0);
ret = -ECANCELED;
break;
}
diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h
index 1db12847a83b..a6154c3ed135 100644
--- a/fs/ext4/fast_commit.h
+++ b/fs/ext4/fast_commit.h
@@ -70,6 +70,9 @@ struct ext4_fc_tail {
__le32 fc_crc;
};
+/* Tag base length */
+#define EXT4_FC_TAG_BASE_LEN (sizeof(struct ext4_fc_tl))
+
/*
* Fast commit status codes
*/
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 109d07629f81..a7a597c727e6 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -36,19 +36,34 @@
#include "acl.h"
#include "truncate.h"
-static bool ext4_dio_supported(struct kiocb *iocb, struct iov_iter *iter)
+/*
+ * Returns %true if the given DIO request should be attempted with DIO, or
+ * %false if it should fall back to buffered I/O.
+ *
+ * DIO isn't well specified; when it's unsupported (either due to the request
+ * being misaligned, or due to the file not supporting DIO at all), filesystems
+ * either fall back to buffered I/O or return EINVAL. For files that don't use
+ * any special features like encryption or verity, ext4 has traditionally
+ * returned EINVAL for misaligned DIO. iomap_dio_rw() uses this convention too.
+ * In this case, we should attempt the DIO, *not* fall back to buffered I/O.
+ *
+ * In contrast, in cases where DIO is unsupported due to ext4 features, ext4
+ * traditionally falls back to buffered I/O.
+ *
+ * This function implements the traditional ext4 behavior in all these cases.
+ */
+static bool ext4_should_use_dio(struct kiocb *iocb, struct iov_iter *iter)
{
struct inode *inode = file_inode(iocb->ki_filp);
+ u32 dio_align = ext4_dio_alignment(inode);
- if (!fscrypt_dio_supported(iocb, iter))
- return false;
- if (fsverity_active(inode))
- return false;
- if (ext4_should_journal_data(inode))
+ if (dio_align == 0)
return false;
- if (ext4_has_inline_data(inode))
- return false;
- return true;
+
+ if (dio_align == 1)
+ return true;
+
+ return IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), dio_align);
}
static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -63,7 +78,7 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
inode_lock_shared(inode);
}
- if (!ext4_dio_supported(iocb, to)) {
+ if (!ext4_should_use_dio(iocb, to)) {
inode_unlock_shared(inode);
/*
* Fallback to buffered I/O if the operation being performed on
@@ -511,7 +526,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
}
/* Fallback to buffered I/O if the inode does not support direct I/O. */
- if (!ext4_dio_supported(iocb, from)) {
+ if (!ext4_should_use_dio(iocb, from)) {
if (ilock_shared)
inode_unlock_shared(inode);
else
@@ -528,6 +543,12 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
ret = -EAGAIN;
goto out;
}
+ /*
+ * Make sure inline data cannot be created anymore since we are going
+ * to allocate blocks for DIO. We know the inode does not have any
+ * inline data now because ext4_dio_supported() checked for that.
+ */
+ ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
offset = iocb->ki_pos;
count = ret;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 208b87ce8858..e9bc46684106 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -463,10 +463,9 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
hinfo.hash_version = DX_HASH_HALF_MD4;
hinfo.seed = sbi->s_hash_seed;
ext4fs_dirhash(parent, qstr->name, qstr->len, &hinfo);
- grp = hinfo.hash;
+ parent_group = hinfo.hash % ngroups;
} else
- grp = prandom_u32();
- parent_group = (unsigned)grp % ngroups;
+ parent_group = prandom_u32_max(ngroups);
for (i = 0; i < ngroups; i++) {
g = (parent_group + i) % ngroups;
get_orlov_stats(sb, g, flex_size, &stats);
@@ -1280,7 +1279,7 @@ got:
EXT4_GROUP_INFO_IBITMAP_CORRUPT);
goto out;
}
- inode->i_generation = prandom_u32();
+ inode->i_generation = get_random_u32();
/* Precompute checksum seed for inode metadata */
if (ext4_has_metadata_csum(sb)) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 601214453c3a..2b5ef1b64249 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1188,6 +1188,13 @@ retry_grab:
page = grab_cache_page_write_begin(mapping, index);
if (!page)
return -ENOMEM;
+ /*
+ * The same as page allocation, we prealloc buffer heads before
+ * starting the handle.
+ */
+ if (!page_has_buffers(page))
+ create_empty_buffers(page, inode->i_sb->s_blocksize, 0);
+
unlock_page(page);
retry_journal:
@@ -5342,6 +5349,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
int error, rc = 0;
int orphan = 0;
const unsigned int ia_valid = attr->ia_valid;
+ bool inc_ivers = true;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
@@ -5425,8 +5433,8 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
return -EINVAL;
}
- if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size)
- inode_inc_iversion(inode);
+ if (attr->ia_size == inode->i_size)
+ inc_ivers = false;
if (shrink) {
if (ext4_should_order_data(inode)) {
@@ -5528,6 +5536,8 @@ out_mmap_sem:
}
if (!error) {
+ if (inc_ivers)
+ inode_inc_iversion(inode);
setattr_copy(mnt_userns, inode, attr);
mark_inode_dirty(inode);
}
@@ -5550,6 +5560,22 @@ err_out:
return error;
}
+u32 ext4_dio_alignment(struct inode *inode)
+{
+ if (fsverity_active(inode))
+ return 0;
+ if (ext4_should_journal_data(inode))
+ return 0;
+ if (ext4_has_inline_data(inode))
+ return 0;
+ if (IS_ENCRYPTED(inode)) {
+ if (!fscrypt_dio_supported(inode))
+ return 0;
+ return i_blocksize(inode);
+ }
+ return 1; /* use the iomap defaults */
+}
+
int ext4_getattr(struct user_namespace *mnt_userns, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
@@ -5565,6 +5591,27 @@ int ext4_getattr(struct user_namespace *mnt_userns, const struct path *path,
stat->btime.tv_nsec = ei->i_crtime.tv_nsec;
}
+ /*
+ * Return the DIO alignment restrictions if requested. We only return
+ * this information when requested, since on encrypted files it might
+ * take a fair bit of work to get if the file wasn't opened recently.
+ */
+ if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) {
+ u32 dio_align = ext4_dio_alignment(inode);
+
+ stat->result_mask |= STATX_DIOALIGN;
+ if (dio_align == 1) {
+ struct block_device *bdev = inode->i_sb->s_bdev;
+
+ /* iomap defaults */
+ stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
+ stat->dio_offset_align = bdev_logical_block_size(bdev);
+ } else {
+ stat->dio_mem_align = dio_align;
+ stat->dio_offset_align = dio_align;
+ }
+ }
+
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
if (flags & EXT4_APPEND_FL)
stat->attributes |= STATX_ATTR_APPEND;
@@ -5731,9 +5778,6 @@ int ext4_mark_iloc_dirty(handle_t *handle,
}
ext4_fc_track_inode(handle, inode);
- if (IS_I_VERSION(inode))
- inode_inc_iversion(inode);
-
/* the do_update_inode consumes one bh->b_count */
get_bh(iloc->bh);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 3cf3ec4b1c21..95dfea28bf4e 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -145,9 +145,8 @@ static int ext4_update_backup_sb(struct super_block *sb,
if (ext4_has_metadata_csum(sb) &&
es->s_checksum != ext4_superblock_csum(sb, es)) {
ext4_msg(sb, KERN_ERR, "Invalid checksum for backup "
- "superblock %llu\n", sb_block);
+ "superblock %llu", sb_block);
unlock_buffer(bh);
- err = -EFSBADCRC;
goto out_bh;
}
func(es, arg);
@@ -452,9 +451,10 @@ static long swap_inode_boot_loader(struct super_block *sb,
swap_inode_data(inode, inode_bl);
inode->i_ctime = inode_bl->i_ctime = current_time(inode);
+ inode_inc_iversion(inode);
- inode->i_generation = prandom_u32();
- inode_bl->i_generation = prandom_u32();
+ inode->i_generation = get_random_u32();
+ inode_bl->i_generation = get_random_u32();
ext4_reset_inode_seed(inode);
ext4_reset_inode_seed(inode_bl);
@@ -665,6 +665,7 @@ static int ext4_ioctl_setflags(struct inode *inode,
ext4_set_inode_flags(inode, false);
inode->i_ctime = current_time(inode);
+ inode_inc_iversion(inode);
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
flags_err:
@@ -775,6 +776,7 @@ static int ext4_ioctl_setproject(struct inode *inode, __u32 projid)
EXT4_I(inode)->i_projid = kprojid;
inode->i_ctime = current_time(inode);
+ inode_inc_iversion(inode);
out_dirty:
rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
if (!err)
@@ -1060,9 +1062,6 @@ static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg)
if (!EXT4_SB(sb)->s_journal)
return -ENODEV;
- if (flags & ~EXT4_IOC_CHECKPOINT_FLAG_VALID)
- return -EINVAL;
-
if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) &&
!bdev_max_discard_sectors(EXT4_SB(sb)->s_journal->j_dev))
return -EOPNOTSUPP;
@@ -1257,6 +1256,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (err == 0) {
inode->i_ctime = current_time(inode);
+ inode_inc_iversion(inode);
inode->i_generation = generation;
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 71f5b67d7f28..9dad93059945 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -910,7 +910,7 @@ static void ext4_mb_choose_next_group_cr1(struct ext4_allocation_context *ac,
int *new_cr, ext4_group_t *group, ext4_group_t ngroups)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
- struct ext4_group_info *grp, *iter;
+ struct ext4_group_info *grp = NULL, *iter;
int i;
if (unlikely(ac->ac_flags & EXT4_MB_CR1_OPTIMIZED)) {
@@ -927,7 +927,6 @@ static void ext4_mb_choose_next_group_cr1(struct ext4_allocation_context *ac,
read_unlock(&sbi->s_mb_avg_fragment_size_locks[i]);
continue;
}
- grp = NULL;
list_for_each_entry(iter, &sbi->s_mb_avg_fragment_size[i],
bb_avg_fragment_size_node) {
if (sbi->s_mb_stats)
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 54e7d3c95fd7..a19a9661646e 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -56,8 +56,7 @@ static int finish_range(handle_t *handle, struct inode *inode,
retval = ext4_ext_insert_extent(handle, inode, &path, &newext, 0);
err_out:
up_write((&EXT4_I(inode)->i_data_sem));
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
lb->first_pblock = 0;
return retval;
}
@@ -425,7 +424,8 @@ int ext4_ext_migrate(struct inode *inode)
* already is extent-based, error out.
*/
if (!ext4_has_feature_extents(inode->i_sb) ||
- (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
+ ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
+ ext4_has_inline_data(inode))
return -EINVAL;
if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0)
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 9af68a7ecdcf..588cb09c5291 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -265,7 +265,7 @@ static unsigned int mmp_new_seq(void)
u32 new_seq;
do {
- new_seq = prandom_u32();
+ new_seq = get_random_u32();
} while (new_seq > EXT4_MMP_SEQ_MAX);
return new_seq;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 701f1d6a217f..044e34cd835c 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -32,8 +32,7 @@ get_ext_path(struct inode *inode, ext4_lblk_t lblock,
if (IS_ERR(path))
return PTR_ERR(path);
if (path[ext_depth(inode)].p_ext == NULL) {
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
*ppath = NULL;
return -ENODATA;
}
@@ -103,12 +102,10 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
if (unwritten != ext4_ext_is_unwritten(ext))
goto out;
from += ext4_ext_get_actual_len(ext);
- ext4_ext_drop_refs(path);
}
ret = 1;
out:
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
return ret;
}
@@ -472,19 +469,17 @@ mext_check_arguments(struct inode *orig_inode,
if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode))
return -EPERM;
- /* Ext4 move extent does not support swapfile */
+ /* Ext4 move extent does not support swap files */
if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
- ext4_debug("ext4 move extent: The argument files should "
- "not be swapfile [ino:orig %lu, donor %lu]\n",
+ ext4_debug("ext4 move extent: The argument files should not be swap files [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
- return -EBUSY;
+ return -ETXTBSY;
}
if (ext4_is_quota_file(orig_inode) && ext4_is_quota_file(donor_inode)) {
- ext4_debug("ext4 move extent: The argument files should "
- "not be quota files [ino:orig %lu, donor %lu]\n",
+ ext4_debug("ext4 move extent: The argument files should not be quota files [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
- return -EBUSY;
+ return -EOPNOTSUPP;
}
/* Ext4 move extent supports only extent based file */
@@ -631,11 +626,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
if (ret)
goto out;
ex = path[path->p_depth].p_ext;
- next_blk = ext4_ext_next_allocated_block(path);
cur_blk = le32_to_cpu(ex->ee_block);
cur_len = ext4_ext_get_actual_len(ex);
/* Check hole before the start pos */
if (cur_blk + cur_len - 1 < o_start) {
+ next_blk = ext4_ext_next_allocated_block(path);
if (next_blk == EXT_MAX_BLOCKS) {
ret = -ENODATA;
goto out;
@@ -663,7 +658,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
donor_page_index = d_start >> (PAGE_SHIFT -
donor_inode->i_blkbits);
offset_in_page = o_start % blocks_per_page;
- if (cur_len > blocks_per_page- offset_in_page)
+ if (cur_len > blocks_per_page - offset_in_page)
cur_len = blocks_per_page - offset_in_page;
/*
* Up semaphore to avoid following problems:
@@ -694,8 +689,7 @@ out:
ext4_discard_preallocations(donor_inode, 0);
}
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
ext4_double_up_write_data_sem(orig_inode, donor_inode);
unlock_two_nondirectories(orig_inode, donor_inode);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 3a31b662f661..c08c0aba1883 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -85,15 +85,20 @@ static struct buffer_head *ext4_append(handle_t *handle,
return bh;
inode->i_size += inode->i_sb->s_blocksize;
EXT4_I(inode)->i_disksize = inode->i_size;
+ err = ext4_mark_inode_dirty(handle, inode);
+ if (err)
+ goto out;
BUFFER_TRACE(bh, "get_write_access");
err = ext4_journal_get_write_access(handle, inode->i_sb, bh,
EXT4_JTR_NONE);
- if (err) {
- brelse(bh);
- ext4_std_error(inode->i_sb, err);
- return ERR_PTR(err);
- }
+ if (err)
+ goto out;
return bh;
+
+out:
+ brelse(bh);
+ ext4_std_error(inode->i_sb, err);
+ return ERR_PTR(err);
}
static int ext4_dx_csum_verify(struct inode *inode,
@@ -126,7 +131,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
struct ext4_dir_entry *dirent;
int is_dx_block = 0;
- if (block >= inode->i_size) {
+ if (block >= inode->i_size >> inode->i_blkbits) {
ext4_error_inode(inode, func, line, block,
"Attempting to read directory block (%u) that is past i_size (%llu)",
block, inode->i_size);
@@ -2254,8 +2259,16 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
memset(de, 0, len); /* wipe old data */
de = (struct ext4_dir_entry_2 *) data2;
top = data2 + len;
- while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
+ while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) {
+ if (ext4_check_dir_entry(dir, NULL, de, bh2, data2, len,
+ (data2 + (blocksize - csum_size) -
+ (char *) de))) {
+ brelse(bh2);
+ brelse(bh);
+ return -EFSCORRUPTED;
+ }
de = de2;
+ }
de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
(char *) de, blocksize);
@@ -2849,7 +2862,7 @@ retry:
}
static int ext4_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
- struct dentry *dentry, umode_t mode)
+ struct file *file, umode_t mode)
{
handle_t *handle;
struct inode *inode;
@@ -2871,7 +2884,7 @@ retry:
inode->i_op = &ext4_file_inode_operations;
inode->i_fop = &ext4_file_operations;
ext4_set_aops(inode);
- d_tmpfile(dentry, inode);
+ d_tmpfile(file, inode);
err = ext4_orphan_add(handle, inode);
if (err)
goto err_unlock_inode;
@@ -2882,7 +2895,7 @@ retry:
ext4_journal_stop(handle);
if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
goto retry;
- return err;
+ return finish_open_simple(file, err);
err_unlock_inode:
ext4_journal_stop(handle);
unlock_new_inode(inode);
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index e02a5f14e021..3d21eae267fc 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -75,7 +75,7 @@ static void __read_end_io(struct bio *bio)
bio_for_each_segment_all(bv, bio, iter_all) {
page = bv->bv_page;
- /* PG_error was set if any post_read step failed */
+ /* PG_error was set if verity failed. */
if (bio->bi_status || PageError(page)) {
ClearPageUptodate(page);
/* will re-read again later */
@@ -96,10 +96,12 @@ static void decrypt_work(struct work_struct *work)
{
struct bio_post_read_ctx *ctx =
container_of(work, struct bio_post_read_ctx, work);
+ struct bio *bio = ctx->bio;
- fscrypt_decrypt_bio(ctx->bio);
-
- bio_post_read_processing(ctx);
+ if (fscrypt_decrypt_bio(bio))
+ bio_post_read_processing(ctx);
+ else
+ __read_end_io(bio);
}
static void verity_work(struct work_struct *work)
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index fea2a68d067b..46b87ffeb304 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1158,6 +1158,7 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data,
while (group < sbi->s_groups_count) {
struct buffer_head *bh;
ext4_fsblk_t backup_block;
+ struct ext4_super_block *es;
/* Out of journal space, and can't get more - abort - so sad */
err = ext4_resize_ensure_credits_batch(handle, 1);
@@ -1186,6 +1187,10 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data,
memcpy(bh->b_data, data, size);
if (rest)
memset(bh->b_data + size, 0, rest);
+ es = (struct ext4_super_block *) bh->b_data;
+ es->s_block_group_nr = cpu_to_le16(group);
+ if (ext4_has_metadata_csum(sb))
+ es->s_checksum = ext4_superblock_csum(sb, es);
set_buffer_uptodate(bh);
unlock_buffer(bh);
err = ext4_handle_dirty_metadata(handle, NULL, bh);
@@ -2122,7 +2127,7 @@ retry:
goto out;
}
- if (ext4_blocks_count(es) == n_blocks_count)
+ if (ext4_blocks_count(es) == n_blocks_count && n_blocks_count_retry == 0)
goto out;
err = ext4_alloc_flex_bg_array(sb, n_group + 1);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9a66abcca1a8..7cdd2138c897 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -205,19 +205,12 @@ int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io
int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t op_flags, bool wait)
{
- if (trylock_buffer(bh)) {
- if (wait)
- return ext4_read_bh(bh, op_flags, NULL);
+ lock_buffer(bh);
+ if (!wait) {
ext4_read_bh_nowait(bh, op_flags, NULL);
return 0;
}
- if (wait) {
- wait_on_buffer(bh);
- if (buffer_uptodate(bh))
- return 0;
- return -EIO;
- }
- return 0;
+ return ext4_read_bh(bh, op_flags, NULL);
}
/*
@@ -264,7 +257,8 @@ void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
struct buffer_head *bh = sb_getblk_gfp(sb, block, 0);
if (likely(bh)) {
- ext4_read_bh_lock(bh, REQ_RAHEAD, false);
+ if (trylock_buffer(bh))
+ ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL);
brelse(bh);
}
}
@@ -1576,7 +1570,7 @@ enum {
Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
Opt_resgid, Opt_resuid, Opt_sb,
Opt_nouid32, Opt_debug, Opt_removed,
- Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
+ Opt_user_xattr, Opt_acl,
Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
@@ -1585,7 +1579,7 @@ enum {
Opt_inlinecrypt,
Opt_usrjquota, Opt_grpjquota, Opt_quota,
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
- Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version,
+ Opt_usrquota, Opt_grpquota, Opt_prjquota,
Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never,
Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
Opt_nowarn_on_error, Opt_mblk_io_submit, Opt_debug_want_extra_isize,
@@ -1662,9 +1656,7 @@ static const struct fs_parameter_spec ext4_param_specs[] = {
fsparam_flag ("oldalloc", Opt_removed),
fsparam_flag ("orlov", Opt_removed),
fsparam_flag ("user_xattr", Opt_user_xattr),
- fsparam_flag ("nouser_xattr", Opt_nouser_xattr),
fsparam_flag ("acl", Opt_acl),
- fsparam_flag ("noacl", Opt_noacl),
fsparam_flag ("norecovery", Opt_noload),
fsparam_flag ("noload", Opt_noload),
fsparam_flag ("bh", Opt_removed),
@@ -1694,7 +1686,7 @@ static const struct fs_parameter_spec ext4_param_specs[] = {
fsparam_flag ("barrier", Opt_barrier),
fsparam_u32 ("barrier", Opt_barrier),
fsparam_flag ("nobarrier", Opt_nobarrier),
- fsparam_flag ("i_version", Opt_i_version),
+ fsparam_flag ("i_version", Opt_removed),
fsparam_flag ("dax", Opt_dax),
fsparam_enum ("dax", Opt_dax_type, ext4_param_dax),
fsparam_u32 ("stripe", Opt_stripe),
@@ -1749,10 +1741,6 @@ static const struct fs_parameter_spec ext4_param_specs[] = {
#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
-static const char deprecated_msg[] =
- "Mount option \"%s\" will be removed by %s\n"
- "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
-
#define MOPT_SET 0x0001
#define MOPT_CLEAR 0x0002
#define MOPT_NOSUPPORT 0x0004
@@ -1814,13 +1802,10 @@ static const struct mount_opts {
{Opt_journal_ioprio, 0, MOPT_NO_EXT2},
{Opt_data, 0, MOPT_NO_EXT2},
{Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
- {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
#ifdef CONFIG_EXT4_FS_POSIX_ACL
{Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
- {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
#else
{Opt_acl, 0, MOPT_NOSUPPORT},
- {Opt_noacl, 0, MOPT_NOSUPPORT},
#endif
{Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
{Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
@@ -2120,10 +2105,6 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
else
return note_qf_name(fc, GRPQUOTA, param);
#endif
- case Opt_noacl:
- case Opt_nouser_xattr:
- ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "3.5");
- break;
case Opt_sb:
if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
ext4_msg(NULL, KERN_WARNING,
@@ -2140,11 +2121,6 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
case Opt_abort:
ctx_set_mount_flag(ctx, EXT4_MF_FS_ABORTED);
return 0;
- case Opt_i_version:
- ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "5.20");
- ext4_msg(NULL, KERN_WARNING, "Use iversion instead\n");
- ctx_set_flags(ctx, SB_I_VERSION);
- return 0;
case Opt_inlinecrypt:
#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
ctx_set_flags(ctx, SB_INLINECRYPT);
@@ -2814,14 +2790,6 @@ static void ext4_apply_options(struct fs_context *fc, struct super_block *sb)
sb->s_flags &= ~ctx->mask_s_flags;
sb->s_flags |= ctx->vals_s_flags;
- /*
- * i_version differs from common mount option iversion so we have
- * to let vfs know that it was set, otherwise it would get cleared
- * on remount
- */
- if (ctx->mask_s_flags & SB_I_VERSION)
- fc->sb_flags |= SB_I_VERSION;
-
#define APPLY(X) ({ if (ctx->spec & EXT4_SPEC_##X) sbi->X = ctx->X; })
APPLY(s_commit_interval);
APPLY(s_stripe);
@@ -2970,8 +2938,6 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
- if (sb->s_flags & SB_I_VERSION)
- SEQ_OPTS_PUTS("i_version");
if (nodefs || sbi->s_stripe)
SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
if (nodefs || EXT4_MOUNT_DATA_FLAGS &
@@ -3767,6 +3733,7 @@ static int ext4_lazyinit_thread(void *arg)
unsigned long next_wakeup, cur;
BUG_ON(NULL == eli);
+ set_freezable();
cont_thread:
while (true) {
@@ -3811,8 +3778,7 @@ cont_thread:
}
if (!progress) {
elr->lr_next_sched = jiffies +
- (prandom_u32()
- % (EXT4_DEF_LI_MAX_START_DELAY * HZ));
+ prandom_u32_max(EXT4_DEF_LI_MAX_START_DELAY * HZ);
}
if (time_before(elr->lr_next_sched, next_wakeup))
next_wakeup = elr->lr_next_sched;
@@ -3959,8 +3925,8 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
* spread the inode table initialization requests
* better.
*/
- elr->lr_next_sched = jiffies + (prandom_u32() %
- (EXT4_DEF_LI_MAX_START_DELAY * HZ));
+ elr->lr_next_sched = jiffies + prandom_u32_max(
+ EXT4_DEF_LI_MAX_START_DELAY * HZ);
return elr;
}
@@ -3982,9 +3948,9 @@ int ext4_register_li_request(struct super_block *sb,
goto out;
}
- if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) &&
- (first_not_zeroed == ngroups || sb_rdonly(sb) ||
- !test_opt(sb, INIT_INODE_TABLE)))
+ if (sb_rdonly(sb) ||
+ (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) &&
+ (first_not_zeroed == ngroups || !test_opt(sb, INIT_INODE_TABLE))))
goto out;
elr = ext4_li_request_new(sb, first_not_zeroed);
@@ -4311,112 +4277,10 @@ err_out:
return NULL;
}
-static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
+static void ext4_set_def_opts(struct super_block *sb,
+ struct ext4_super_block *es)
{
- struct buffer_head *bh, **group_desc;
- struct ext4_super_block *es = NULL;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct flex_groups **flex_groups;
- ext4_fsblk_t block;
- ext4_fsblk_t logical_sb_block;
- unsigned long offset = 0;
unsigned long def_mount_opts;
- struct inode *root;
- int ret = -ENOMEM;
- int blocksize, clustersize;
- unsigned int db_count;
- unsigned int i;
- int needs_recovery, has_huge_files;
- __u64 blocks_count;
- int err = 0;
- ext4_group_t first_not_zeroed;
- struct ext4_fs_context *ctx = fc->fs_private;
- int silent = fc->sb_flags & SB_SILENT;
-
- /* Set defaults for the variables that will be set during parsing */
- if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO))
- ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
-
- sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
- sbi->s_sectors_written_start =
- part_stat_read(sb->s_bdev, sectors[STAT_WRITE]);
-
- /* -EINVAL is default */
- ret = -EINVAL;
- blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
- if (!blocksize) {
- ext4_msg(sb, KERN_ERR, "unable to set blocksize");
- goto out_fail;
- }
-
- /*
- * The ext4 superblock will not be buffer aligned for other than 1kB
- * block sizes. We need to calculate the offset from buffer start.
- */
- if (blocksize != EXT4_MIN_BLOCK_SIZE) {
- logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
- offset = do_div(logical_sb_block, blocksize);
- } else {
- logical_sb_block = sbi->s_sb_block;
- }
-
- bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
- if (IS_ERR(bh)) {
- ext4_msg(sb, KERN_ERR, "unable to read superblock");
- ret = PTR_ERR(bh);
- goto out_fail;
- }
- /*
- * Note: s_es must be initialized as soon as possible because
- * some ext4 macro-instructions depend on its value
- */
- es = (struct ext4_super_block *) (bh->b_data + offset);
- sbi->s_es = es;
- sb->s_magic = le16_to_cpu(es->s_magic);
- if (sb->s_magic != EXT4_SUPER_MAGIC)
- goto cantfind_ext4;
- sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
-
- /* Warn if metadata_csum and gdt_csum are both set. */
- if (ext4_has_feature_metadata_csum(sb) &&
- ext4_has_feature_gdt_csum(sb))
- ext4_warning(sb, "metadata_csum and uninit_bg are "
- "redundant flags; please run fsck.");
-
- /* Check for a known checksum algorithm */
- if (!ext4_verify_csum_type(sb, es)) {
- ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
- "unknown checksum algorithm.");
- silent = 1;
- goto cantfind_ext4;
- }
- ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE,
- ext4_orphan_file_block_trigger);
-
- /* Load the checksum driver */
- sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
- if (IS_ERR(sbi->s_chksum_driver)) {
- ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
- ret = PTR_ERR(sbi->s_chksum_driver);
- sbi->s_chksum_driver = NULL;
- goto failed_mount;
- }
-
- /* Check superblock checksum */
- if (!ext4_superblock_csum_verify(sb, es)) {
- ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
- "invalid superblock checksum. Run e2fsck?");
- silent = 1;
- ret = -EFSBADCRC;
- goto cantfind_ext4;
- }
-
- /* Precompute checksum seed for all metadata */
- if (ext4_has_feature_csum_seed(sb))
- sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
- else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb))
- sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
- sizeof(es->s_uuid));
/* Set defaults before we parse the mount options */
def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
@@ -4445,9 +4309,9 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
set_opt(sb, WRITEBACK_DATA);
- if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
+ if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_PANIC)
set_opt(sb, ERRORS_PANIC);
- else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
+ else if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_CONTINUE)
set_opt(sb, ERRORS_CONT);
else
set_opt(sb, ERRORS_RO);
@@ -4456,12 +4320,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
if (def_mount_opts & EXT4_DEFM_DISCARD)
set_opt(sb, DISCARD);
- sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
- sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
- sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
- sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
- sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
-
if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
set_opt(sb, BARRIER);
@@ -4473,31 +4331,96 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
set_opt(sb, DELALLOC);
- /*
- * set default s_li_wait_mult for lazyinit, for the case there is
- * no mount option specified.
- */
- sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
+ if (sb->s_blocksize == PAGE_SIZE)
+ set_opt(sb, DIOREAD_NOLOCK);
+}
- if (le32_to_cpu(es->s_log_block_size) >
- (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
- ext4_msg(sb, KERN_ERR,
- "Invalid log block size: %u",
- le32_to_cpu(es->s_log_block_size));
- goto failed_mount;
- }
- if (le32_to_cpu(es->s_log_cluster_size) >
- (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
- ext4_msg(sb, KERN_ERR,
- "Invalid log cluster size: %u",
- le32_to_cpu(es->s_log_cluster_size));
- goto failed_mount;
+static int ext4_handle_clustersize(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+ int clustersize;
+
+ /* Handle clustersize */
+ clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
+ if (ext4_has_feature_bigalloc(sb)) {
+ if (clustersize < sb->s_blocksize) {
+ ext4_msg(sb, KERN_ERR,
+ "cluster size (%d) smaller than "
+ "block size (%lu)", clustersize, sb->s_blocksize);
+ return -EINVAL;
+ }
+ sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
+ le32_to_cpu(es->s_log_block_size);
+ sbi->s_clusters_per_group =
+ le32_to_cpu(es->s_clusters_per_group);
+ if (sbi->s_clusters_per_group > sb->s_blocksize * 8) {
+ ext4_msg(sb, KERN_ERR,
+ "#clusters per group too big: %lu",
+ sbi->s_clusters_per_group);
+ return -EINVAL;
+ }
+ if (sbi->s_blocks_per_group !=
+ (sbi->s_clusters_per_group * (clustersize / sb->s_blocksize))) {
+ ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
+ "clusters per group (%lu) inconsistent",
+ sbi->s_blocks_per_group,
+ sbi->s_clusters_per_group);
+ return -EINVAL;
+ }
+ } else {
+ if (clustersize != sb->s_blocksize) {
+ ext4_msg(sb, KERN_ERR,
+ "fragment/cluster size (%d) != "
+ "block size (%lu)", clustersize, sb->s_blocksize);
+ return -EINVAL;
+ }
+ if (sbi->s_blocks_per_group > sb->s_blocksize * 8) {
+ ext4_msg(sb, KERN_ERR,
+ "#blocks per group too big: %lu",
+ sbi->s_blocks_per_group);
+ return -EINVAL;
+ }
+ sbi->s_clusters_per_group = sbi->s_blocks_per_group;
+ sbi->s_cluster_bits = 0;
}
+ sbi->s_cluster_ratio = clustersize / sb->s_blocksize;
- blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
+ /* Do we have standard group size of clustersize * 8 blocks ? */
+ if (sbi->s_blocks_per_group == clustersize << 3)
+ set_opt2(sb, STD_GROUP_SIZE);
- if (blocksize == PAGE_SIZE)
- set_opt(sb, DIOREAD_NOLOCK);
+ return 0;
+}
+
+static void ext4_fast_commit_init(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ /* Initialize fast commit stuff */
+ atomic_set(&sbi->s_fc_subtid, 0);
+ INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]);
+ INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]);
+ INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
+ INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
+ sbi->s_fc_bytes = 0;
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+ sbi->s_fc_ineligible_tid = 0;
+ spin_lock_init(&sbi->s_fc_lock);
+ memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
+ sbi->s_fc_replay_state.fc_regions = NULL;
+ sbi->s_fc_replay_state.fc_regions_size = 0;
+ sbi->s_fc_replay_state.fc_regions_used = 0;
+ sbi->s_fc_replay_state.fc_regions_valid = 0;
+ sbi->s_fc_replay_state.fc_modified_inodes = NULL;
+ sbi->s_fc_replay_state.fc_modified_inodes_size = 0;
+ sbi->s_fc_replay_state.fc_modified_inodes_used = 0;
+}
+
+static int ext4_inode_info_init(struct super_block *sb,
+ struct ext4_super_block *es)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
@@ -4508,16 +4431,16 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
sbi->s_first_ino);
- goto failed_mount;
+ return -EINVAL;
}
if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
(!is_power_of_2(sbi->s_inode_size)) ||
- (sbi->s_inode_size > blocksize)) {
+ (sbi->s_inode_size > sb->s_blocksize)) {
ext4_msg(sb, KERN_ERR,
"unsupported inode size: %d",
sbi->s_inode_size);
- ext4_msg(sb, KERN_ERR, "blocksize: %d", blocksize);
- goto failed_mount;
+ ext4_msg(sb, KERN_ERR, "blocksize: %lu", sb->s_blocksize);
+ return -EINVAL;
}
/*
* i_atime_extra is the last extra field available for
@@ -4535,6 +4458,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
}
sb->s_time_min = EXT4_TIMESTAMP_MIN;
}
+
if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
EXT4_GOOD_OLD_INODE_SIZE;
@@ -4546,7 +4470,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
if (v > max) {
ext4_msg(sb, KERN_ERR,
"bad s_want_extra_isize: %d", v);
- goto failed_mount;
+ return -EINVAL;
}
if (sbi->s_want_extra_isize < v)
sbi->s_want_extra_isize = v;
@@ -4555,91 +4479,112 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
if (v > max) {
ext4_msg(sb, KERN_ERR,
"bad s_min_extra_isize: %d", v);
- goto failed_mount;
+ return -EINVAL;
}
if (sbi->s_want_extra_isize < v)
sbi->s_want_extra_isize = v;
}
}
- err = parse_apply_sb_mount_options(sb, ctx);
- if (err < 0)
- goto failed_mount;
+ return 0;
+}
- sbi->s_def_mount_opt = sbi->s_mount_opt;
+#if IS_ENABLED(CONFIG_UNICODE)
+static int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es)
+{
+ const struct ext4_sb_encodings *encoding_info;
+ struct unicode_map *encoding;
+ __u16 encoding_flags = le16_to_cpu(es->s_encoding_flags);
- err = ext4_check_opt_consistency(fc, sb);
- if (err < 0)
- goto failed_mount;
+ if (!ext4_has_feature_casefold(sb) || sb->s_encoding)
+ return 0;
- ext4_apply_options(fc, sb);
+ encoding_info = ext4_sb_read_encoding(es);
+ if (!encoding_info) {
+ ext4_msg(sb, KERN_ERR,
+ "Encoding requested by superblock is unknown");
+ return -EINVAL;
+ }
-#if IS_ENABLED(CONFIG_UNICODE)
- if (ext4_has_feature_casefold(sb) && !sb->s_encoding) {
- const struct ext4_sb_encodings *encoding_info;
- struct unicode_map *encoding;
- __u16 encoding_flags = le16_to_cpu(es->s_encoding_flags);
+ encoding = utf8_load(encoding_info->version);
+ if (IS_ERR(encoding)) {
+ ext4_msg(sb, KERN_ERR,
+ "can't mount with superblock charset: %s-%u.%u.%u "
+ "not supported by the kernel. flags: 0x%x.",
+ encoding_info->name,
+ unicode_major(encoding_info->version),
+ unicode_minor(encoding_info->version),
+ unicode_rev(encoding_info->version),
+ encoding_flags);
+ return -EINVAL;
+ }
+ ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: "
+ "%s-%u.%u.%u with flags 0x%hx", encoding_info->name,
+ unicode_major(encoding_info->version),
+ unicode_minor(encoding_info->version),
+ unicode_rev(encoding_info->version),
+ encoding_flags);
- encoding_info = ext4_sb_read_encoding(es);
- if (!encoding_info) {
- ext4_msg(sb, KERN_ERR,
- "Encoding requested by superblock is unknown");
- goto failed_mount;
- }
+ sb->s_encoding = encoding;
+ sb->s_encoding_flags = encoding_flags;
- encoding = utf8_load(encoding_info->version);
- if (IS_ERR(encoding)) {
- ext4_msg(sb, KERN_ERR,
- "can't mount with superblock charset: %s-%u.%u.%u "
- "not supported by the kernel. flags: 0x%x.",
- encoding_info->name,
- unicode_major(encoding_info->version),
- unicode_minor(encoding_info->version),
- unicode_rev(encoding_info->version),
- encoding_flags);
- goto failed_mount;
- }
- ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: "
- "%s-%u.%u.%u with flags 0x%hx", encoding_info->name,
- unicode_major(encoding_info->version),
- unicode_minor(encoding_info->version),
- unicode_rev(encoding_info->version),
- encoding_flags);
+ return 0;
+}
+#else
+static inline int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es)
+{
+ return 0;
+}
+#endif
+
+static int ext4_init_metadata_csum(struct super_block *sb, struct ext4_super_block *es)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ /* Warn if metadata_csum and gdt_csum are both set. */
+ if (ext4_has_feature_metadata_csum(sb) &&
+ ext4_has_feature_gdt_csum(sb))
+ ext4_warning(sb, "metadata_csum and uninit_bg are "
+ "redundant flags; please run fsck.");
- sb->s_encoding = encoding;
- sb->s_encoding_flags = encoding_flags;
+ /* Check for a known checksum algorithm */
+ if (!ext4_verify_csum_type(sb, es)) {
+ ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
+ "unknown checksum algorithm.");
+ return -EINVAL;
}
-#endif
+ ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE,
+ ext4_orphan_file_block_trigger);
- if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
- printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, O_DIRECT and fast_commit support!\n");
- /* can't mount with both data=journal and dioread_nolock. */
- clear_opt(sb, DIOREAD_NOLOCK);
- clear_opt2(sb, JOURNAL_FAST_COMMIT);
- if (test_opt2(sb, EXPLICIT_DELALLOC)) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "both data=journal and delalloc");
- goto failed_mount;
- }
- if (test_opt(sb, DAX_ALWAYS)) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "both data=journal and dax");
- goto failed_mount;
- }
- if (ext4_has_feature_encrypt(sb)) {
- ext4_msg(sb, KERN_WARNING,
- "encrypted files will use data=ordered "
- "instead of data journaling mode");
- }
- if (test_opt(sb, DELALLOC))
- clear_opt(sb, DELALLOC);
- } else {
- sb->s_iflags |= SB_I_CGROUPWB;
+ /* Load the checksum driver */
+ sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
+ if (IS_ERR(sbi->s_chksum_driver)) {
+ int ret = PTR_ERR(sbi->s_chksum_driver);
+ ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
+ sbi->s_chksum_driver = NULL;
+ return ret;
}
- sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
- (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
+ /* Check superblock checksum */
+ if (!ext4_superblock_csum_verify(sb, es)) {
+ ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
+ "invalid superblock checksum. Run e2fsck?");
+ return -EFSBADCRC;
+ }
+
+ /* Precompute checksum seed for all metadata */
+ if (ext4_has_feature_csum_seed(sb))
+ sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
+ else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb))
+ sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
+ sizeof(es->s_uuid));
+ return 0;
+}
+static int ext4_check_feature_compatibility(struct super_block *sb,
+ struct ext4_super_block *es,
+ int silent)
+{
if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
(ext4_has_compat_features(sb) ||
ext4_has_ro_compat_features(sb) ||
@@ -4653,7 +4598,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
if (ext4_has_feature_64bit(sb)) {
ext4_msg(sb, KERN_ERR,
"The Hurd can't support 64-bit file systems");
- goto failed_mount;
+ return -EINVAL;
}
/*
@@ -4663,7 +4608,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
if (ext4_has_feature_ea_inode(sb)) {
ext4_msg(sb, KERN_ERR,
"ea_inode feature is not supported for Hurd");
- goto failed_mount;
+ return -EINVAL;
}
}
@@ -4677,10 +4622,10 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
* it's actually an ext[34] filesystem.
*/
if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
- goto failed_mount;
+ return -EINVAL;
ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
"to feature incompatibilities");
- goto failed_mount;
+ return -EINVAL;
}
}
@@ -4694,10 +4639,10 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
* it's actually an ext4 filesystem.
*/
if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
- goto failed_mount;
+ return -EINVAL;
ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
"to feature incompatibilities");
- goto failed_mount;
+ return -EINVAL;
}
}
@@ -4707,9 +4652,463 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
* so there is a chance incompat flags are set on a rev 0 filesystem.
*/
if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ext4_geometry_check(struct super_block *sb,
+ struct ext4_super_block *es)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ __u64 blocks_count;
+
+ /* check blocks count against device size */
+ blocks_count = sb_bdev_nr_blocks(sb);
+ if (blocks_count && ext4_blocks_count(es) > blocks_count) {
+ ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
+ "exceeds size of device (%llu blocks)",
+ ext4_blocks_count(es), blocks_count);
+ return -EINVAL;
+ }
+
+ /*
+ * It makes no sense for the first data block to be beyond the end
+ * of the filesystem.
+ */
+ if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
+ ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
+ "block %u is beyond end of filesystem (%llu)",
+ le32_to_cpu(es->s_first_data_block),
+ ext4_blocks_count(es));
+ return -EINVAL;
+ }
+ if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
+ (sbi->s_cluster_ratio == 1)) {
+ ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
+ "block is 0 with a 1k block and cluster size");
+ return -EINVAL;
+ }
+
+ blocks_count = (ext4_blocks_count(es) -
+ le32_to_cpu(es->s_first_data_block) +
+ EXT4_BLOCKS_PER_GROUP(sb) - 1);
+ do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
+ if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
+ ext4_msg(sb, KERN_WARNING, "groups count too large: %llu "
+ "(block count %llu, first data block %u, "
+ "blocks per group %lu)", blocks_count,
+ ext4_blocks_count(es),
+ le32_to_cpu(es->s_first_data_block),
+ EXT4_BLOCKS_PER_GROUP(sb));
+ return -EINVAL;
+ }
+ sbi->s_groups_count = blocks_count;
+ sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
+ (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
+ if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
+ le32_to_cpu(es->s_inodes_count)) {
+ ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
+ le32_to_cpu(es->s_inodes_count),
+ ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void ext4_group_desc_free(struct ext4_sb_info *sbi)
+{
+ struct buffer_head **group_desc;
+ int i;
+
+ rcu_read_lock();
+ group_desc = rcu_dereference(sbi->s_group_desc);
+ for (i = 0; i < sbi->s_gdb_count; i++)
+ brelse(group_desc[i]);
+ kvfree(group_desc);
+ rcu_read_unlock();
+}
+
+static int ext4_group_desc_init(struct super_block *sb,
+ struct ext4_super_block *es,
+ ext4_fsblk_t logical_sb_block,
+ ext4_group_t *first_not_zeroed)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ unsigned int db_count;
+ ext4_fsblk_t block;
+ int ret;
+ int i;
+
+ db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
+ EXT4_DESC_PER_BLOCK(sb);
+ if (ext4_has_feature_meta_bg(sb)) {
+ if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
+ ext4_msg(sb, KERN_WARNING,
+ "first meta block group too large: %u "
+ "(group descriptor block count %u)",
+ le32_to_cpu(es->s_first_meta_bg), db_count);
+ return -EINVAL;
+ }
+ }
+ rcu_assign_pointer(sbi->s_group_desc,
+ kvmalloc_array(db_count,
+ sizeof(struct buffer_head *),
+ GFP_KERNEL));
+ if (sbi->s_group_desc == NULL) {
+ ext4_msg(sb, KERN_ERR, "not enough memory");
+ return -ENOMEM;
+ }
+
+ bgl_lock_init(sbi->s_blockgroup_lock);
+
+ /* Pre-read the descriptors into the buffer cache */
+ for (i = 0; i < db_count; i++) {
+ block = descriptor_loc(sb, logical_sb_block, i);
+ ext4_sb_breadahead_unmovable(sb, block);
+ }
+
+ for (i = 0; i < db_count; i++) {
+ struct buffer_head *bh;
+
+ block = descriptor_loc(sb, logical_sb_block, i);
+ bh = ext4_sb_bread_unmovable(sb, block);
+ if (IS_ERR(bh)) {
+ ext4_msg(sb, KERN_ERR,
+ "can't read group descriptor %d", i);
+ sbi->s_gdb_count = i;
+ ret = PTR_ERR(bh);
+ goto out;
+ }
+ rcu_read_lock();
+ rcu_dereference(sbi->s_group_desc)[i] = bh;
+ rcu_read_unlock();
+ }
+ sbi->s_gdb_count = db_count;
+ if (!ext4_check_descriptors(sb, logical_sb_block, first_not_zeroed)) {
+ ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
+ ret = -EFSCORRUPTED;
+ goto out;
+ }
+ return 0;
+out:
+ ext4_group_desc_free(sbi);
+ return ret;
+}
+
+static int ext4_load_and_init_journal(struct super_block *sb,
+ struct ext4_super_block *es,
+ struct ext4_fs_context *ctx)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ int err;
+
+ err = ext4_load_journal(sb, es, ctx->journal_devnum);
+ if (err)
+ return err;
+
+ if (ext4_has_feature_64bit(sb) &&
+ !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
+ JBD2_FEATURE_INCOMPAT_64BIT)) {
+ ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
+ goto out;
+ }
+
+ if (!set_journal_csum_feature_set(sb)) {
+ ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
+ "feature set");
+ goto out;
+ }
+
+ if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
+ !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
+ JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) {
+ ext4_msg(sb, KERN_ERR,
+ "Failed to set fast commit journal feature");
+ goto out;
+ }
+
+ /* We have now updated the journal if required, so we can
+ * validate the data journaling mode. */
+ switch (test_opt(sb, DATA_FLAGS)) {
+ case 0:
+ /* No mode set, assume a default based on the journal
+ * capabilities: ORDERED_DATA if the journal can
+ * cope, else JOURNAL_DATA
+ */
+ if (jbd2_journal_check_available_features
+ (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
+ set_opt(sb, ORDERED_DATA);
+ sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
+ } else {
+ set_opt(sb, JOURNAL_DATA);
+ sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
+ }
+ break;
+
+ case EXT4_MOUNT_ORDERED_DATA:
+ case EXT4_MOUNT_WRITEBACK_DATA:
+ if (!jbd2_journal_check_available_features
+ (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
+ ext4_msg(sb, KERN_ERR, "Journal does not support "
+ "requested data journaling mode");
+ goto out;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
+ test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "journal_async_commit in data=ordered mode");
+ goto out;
+ }
+
+ set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);
+
+ sbi->s_journal->j_submit_inode_data_buffers =
+ ext4_journal_submit_inode_data_buffers;
+ sbi->s_journal->j_finish_inode_data_buffers =
+ ext4_journal_finish_inode_data_buffers;
+
+ return 0;
+
+out:
+ /* flush s_error_work before journal destroy. */
+ flush_work(&sbi->s_error_work);
+ jbd2_journal_destroy(sbi->s_journal);
+ sbi->s_journal = NULL;
+ return -EINVAL;
+}
+
+static int ext4_journal_data_mode_check(struct super_block *sb)
+{
+ if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
+ printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with "
+ "data=journal disables delayed allocation, "
+ "dioread_nolock, O_DIRECT and fast_commit support!\n");
+ /* can't mount with both data=journal and dioread_nolock. */
+ clear_opt(sb, DIOREAD_NOLOCK);
+ clear_opt2(sb, JOURNAL_FAST_COMMIT);
+ if (test_opt2(sb, EXPLICIT_DELALLOC)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "both data=journal and delalloc");
+ return -EINVAL;
+ }
+ if (test_opt(sb, DAX_ALWAYS)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "both data=journal and dax");
+ return -EINVAL;
+ }
+ if (ext4_has_feature_encrypt(sb)) {
+ ext4_msg(sb, KERN_WARNING,
+ "encrypted files will use data=ordered "
+ "instead of data journaling mode");
+ }
+ if (test_opt(sb, DELALLOC))
+ clear_opt(sb, DELALLOC);
+ } else {
+ sb->s_iflags |= SB_I_CGROUPWB;
+ }
+
+ return 0;
+}
+
+static int ext4_load_super(struct super_block *sb, ext4_fsblk_t *lsb,
+ int silent)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es;
+ ext4_fsblk_t logical_sb_block;
+ unsigned long offset = 0;
+ struct buffer_head *bh;
+ int ret = -EINVAL;
+ int blocksize;
+
+ blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
+ if (!blocksize) {
+ ext4_msg(sb, KERN_ERR, "unable to set blocksize");
+ return -EINVAL;
+ }
+
+ /*
+ * The ext4 superblock will not be buffer aligned for other than 1kB
+ * block sizes. We need to calculate the offset from buffer start.
+ */
+ if (blocksize != EXT4_MIN_BLOCK_SIZE) {
+ logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
+ offset = do_div(logical_sb_block, blocksize);
+ } else {
+ logical_sb_block = sbi->s_sb_block;
+ }
+
+ bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
+ if (IS_ERR(bh)) {
+ ext4_msg(sb, KERN_ERR, "unable to read superblock");
+ return PTR_ERR(bh);
+ }
+ /*
+ * Note: s_es must be initialized as soon as possible because
+ * some ext4 macro-instructions depend on its value
+ */
+ es = (struct ext4_super_block *) (bh->b_data + offset);
+ sbi->s_es = es;
+ sb->s_magic = le16_to_cpu(es->s_magic);
+ if (sb->s_magic != EXT4_SUPER_MAGIC) {
+ if (!silent)
+ ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
+ goto out;
+ }
+
+ if (le32_to_cpu(es->s_log_block_size) >
+ (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
+ ext4_msg(sb, KERN_ERR,
+ "Invalid log block size: %u",
+ le32_to_cpu(es->s_log_block_size));
+ goto out;
+ }
+ if (le32_to_cpu(es->s_log_cluster_size) >
+ (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
+ ext4_msg(sb, KERN_ERR,
+ "Invalid log cluster size: %u",
+ le32_to_cpu(es->s_log_cluster_size));
+ goto out;
+ }
+
+ blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
+
+ /*
+ * If the default block size is not the same as the real block size,
+ * we need to reload it.
+ */
+ if (sb->s_blocksize == blocksize) {
+ *lsb = logical_sb_block;
+ sbi->s_sbh = bh;
+ return 0;
+ }
+
+ /*
+ * bh must be released before kill_bdev(), otherwise
+ * it won't be freed and its page also. kill_bdev()
+ * is called by sb_set_blocksize().
+ */
+ brelse(bh);
+ /* Validate the filesystem blocksize */
+ if (!sb_set_blocksize(sb, blocksize)) {
+ ext4_msg(sb, KERN_ERR, "bad block size %d",
+ blocksize);
+ bh = NULL;
+ goto out;
+ }
+
+ logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
+ offset = do_div(logical_sb_block, blocksize);
+ bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
+ if (IS_ERR(bh)) {
+ ext4_msg(sb, KERN_ERR, "Can't read superblock on 2nd try");
+ ret = PTR_ERR(bh);
+ bh = NULL;
+ goto out;
+ }
+ es = (struct ext4_super_block *)(bh->b_data + offset);
+ sbi->s_es = es;
+ if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
+ ext4_msg(sb, KERN_ERR, "Magic mismatch, very weird!");
+ goto out;
+ }
+ *lsb = logical_sb_block;
+ sbi->s_sbh = bh;
+ return 0;
+out:
+ brelse(bh);
+ return ret;
+}
+
+static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
+{
+ struct ext4_super_block *es = NULL;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct flex_groups **flex_groups;
+ ext4_fsblk_t block;
+ ext4_fsblk_t logical_sb_block;
+ struct inode *root;
+ int ret = -ENOMEM;
+ unsigned int i;
+ int needs_recovery, has_huge_files;
+ int err = 0;
+ ext4_group_t first_not_zeroed;
+ struct ext4_fs_context *ctx = fc->fs_private;
+ int silent = fc->sb_flags & SB_SILENT;
+
+ /* Set defaults for the variables that will be set during parsing */
+ if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO))
+ ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
+
+ sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
+ sbi->s_sectors_written_start =
+ part_stat_read(sb->s_bdev, sectors[STAT_WRITE]);
+
+ /* -EINVAL is default */
+ ret = -EINVAL;
+ err = ext4_load_super(sb, &logical_sb_block, silent);
+ if (err)
+ goto out_fail;
+
+ es = sbi->s_es;
+ sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
+
+ err = ext4_init_metadata_csum(sb, es);
+ if (err)
+ goto failed_mount;
+
+ ext4_set_def_opts(sb, es);
+
+ sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
+ sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
+ sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
+ sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
+ sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
+
+ /*
+ * set default s_li_wait_mult for lazyinit, for the case there is
+ * no mount option specified.
+ */
+ sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
+
+ if (ext4_inode_info_init(sb, es))
+ goto failed_mount;
+
+ err = parse_apply_sb_mount_options(sb, ctx);
+ if (err < 0)
+ goto failed_mount;
+
+ sbi->s_def_mount_opt = sbi->s_mount_opt;
+
+ err = ext4_check_opt_consistency(fc, sb);
+ if (err < 0)
goto failed_mount;
- if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
+ ext4_apply_options(fc, sb);
+
+ if (ext4_encoding_init(sb, es))
+ goto failed_mount;
+
+ if (ext4_journal_data_mode_check(sb))
+ goto failed_mount;
+
+ sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
+ (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
+
+ /* i_version is always enabled now */
+ sb->s_flags |= SB_I_VERSION;
+
+ if (ext4_check_feature_compatibility(sb, es, silent))
+ goto failed_mount;
+
+ if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (sb->s_blocksize / 4)) {
ext4_msg(sb, KERN_ERR,
"Number of reserved GDT blocks insanely large: %d",
le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
@@ -4717,7 +5116,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
}
if (sbi->s_daxdev) {
- if (blocksize == PAGE_SIZE)
+ if (sb->s_blocksize == PAGE_SIZE)
set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
else
ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
@@ -4742,40 +5141,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
goto failed_mount;
}
- if (sb->s_blocksize != blocksize) {
- /*
- * bh must be released before kill_bdev(), otherwise
- * it won't be freed and its page also. kill_bdev()
- * is called by sb_set_blocksize().
- */
- brelse(bh);
- /* Validate the filesystem blocksize */
- if (!sb_set_blocksize(sb, blocksize)) {
- ext4_msg(sb, KERN_ERR, "bad block size %d",
- blocksize);
- bh = NULL;
- goto failed_mount;
- }
-
- logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
- offset = do_div(logical_sb_block, blocksize);
- bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
- if (IS_ERR(bh)) {
- ext4_msg(sb, KERN_ERR,
- "Can't read superblock on 2nd try");
- ret = PTR_ERR(bh);
- bh = NULL;
- goto failed_mount;
- }
- es = (struct ext4_super_block *)(bh->b_data + offset);
- sbi->s_es = es;
- if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
- ext4_msg(sb, KERN_ERR,
- "Magic mismatch, very weird!");
- goto failed_mount;
- }
- }
-
has_huge_files = ext4_has_feature_huge_file(sb);
sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
has_huge_files);
@@ -4797,19 +5162,21 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
- sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
- if (sbi->s_inodes_per_block == 0)
- goto cantfind_ext4;
+ sbi->s_inodes_per_block = sb->s_blocksize / EXT4_INODE_SIZE(sb);
+ if (sbi->s_inodes_per_block == 0 || sbi->s_blocks_per_group == 0) {
+ if (!silent)
+ ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
+ goto failed_mount;
+ }
if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
- sbi->s_inodes_per_group > blocksize * 8) {
+ sbi->s_inodes_per_group > sb->s_blocksize * 8) {
ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
sbi->s_inodes_per_group);
goto failed_mount;
}
sbi->s_itb_per_group = sbi->s_inodes_per_group /
sbi->s_inodes_per_block;
- sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
- sbi->s_sbh = bh;
+ sbi->s_desc_per_block = sb->s_blocksize / EXT4_DESC_SIZE(sb);
sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY;
sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
@@ -4835,54 +5202,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
}
}
- /* Handle clustersize */
- clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
- if (ext4_has_feature_bigalloc(sb)) {
- if (clustersize < blocksize) {
- ext4_msg(sb, KERN_ERR,
- "cluster size (%d) smaller than "
- "block size (%d)", clustersize, blocksize);
- goto failed_mount;
- }
- sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
- le32_to_cpu(es->s_log_block_size);
- sbi->s_clusters_per_group =
- le32_to_cpu(es->s_clusters_per_group);
- if (sbi->s_clusters_per_group > blocksize * 8) {
- ext4_msg(sb, KERN_ERR,
- "#clusters per group too big: %lu",
- sbi->s_clusters_per_group);
- goto failed_mount;
- }
- if (sbi->s_blocks_per_group !=
- (sbi->s_clusters_per_group * (clustersize / blocksize))) {
- ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
- "clusters per group (%lu) inconsistent",
- sbi->s_blocks_per_group,
- sbi->s_clusters_per_group);
- goto failed_mount;
- }
- } else {
- if (clustersize != blocksize) {
- ext4_msg(sb, KERN_ERR,
- "fragment/cluster size (%d) != "
- "block size (%d)", clustersize, blocksize);
- goto failed_mount;
- }
- if (sbi->s_blocks_per_group > blocksize * 8) {
- ext4_msg(sb, KERN_ERR,
- "#blocks per group too big: %lu",
- sbi->s_blocks_per_group);
- goto failed_mount;
- }
- sbi->s_clusters_per_group = sbi->s_blocks_per_group;
- sbi->s_cluster_bits = 0;
- }
- sbi->s_cluster_ratio = clustersize / blocksize;
-
- /* Do we have standard group size of clustersize * 8 blocks ? */
- if (sbi->s_blocks_per_group == clustersize << 3)
- set_opt2(sb, STD_GROUP_SIZE);
+ if (ext4_handle_clustersize(sb))
+ goto failed_mount;
/*
* Test whether we have more sectors than will fit in sector_t,
@@ -4896,111 +5217,12 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
goto failed_mount;
}
- if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
- goto cantfind_ext4;
-
- /* check blocks count against device size */
- blocks_count = sb_bdev_nr_blocks(sb);
- if (blocks_count && ext4_blocks_count(es) > blocks_count) {
- ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
- "exceeds size of device (%llu blocks)",
- ext4_blocks_count(es), blocks_count);
+ if (ext4_geometry_check(sb, es))
goto failed_mount;
- }
- /*
- * It makes no sense for the first data block to be beyond the end
- * of the filesystem.
- */
- if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
- ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
- "block %u is beyond end of filesystem (%llu)",
- le32_to_cpu(es->s_first_data_block),
- ext4_blocks_count(es));
- goto failed_mount;
- }
- if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
- (sbi->s_cluster_ratio == 1)) {
- ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
- "block is 0 with a 1k block and cluster size");
- goto failed_mount;
- }
-
- blocks_count = (ext4_blocks_count(es) -
- le32_to_cpu(es->s_first_data_block) +
- EXT4_BLOCKS_PER_GROUP(sb) - 1);
- do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
- if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
- ext4_msg(sb, KERN_WARNING, "groups count too large: %llu "
- "(block count %llu, first data block %u, "
- "blocks per group %lu)", blocks_count,
- ext4_blocks_count(es),
- le32_to_cpu(es->s_first_data_block),
- EXT4_BLOCKS_PER_GROUP(sb));
- goto failed_mount;
- }
- sbi->s_groups_count = blocks_count;
- sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
- (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
- if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
- le32_to_cpu(es->s_inodes_count)) {
- ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
- le32_to_cpu(es->s_inodes_count),
- ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
- ret = -EINVAL;
- goto failed_mount;
- }
- db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
- EXT4_DESC_PER_BLOCK(sb);
- if (ext4_has_feature_meta_bg(sb)) {
- if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
- ext4_msg(sb, KERN_WARNING,
- "first meta block group too large: %u "
- "(group descriptor block count %u)",
- le32_to_cpu(es->s_first_meta_bg), db_count);
- goto failed_mount;
- }
- }
- rcu_assign_pointer(sbi->s_group_desc,
- kvmalloc_array(db_count,
- sizeof(struct buffer_head *),
- GFP_KERNEL));
- if (sbi->s_group_desc == NULL) {
- ext4_msg(sb, KERN_ERR, "not enough memory");
- ret = -ENOMEM;
+ err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed);
+ if (err)
goto failed_mount;
- }
-
- bgl_lock_init(sbi->s_blockgroup_lock);
-
- /* Pre-read the descriptors into the buffer cache */
- for (i = 0; i < db_count; i++) {
- block = descriptor_loc(sb, logical_sb_block, i);
- ext4_sb_breadahead_unmovable(sb, block);
- }
-
- for (i = 0; i < db_count; i++) {
- struct buffer_head *bh;
-
- block = descriptor_loc(sb, logical_sb_block, i);
- bh = ext4_sb_bread_unmovable(sb, block);
- if (IS_ERR(bh)) {
- ext4_msg(sb, KERN_ERR,
- "can't read group descriptor %d", i);
- db_count = i;
- ret = PTR_ERR(bh);
- goto failed_mount2;
- }
- rcu_read_lock();
- rcu_dereference(sbi->s_group_desc)[i] = bh;
- rcu_read_unlock();
- }
- sbi->s_gdb_count = db_count;
- if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
- ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
- ret = -EFSCORRUPTED;
- goto failed_mount2;
- }
timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
spin_lock_init(&sbi->s_error_lock);
@@ -5038,24 +5260,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
mutex_init(&sbi->s_orphan_lock);
- /* Initialize fast commit stuff */
- atomic_set(&sbi->s_fc_subtid, 0);
- INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]);
- INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]);
- INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
- INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
- sbi->s_fc_bytes = 0;
- ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
- sbi->s_fc_ineligible_tid = 0;
- spin_lock_init(&sbi->s_fc_lock);
- memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
- sbi->s_fc_replay_state.fc_regions = NULL;
- sbi->s_fc_replay_state.fc_regions_size = 0;
- sbi->s_fc_replay_state.fc_regions_used = 0;
- sbi->s_fc_replay_state.fc_regions_valid = 0;
- sbi->s_fc_replay_state.fc_modified_inodes = NULL;
- sbi->s_fc_replay_state.fc_modified_inodes_size = 0;
- sbi->s_fc_replay_state.fc_modified_inodes_used = 0;
+ ext4_fast_commit_init(sb);
sb->s_root = NULL;
@@ -5072,37 +5277,37 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
* root first: it may be modified in the journal!
*/
if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
- err = ext4_load_journal(sb, es, ctx->journal_devnum);
+ err = ext4_load_and_init_journal(sb, es, ctx);
if (err)
goto failed_mount3a;
} else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) &&
ext4_has_feature_journal_needs_recovery(sb)) {
ext4_msg(sb, KERN_ERR, "required journal recovery "
"suppressed and not mounted read-only");
- goto failed_mount_wq;
+ goto failed_mount3a;
} else {
/* Nojournal mode, all journal mount options are illegal */
if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"journal_checksum, fs mounted w/o journal");
- goto failed_mount_wq;
+ goto failed_mount3a;
}
if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"journal_async_commit, fs mounted w/o journal");
- goto failed_mount_wq;
+ goto failed_mount3a;
}
if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"commit=%lu, fs mounted w/o journal",
sbi->s_commit_interval / HZ);
- goto failed_mount_wq;
+ goto failed_mount3a;
}
if (EXT4_MOUNT_DATA_FLAGS &
(sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"data=, fs mounted w/o journal");
- goto failed_mount_wq;
+ goto failed_mount3a;
}
sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
clear_opt(sb, JOURNAL_CHECKSUM);
@@ -5110,76 +5315,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
clear_opt2(sb, JOURNAL_FAST_COMMIT);
sbi->s_journal = NULL;
needs_recovery = 0;
- goto no_journal;
- }
-
- if (ext4_has_feature_64bit(sb) &&
- !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
- JBD2_FEATURE_INCOMPAT_64BIT)) {
- ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
- goto failed_mount_wq;
- }
-
- if (!set_journal_csum_feature_set(sb)) {
- ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
- "feature set");
- goto failed_mount_wq;
- }
-
- if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
- !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
- JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) {
- ext4_msg(sb, KERN_ERR,
- "Failed to set fast commit journal feature");
- goto failed_mount_wq;
- }
-
- /* We have now updated the journal if required, so we can
- * validate the data journaling mode. */
- switch (test_opt(sb, DATA_FLAGS)) {
- case 0:
- /* No mode set, assume a default based on the journal
- * capabilities: ORDERED_DATA if the journal can
- * cope, else JOURNAL_DATA
- */
- if (jbd2_journal_check_available_features
- (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
- set_opt(sb, ORDERED_DATA);
- sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
- } else {
- set_opt(sb, JOURNAL_DATA);
- sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
- }
- break;
-
- case EXT4_MOUNT_ORDERED_DATA:
- case EXT4_MOUNT_WRITEBACK_DATA:
- if (!jbd2_journal_check_available_features
- (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
- ext4_msg(sb, KERN_ERR, "Journal does not support "
- "requested data journaling mode");
- goto failed_mount_wq;
- }
- break;
- default:
- break;
- }
-
- if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
- test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "journal_async_commit in data=ordered mode");
- goto failed_mount_wq;
}
- set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);
-
- sbi->s_journal->j_submit_inode_data_buffers =
- ext4_journal_submit_inode_data_buffers;
- sbi->s_journal->j_finish_inode_data_buffers =
- ext4_journal_finish_inode_data_buffers;
-
-no_journal:
if (!test_opt(sb, NO_MBCACHE)) {
sbi->s_ea_block_cache = ext4_xattr_create_cache();
if (!sbi->s_ea_block_cache) {
@@ -5198,7 +5335,7 @@ no_journal:
}
}
- if (ext4_has_feature_verity(sb) && blocksize != PAGE_SIZE) {
+ if (ext4_has_feature_verity(sb) && sb->s_blocksize != PAGE_SIZE) {
ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity");
goto failed_mount_wq;
}
@@ -5408,11 +5545,6 @@ no_journal:
return 0;
-cantfind_ext4:
- if (!silent)
- ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
- goto failed_mount;
-
failed_mount9:
ext4_release_orphan_info(sb);
failed_mount8:
@@ -5466,13 +5598,7 @@ failed_mount3:
flush_work(&sbi->s_error_work);
del_timer_sync(&sbi->s_err_report);
ext4_stop_mmpd(sbi);
-failed_mount2:
- rcu_read_lock();
- group_desc = rcu_dereference(sbi->s_group_desc);
- for (i = 0; i < db_count; i++)
- brelse(group_desc[i]);
- kvfree(group_desc);
- rcu_read_unlock();
+ ext4_group_desc_free(sbi);
failed_mount:
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
@@ -5487,7 +5613,7 @@ failed_mount:
#endif
fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
/* ext4_blkdev_remove() calls kill_bdev(), release bh before it. */
- brelse(bh);
+ brelse(sbi->s_sbh);
ext4_blkdev_remove(sbi);
out_fail:
sb->s_fs_info = NULL;
@@ -6653,7 +6779,7 @@ static int ext4_write_info(struct super_block *sb, int type)
handle_t *handle;
/* Data block + inode block */
- handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2);
+ handle = ext4_journal_start_sb(sb, EXT4_HT_QUOTA, 2);
if (IS_ERR(handle))
return PTR_ERR(handle);
ret = dquot_commit_info(sb, type);
diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
index b051d19b5c8a..3c640bd7ecae 100644
--- a/fs/ext4/verity.c
+++ b/fs/ext4/verity.c
@@ -298,16 +298,14 @@ static int ext4_get_verity_descriptor_location(struct inode *inode,
last_extent = path[path->p_depth].p_ext;
if (!last_extent) {
EXT4_ERROR_INODE(inode, "verity file has no extents");
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
return -EFSCORRUPTED;
}
end_lblk = le32_to_cpu(last_extent->ee_block) +
ext4_ext_get_actual_len(last_extent);
desc_size_pos = (u64)end_lblk << inode->i_blkbits;
- ext4_ext_drop_refs(path);
- kfree(path);
+ ext4_free_ext_path(path);
if (desc_size_pos < sizeof(desc_size_disk))
goto bad;
@@ -365,13 +363,14 @@ static struct page *ext4_read_merkle_tree_page(struct inode *inode,
pgoff_t index,
unsigned long num_ra_pages)
{
- DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);
struct page *page;
index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;
page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
if (!page || !PageUptodate(page)) {
+ DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);
+
if (page)
put_page(page);
else if (num_ra_pages > 1)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 533216e80fa2..36d6ba7190b6 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -2412,6 +2412,7 @@ retry_inode:
if (!error) {
ext4_xattr_update_super_block(handle, inode->i_sb);
inode->i_ctime = current_time(inode);
+ inode_inc_iversion(inode);
if (!value)
no_expand = 0;
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index eaa240b21f07..5bbc44a5216e 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -219,7 +219,7 @@ static int f2fs_acl_update_mode(struct user_namespace *mnt_userns,
return error;
if (error == 0)
*acl = NULL;
- if (!in_group_p(i_gid_into_mnt(mnt_userns, inode)) &&
+ if (!vfsgid_in_group_p(i_gid_into_vfsgid(mnt_userns, inode)) &&
!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
mode &= ~S_ISGID;
*mode_p = mode;
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 8259e0fa97e1..0c82dae082aa 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -26,12 +26,16 @@
static struct kmem_cache *ino_entry_slab;
struct kmem_cache *f2fs_inode_entry_slab;
-void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
+void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
+ unsigned char reason)
{
f2fs_build_fault_attr(sbi, 0, 0);
set_ckpt_flags(sbi, CP_ERROR_FLAG);
- if (!end_io)
+ if (!end_io) {
f2fs_flush_merged_writes(sbi);
+
+ f2fs_handle_stop(sbi, reason);
+ }
}
/*
@@ -89,7 +93,7 @@ repeat:
return ERR_PTR(err);
}
- f2fs_update_iostat(sbi, FS_META_READ_IO, F2FS_BLKSIZE);
+ f2fs_update_iostat(sbi, NULL, FS_META_READ_IO, F2FS_BLKSIZE);
lock_page(page);
if (unlikely(page->mapping != mapping)) {
@@ -122,7 +126,7 @@ retry:
if (PTR_ERR(page) == -EIO &&
++count <= DEFAULT_RETRY_IO_COUNT)
goto retry;
- f2fs_stop_checkpoint(sbi, false);
+ f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_META_PAGE);
}
return page;
}
@@ -140,7 +144,7 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
unsigned int segno, offset;
bool exist;
- if (type != DATA_GENERIC_ENHANCE && type != DATA_GENERIC_ENHANCE_READ)
+ if (type == DATA_GENERIC)
return true;
segno = GET_SEGNO(sbi, blkaddr);
@@ -148,6 +152,13 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
se = get_seg_entry(sbi, segno);
exist = f2fs_test_bit(offset, se->cur_valid_map);
+ if (exist && type == DATA_GENERIC_ENHANCE_UPDATE) {
+ f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
+ blkaddr, exist);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ return exist;
+ }
+
if (!exist && type == DATA_GENERIC_ENHANCE) {
f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
blkaddr, exist);
@@ -185,6 +196,7 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
case DATA_GENERIC:
case DATA_GENERIC_ENHANCE:
case DATA_GENERIC_ENHANCE_READ:
+ case DATA_GENERIC_ENHANCE_UPDATE:
if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
blkaddr < MAIN_BLKADDR(sbi))) {
f2fs_warn(sbi, "access invalid blkaddr:%u",
@@ -276,7 +288,8 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
f2fs_put_page(page, err ? 1 : 0);
if (!err)
- f2fs_update_iostat(sbi, FS_META_READ_IO, F2FS_BLKSIZE);
+ f2fs_update_iostat(sbi, NULL, FS_META_READ_IO,
+ F2FS_BLKSIZE);
}
out:
blk_finish_plug(&plug);
@@ -448,8 +461,7 @@ static bool f2fs_dirty_meta_folio(struct address_space *mapping,
if (!folio_test_uptodate(folio))
folio_mark_uptodate(folio);
- if (!folio_test_dirty(folio)) {
- filemap_dirty_folio(mapping, folio);
+ if (filemap_dirty_folio(mapping, folio)) {
inc_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_META);
set_page_private_reference(&folio->page);
return true;
@@ -1053,7 +1065,8 @@ void f2fs_remove_dirty_inode(struct inode *inode)
spin_unlock(&sbi->inode_lock[type]);
}
-int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
+int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type,
+ bool from_cp)
{
struct list_head *head;
struct inode *inode;
@@ -1088,11 +1101,15 @@ retry:
if (inode) {
unsigned long cur_ino = inode->i_ino;
- F2FS_I(inode)->cp_task = current;
+ if (from_cp)
+ F2FS_I(inode)->cp_task = current;
+ F2FS_I(inode)->wb_task = current;
filemap_fdatawrite(inode->i_mapping);
- F2FS_I(inode)->cp_task = NULL;
+ F2FS_I(inode)->wb_task = NULL;
+ if (from_cp)
+ F2FS_I(inode)->cp_task = NULL;
iput(inode);
/* We need to give cpu to another writers. */
@@ -1221,7 +1238,7 @@ retry_flush_dents:
/* write all the dirty dentry pages */
if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
f2fs_unlock_all(sbi);
- err = f2fs_sync_dirty_inodes(sbi, DIR_INODE);
+ err = f2fs_sync_dirty_inodes(sbi, DIR_INODE, true);
if (err)
return err;
cond_resched();
@@ -1892,15 +1909,27 @@ int f2fs_start_ckpt_thread(struct f2fs_sb_info *sbi)
void f2fs_stop_ckpt_thread(struct f2fs_sb_info *sbi)
{
struct ckpt_req_control *cprc = &sbi->cprc_info;
+ struct task_struct *ckpt_task;
+
+ if (!cprc->f2fs_issue_ckpt)
+ return;
- if (cprc->f2fs_issue_ckpt) {
- struct task_struct *ckpt_task = cprc->f2fs_issue_ckpt;
+ ckpt_task = cprc->f2fs_issue_ckpt;
+ cprc->f2fs_issue_ckpt = NULL;
+ kthread_stop(ckpt_task);
- cprc->f2fs_issue_ckpt = NULL;
- kthread_stop(ckpt_task);
+ f2fs_flush_ckpt_thread(sbi);
+}
- flush_remained_ckpt_reqs(sbi, NULL);
- }
+void f2fs_flush_ckpt_thread(struct f2fs_sb_info *sbi)
+{
+ struct ckpt_req_control *cprc = &sbi->cprc_info;
+
+ flush_remained_ckpt_reqs(sbi, NULL);
+
+ /* Let's wait for the previous dispatched checkpoint. */
+ while (atomic_read(&cprc->queued_ckpt))
+ io_schedule_timeout(DEFAULT_IO_TIMEOUT);
}
void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 70e97075e535..d315c2de136f 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -762,6 +762,7 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task)
if (dic->clen > PAGE_SIZE * dic->nr_cpages - COMPRESS_HEADER_SIZE) {
ret = -EFSCORRUPTED;
+ f2fs_handle_error(sbi, ERROR_FAIL_DECOMPRESSION);
goto out_release;
}
@@ -912,17 +913,15 @@ bool f2fs_sanity_check_cluster(struct dnode_of_data *dn)
reason = "[C|*|C|*]";
goto out;
}
- if (compressed) {
- if (!__is_valid_data_blkaddr(blkaddr)) {
- if (!cluster_end)
- cluster_end = i;
- continue;
- }
- /* [COMPR_ADDR, NULL_ADDR or NEW_ADDR, valid_blkaddr] */
- if (cluster_end) {
- reason = "[C|N|N|V]";
- goto out;
- }
+ if (!__is_valid_data_blkaddr(blkaddr)) {
+ if (!cluster_end)
+ cluster_end = i;
+ continue;
+ }
+ /* [COMPR_ADDR, NULL_ADDR or NEW_ADDR, valid_blkaddr] */
+ if (cluster_end) {
+ reason = "[C|N|N|V]";
+ goto out;
}
}
return false;
@@ -952,6 +951,7 @@ static int __f2fs_cluster_blocks(struct inode *inode,
if (f2fs_sanity_check_cluster(&dn)) {
ret = -EFSCORRUPTED;
+ f2fs_handle_error(F2FS_I_SB(inode), ERROR_CORRUPTED_CLUSTER);
goto fail;
}
@@ -1568,12 +1568,8 @@ static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic,
if (!dic->cbuf)
return -ENOMEM;
- if (cops->init_decompress_ctx) {
- int ret = cops->init_decompress_ctx(dic);
-
- if (ret)
- return ret;
- }
+ if (cops->init_decompress_ctx)
+ return cops->init_decompress_ctx(dic);
return 0;
}
@@ -1905,7 +1901,7 @@ bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, struct page *page,
void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino)
{
- struct address_space *mapping = sbi->compress_inode->i_mapping;
+ struct address_space *mapping = COMPRESS_MAPPING(sbi);
struct folio_batch fbatch;
pgoff_t index = 0;
pgoff_t end = MAX_BLKADDR(sbi);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index aa3ccddfa037..a71e818cd67b 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -139,7 +139,7 @@ static void f2fs_finish_read_bio(struct bio *bio, bool in_task)
continue;
}
- /* PG_error was set if decryption or verity failed. */
+ /* PG_error was set if verity failed. */
if (bio->bi_status || PageError(page)) {
ClearPageUptodate(page);
/* will re-read again later */
@@ -185,7 +185,7 @@ static void f2fs_verify_bio(struct work_struct *work)
struct page *page = bv->bv_page;
if (!f2fs_is_compressed_page(page) &&
- !PageError(page) && !fsverity_verify_page(page))
+ !fsverity_verify_page(page))
SetPageError(page);
}
} else {
@@ -236,10 +236,9 @@ static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx,
bio_for_each_segment_all(bv, ctx->bio, iter_all) {
struct page *page = bv->bv_page;
- /* PG_error was set if decryption failed. */
if (f2fs_is_compressed_page(page))
- f2fs_end_read_compressed_page(page, PageError(page),
- blkaddr, in_task);
+ f2fs_end_read_compressed_page(page, false, blkaddr,
+ in_task);
else
all_compressed = false;
@@ -259,14 +258,17 @@ static void f2fs_post_read_work(struct work_struct *work)
{
struct bio_post_read_ctx *ctx =
container_of(work, struct bio_post_read_ctx, work);
+ struct bio *bio = ctx->bio;
- if (ctx->enabled_steps & STEP_DECRYPT)
- fscrypt_decrypt_bio(ctx->bio);
+ if ((ctx->enabled_steps & STEP_DECRYPT) && !fscrypt_decrypt_bio(bio)) {
+ f2fs_finish_read_bio(bio, true);
+ return;
+ }
if (ctx->enabled_steps & STEP_DECOMPRESS)
f2fs_handle_step_decompress(ctx, true);
- f2fs_verify_and_finish_bio(ctx->bio, true);
+ f2fs_verify_and_finish_bio(bio, true);
}
static void f2fs_read_end_io(struct bio *bio)
@@ -333,7 +335,8 @@ static void f2fs_write_end_io(struct bio *bio)
mempool_free(page, sbi->write_io_dummy);
if (unlikely(bio->bi_status))
- f2fs_stop_checkpoint(sbi, true);
+ f2fs_stop_checkpoint(sbi, true,
+ STOP_CP_REASON_WRITE_FAIL);
continue;
}
@@ -349,7 +352,8 @@ static void f2fs_write_end_io(struct bio *bio)
if (unlikely(bio->bi_status)) {
mapping_set_error(page->mapping, -EIO);
if (type == F2FS_WB_CP_DATA)
- f2fs_stop_checkpoint(sbi, true);
+ f2fs_stop_checkpoint(sbi, true,
+ STOP_CP_REASON_WRITE_FAIL);
}
f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
@@ -703,8 +707,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
fio->is_por ? META_POR : (__is_meta_io(fio) ?
- META_GENERIC : DATA_GENERIC_ENHANCE)))
+ META_GENERIC : DATA_GENERIC_ENHANCE))) {
+ f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
return -EFSCORRUPTED;
+ }
trace_f2fs_submit_page_bio(page, fio);
@@ -723,7 +729,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
inc_page_count(fio->sbi, is_read_io(fio->op) ?
- __read_io_type(page): WB_DATA_TYPE(fio->page));
+ __read_io_type(page) : WB_DATA_TYPE(fio->page));
__submit_bio(fio->sbi, bio, fio->type);
return 0;
@@ -904,8 +910,10 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
fio->encrypted_page : fio->page;
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
- __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
+ __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) {
+ f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
return -EFSCORRUPTED;
+ }
trace_f2fs_submit_page_bio(page, fio);
@@ -1083,7 +1091,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page,
}
ClearPageError(page);
inc_page_count(sbi, F2FS_RD_DATA);
- f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
+ f2fs_update_iostat(sbi, NULL, FS_DATA_READ_IO, F2FS_BLKSIZE);
__submit_bio(sbi, bio, DATA);
return 0;
}
@@ -1215,6 +1223,8 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
DATA_GENERIC_ENHANCE_READ)) {
err = -EFSCORRUPTED;
+ f2fs_handle_error(F2FS_I_SB(inode),
+ ERROR_INVALID_BLKADDR);
goto put_err;
}
goto got_it;
@@ -1235,6 +1245,8 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
dn.data_blkaddr,
DATA_GENERIC_ENHANCE)) {
err = -EFSCORRUPTED;
+ f2fs_handle_error(F2FS_I_SB(inode),
+ ERROR_INVALID_BLKADDR);
goto put_err;
}
got_it:
@@ -1548,6 +1560,7 @@ next_block:
if (__is_valid_data_blkaddr(blkaddr) &&
!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
err = -EFSCORRUPTED;
+ f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
goto sync_out;
}
@@ -1593,6 +1606,8 @@ next_block:
(flag != F2FS_GET_BLOCK_FIEMAP ||
IS_ENABLED(CONFIG_F2FS_CHECK_FS))) {
err = -EFSCORRUPTED;
+ f2fs_handle_error(sbi,
+ ERROR_CORRUPTED_CLUSTER);
goto sync_out;
}
if (flag == F2FS_GET_BLOCK_BMAP) {
@@ -1816,7 +1831,7 @@ static int f2fs_xattr_fiemap(struct inode *inode,
err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
- if (err || err == 1)
+ if (err)
return err;
}
@@ -2074,6 +2089,8 @@ got_it:
if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
DATA_GENERIC_ENHANCE_READ)) {
ret = -EFSCORRUPTED;
+ f2fs_handle_error(F2FS_I_SB(inode),
+ ERROR_INVALID_BLKADDR);
goto out;
}
} else {
@@ -2122,7 +2139,8 @@ submit_and_realloc:
goto submit_and_realloc;
inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
- f2fs_update_iostat(F2FS_I_SB(inode), FS_DATA_READ_IO, F2FS_BLKSIZE);
+ f2fs_update_iostat(F2FS_I_SB(inode), NULL, FS_DATA_READ_IO,
+ F2FS_BLKSIZE);
ClearPageError(page);
*last_block_in_bio = block_nr;
goto out;
@@ -2270,8 +2288,7 @@ submit_and_realloc:
refcount_inc(&dic->refcnt);
inc_page_count(sbi, F2FS_RD_DATA);
- f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
- f2fs_update_iostat(sbi, FS_CDATA_READ_IO, F2FS_BLKSIZE);
+ f2fs_update_iostat(sbi, inode, FS_DATA_READ_IO, F2FS_BLKSIZE);
ClearPageError(page);
*last_block_in_bio = blkaddr;
}
@@ -2543,7 +2560,7 @@ bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
return true;
/* if this is cold file, we should overwrite to avoid fragmentation */
- if (file_is_cold(inode))
+ if (file_is_cold(inode) && !is_inode_flag_set(inode, FI_OPU_WRITE))
return true;
return check_inplace_update_policy(inode, fio);
@@ -2617,8 +2634,11 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
fio->old_blkaddr = ei.blk + page->index - ei.fofs;
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
- DATA_GENERIC_ENHANCE))
+ DATA_GENERIC_ENHANCE)) {
+ f2fs_handle_error(fio->sbi,
+ ERROR_INVALID_BLKADDR);
return -EFSCORRUPTED;
+ }
ipu_force = true;
fio->need_lock = LOCK_DONE;
@@ -2646,6 +2666,7 @@ got_it:
!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
DATA_GENERIC_ENHANCE)) {
err = -EFSCORRUPTED;
+ f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
goto out_writepage;
}
@@ -2856,7 +2877,7 @@ out:
}
unlock_page(page);
if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
- !F2FS_I(inode)->cp_task && allow_balance)
+ !F2FS_I(inode)->wb_task && allow_balance)
f2fs_balance_fs(sbi, need_balance_fs);
if (unlikely(f2fs_cp_error(sbi))) {
@@ -3156,7 +3177,7 @@ static inline bool __should_serialize_io(struct inode *inode,
struct writeback_control *wbc)
{
/* to avoid deadlock in path of data flush */
- if (F2FS_I(inode)->cp_task)
+ if (F2FS_I(inode)->wb_task)
return false;
if (!S_ISREG(inode->i_mode))
@@ -3559,6 +3580,7 @@ repeat:
if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
DATA_GENERIC_ENHANCE_READ)) {
err = -EFSCORRUPTED;
+ f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
goto fail;
}
err = f2fs_submit_page_read(inode, page, blkaddr, 0, true);
@@ -3697,8 +3719,7 @@ static bool f2fs_dirty_data_folio(struct address_space *mapping,
folio_mark_uptodate(folio);
BUG_ON(folio_test_swapcache(folio));
- if (!folio_test_dirty(folio)) {
- filemap_dirty_folio(mapping, folio);
+ if (filemap_dirty_folio(mapping, folio)) {
f2fs_update_dirty_folio(inode, folio);
return true;
}
@@ -3970,6 +3991,7 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
if (ret < 0)
return ret;
+ stat_inc_swapfile_inode(inode);
set_inode_flag(inode, FI_PIN_FILE);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return ret;
@@ -3979,6 +4001,7 @@ static void f2fs_swap_deactivate(struct file *file)
{
struct inode *inode = file_inode(file);
+ stat_dec_swapfile_inode(inode);
clear_inode_flag(inode, FI_PIN_FILE);
}
#else
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index c01471573977..a216dcdf6941 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -91,7 +91,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
si->nquota_files = sbi->nquota_files;
si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
- si->aw_cnt = sbi->atomic_files;
+ si->aw_cnt = atomic_read(&sbi->atomic_files);
si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
si->nr_dio_read = get_pages(sbi, F2FS_DIO_READ);
si->nr_dio_write = get_pages(sbi, F2FS_DIO_WRITE);
@@ -135,6 +135,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->inline_inode = atomic_read(&sbi->inline_inode);
si->inline_dir = atomic_read(&sbi->inline_dir);
si->compr_inode = atomic_read(&sbi->compr_inode);
+ si->swapfile_inode = atomic_read(&sbi->swapfile_inode);
si->compr_blocks = atomic64_read(&sbi->compr_blocks);
si->append = sbi->im[APPEND_INO].ino_num;
si->update = sbi->im[UPDATE_INO].ino_num;
@@ -347,7 +348,7 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n",
si->sbi->sb->s_bdev, i++,
- f2fs_readonly(si->sbi->sb) ? "RO": "RW",
+ f2fs_readonly(si->sbi->sb) ? "RO" : "RW",
is_set_ckpt_flags(si->sbi, CP_DISABLED_FLAG) ?
"Disabled" : (f2fs_cp_error(si->sbi) ? "Error" : "Good"));
if (si->sbi->s_flag) {
@@ -385,6 +386,8 @@ static int stat_show(struct seq_file *s, void *v)
si->inline_dir);
seq_printf(s, " - Compressed Inode: %u, Blocks: %llu\n",
si->compr_inode, si->compr_blocks);
+ seq_printf(s, " - Swapfile Inode: %u\n",
+ si->swapfile_inode);
seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n",
si->orphans, si->append, si->update);
seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
@@ -607,6 +610,8 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
atomic_set(&sbi->inline_dir, 0);
atomic_set(&sbi->compr_inode, 0);
atomic64_set(&sbi->compr_blocks, 0);
+ atomic_set(&sbi->swapfile_inode, 0);
+ atomic_set(&sbi->atomic_files, 0);
atomic_set(&sbi->inplace_count, 0);
for (i = META_CP; i < META_MAX; i++)
atomic_set(&sbi->meta_count[i], 0);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index d5bd7932fb64..21960a899b6a 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -1041,6 +1041,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
__func__, le16_to_cpu(de->name_len));
set_sbi_flag(sbi, SBI_NEED_FSCK);
err = -EFSCORRUPTED;
+ f2fs_handle_error(sbi, ERROR_CORRUPTED_DIRENT);
goto out;
}
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 866e72b29bd5..932c070173b9 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -544,7 +544,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
if (!et)
return;
- trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len);
+ trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len, 0);
write_lock(&et->lock);
@@ -583,7 +583,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
org_end = dei.fofs + dei.len;
f2fs_bug_on(sbi, pos >= org_end);
- if (pos > dei.fofs && pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
+ if (pos > dei.fofs && pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
en->ei.len = pos - en->ei.fofs;
prev_en = en;
parts = 1;
@@ -675,7 +675,7 @@ void f2fs_update_extent_tree_range_compressed(struct inode *inode,
struct rb_node **insert_p = NULL, *insert_parent = NULL;
bool leftmost = false;
- trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, llen);
+ trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, llen, c_len);
/* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
if (is_inode_flag_set(inode, FI_NO_EXTENT))
@@ -804,9 +804,8 @@ void f2fs_drop_extent_tree(struct inode *inode)
if (!f2fs_may_extent_tree(inode))
return;
- set_inode_flag(inode, FI_NO_EXTENT);
-
write_lock(&et->lock);
+ set_inode_flag(inode, FI_NO_EXTENT);
__free_extent_tree(sbi, et);
if (et->largest.len) {
et->largest.len = 0;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 3c7cdb70fe2e..e6355a5683b7 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -266,6 +266,10 @@ enum {
* condition of read on truncated area
* by extent_cache
*/
+ DATA_GENERIC_ENHANCE_UPDATE, /*
+ * strong check on range and segment
+ * bitmap for update case
+ */
META_GENERIC,
};
@@ -274,7 +278,7 @@ enum {
ORPHAN_INO, /* for orphan ino list */
APPEND_INO, /* for append ino list */
UPDATE_INO, /* for update ino list */
- TRANS_DIR_INO, /* for trasactions dir ino list */
+ TRANS_DIR_INO, /* for transactions dir ino list */
FLUSH_INO, /* for multiple device flushing */
MAX_INO_ENTRY, /* max. list */
};
@@ -782,6 +786,7 @@ struct f2fs_inode_info {
unsigned int clevel; /* maximum level of given file name */
struct task_struct *task; /* lookup and create consistency */
struct task_struct *cp_task; /* separate cp/wb IO stats*/
+ struct task_struct *wb_task; /* indicate inode is in context of writeback */
nid_t i_xattr_nid; /* node id that contains xattrs */
loff_t last_disk_size; /* lastly written file size */
spinlock_t i_size_lock; /* protect last_disk_size */
@@ -1158,7 +1163,10 @@ enum iostat_type {
APP_BUFFERED_IO, /* app buffered write IOs */
APP_WRITE_IO, /* app write IOs */
APP_MAPPED_IO, /* app mapped IOs */
+ APP_BUFFERED_CDATA_IO, /* app buffered write IOs on compressed file */
+ APP_MAPPED_CDATA_IO, /* app mapped write IOs on compressed file */
FS_DATA_IO, /* data IOs from kworker/fsync/reclaimer */
+ FS_CDATA_IO, /* data IOs from kworker/fsync/reclaimer on compressed file */
FS_NODE_IO, /* node IOs from kworker/fsync/reclaimer */
FS_META_IO, /* meta IOs from kworker/reclaimer */
FS_GC_DATA_IO, /* data IOs from forground gc */
@@ -1172,6 +1180,8 @@ enum iostat_type {
APP_BUFFERED_READ_IO, /* app buffered read IOs */
APP_READ_IO, /* app read IOs */
APP_MAPPED_READ_IO, /* app mapped read IOs */
+ APP_BUFFERED_CDATA_READ_IO, /* app buffered read IOs on compressed file */
+ APP_MAPPED_CDATA_READ_IO, /* app mapped read IOs on compressed file */
FS_DATA_READ_IO, /* data read IOs */
FS_GDATA_READ_IO, /* data read IOs from background gc */
FS_CDATA_READ_IO, /* compressed data read IOs */
@@ -1247,7 +1257,6 @@ enum inode_type {
DIR_INODE, /* for dirty dir inode */
FILE_INODE, /* for dirty regular/symlink inode */
DIRTY_META, /* for all dirtied inode metadata */
- ATOMIC_FILE, /* for all atomic files */
NR_INODE_TYPE,
};
@@ -1726,11 +1735,9 @@ struct f2fs_sb_info {
unsigned int gc_mode; /* current GC state */
unsigned int next_victim_seg[2]; /* next segment in victim section */
spinlock_t gc_urgent_high_lock;
- bool gc_urgent_high_limited; /* indicates having limited trial count */
unsigned int gc_urgent_high_remaining; /* remaining trial count for GC_URGENT_HIGH */
/* for skip statistic */
- unsigned int atomic_files; /* # of opened atomic file */
unsigned long long skipped_gc_rwsem; /* FG_GC only */
/* threshold for gc trials on pinned files */
@@ -1761,6 +1768,8 @@ struct f2fs_sb_info {
atomic_t inline_dir; /* # of inline_dentry inodes */
atomic_t compr_inode; /* # of compressed inodes */
atomic64_t compr_blocks; /* # of compressed blocks */
+ atomic_t swapfile_inode; /* # of swapfile inodes */
+ atomic_t atomic_files; /* # of opened atomic file */
atomic_t max_aw_cnt; /* max # of atomic writes */
unsigned int io_skip_bggc; /* skip background gc for in-flight IO */
unsigned int other_skip_bggc; /* skip background gc for other reasons */
@@ -1806,6 +1815,10 @@ struct f2fs_sb_info {
struct workqueue_struct *post_read_wq; /* post read workqueue */
+ unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */
+ spinlock_t error_lock; /* protect errors array */
+ bool error_dirty; /* errors of sb is dirty */
+
struct kmem_cache *inline_xattr_slab; /* inline xattr entry */
unsigned int inline_xattr_slab_size; /* default inline xattr slab size */
@@ -2525,7 +2538,7 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
if (__cp_payload(sbi) > 0) {
if (flag == NAT_BITMAP)
- return &ckpt->sit_nat_version_bitmap;
+ return tmp_ptr;
else
return (unsigned char *)ckpt + F2FS_BLKSIZE;
} else {
@@ -3547,6 +3560,8 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
int f2fs_quota_sync(struct super_block *sb, int type);
loff_t max_file_blocks(struct inode *inode);
void f2fs_quota_off_umount(struct super_block *sb);
+void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason);
+void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
int f2fs_sync_fs(struct super_block *sb, int sync);
int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi);
@@ -3706,7 +3721,9 @@ static inline bool f2fs_need_rand_seg(struct f2fs_sb_info *sbi)
/*
* checkpoint.c
*/
-void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
+void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
+ unsigned char reason);
+void f2fs_flush_ckpt_thread(struct f2fs_sb_info *sbi);
struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index);
@@ -3736,7 +3753,8 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi);
int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi);
void f2fs_update_dirty_folio(struct inode *inode, struct folio *folio);
void f2fs_remove_dirty_inode(struct inode *inode);
-int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type);
+int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type,
+ bool from_cp);
void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type);
u64 f2fs_get_sectors_written(struct f2fs_sb_info *sbi);
int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc);
@@ -3858,7 +3876,7 @@ struct f2fs_stat_info {
int nr_issued_ckpt, nr_total_ckpt, nr_queued_ckpt;
unsigned int cur_ckpt_time, peak_ckpt_time;
int inline_xattr, inline_inode, inline_dir, append, update, orphans;
- int compr_inode;
+ int compr_inode, swapfile_inode;
unsigned long long compr_blocks;
int aw_cnt, max_aw_cnt;
unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
@@ -3947,6 +3965,14 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
(atomic64_add(blocks, &F2FS_I_SB(inode)->compr_blocks))
#define stat_sub_compr_blocks(inode, blocks) \
(atomic64_sub(blocks, &F2FS_I_SB(inode)->compr_blocks))
+#define stat_inc_swapfile_inode(inode) \
+ (atomic_inc(&F2FS_I_SB(inode)->swapfile_inode))
+#define stat_dec_swapfile_inode(inode) \
+ (atomic_dec(&F2FS_I_SB(inode)->swapfile_inode))
+#define stat_inc_atomic_inode(inode) \
+ (atomic_inc(&F2FS_I_SB(inode)->atomic_files))
+#define stat_dec_atomic_inode(inode) \
+ (atomic_dec(&F2FS_I_SB(inode)->atomic_files))
#define stat_inc_meta_count(sbi, blkaddr) \
do { \
if (blkaddr < SIT_I(sbi)->sit_base_addr) \
@@ -3966,7 +3992,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
(atomic_inc(&(sbi)->inplace_count))
#define stat_update_max_atomic_write(inode) \
do { \
- int cur = F2FS_I_SB(inode)->atomic_files; \
+ int cur = atomic_read(&F2FS_I_SB(inode)->atomic_files); \
int max = atomic_read(&F2FS_I_SB(inode)->max_aw_cnt); \
if (cur > max) \
atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur); \
@@ -4031,6 +4057,10 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi);
#define stat_dec_compr_inode(inode) do { } while (0)
#define stat_add_compr_blocks(inode, blocks) do { } while (0)
#define stat_sub_compr_blocks(inode, blocks) do { } while (0)
+#define stat_inc_swapfile_inode(inode) do { } while (0)
+#define stat_dec_swapfile_inode(inode) do { } while (0)
+#define stat_inc_atomic_inode(inode) do { } while (0)
+#define stat_dec_atomic_inode(inode) do { } while (0)
#define stat_update_max_atomic_write(inode) do { } while (0)
#define stat_inc_meta_count(sbi, blkaddr) do { } while (0)
#define stat_inc_seg_type(sbi, curseg) do { } while (0)
@@ -4471,17 +4501,6 @@ static inline void f2fs_i_compr_blocks_update(struct inode *inode,
f2fs_mark_inode_dirty_sync(inode, true);
}
-static inline int block_unaligned_IO(struct inode *inode,
- struct kiocb *iocb, struct iov_iter *iter)
-{
- unsigned int i_blkbits = READ_ONCE(inode->i_blkbits);
- unsigned int blocksize_mask = (1 << i_blkbits) - 1;
- loff_t offset = iocb->ki_pos;
- unsigned long align = offset | iov_iter_alignment(iter);
-
- return align & blocksize_mask;
-}
-
static inline bool f2fs_allow_multi_device_dio(struct f2fs_sb_info *sbi,
int flag)
{
@@ -4492,35 +4511,6 @@ static inline bool f2fs_allow_multi_device_dio(struct f2fs_sb_info *sbi,
return sbi->aligned_blksize;
}
-static inline bool f2fs_force_buffered_io(struct inode *inode,
- struct kiocb *iocb, struct iov_iter *iter)
-{
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- int rw = iov_iter_rw(iter);
-
- if (!fscrypt_dio_supported(iocb, iter))
- return true;
- if (fsverity_active(inode))
- return true;
- if (f2fs_compressed_file(inode))
- return true;
-
- /* disallow direct IO if any of devices has unaligned blksize */
- if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize)
- return true;
-
- if (f2fs_lfs_mode(sbi) && (rw == WRITE)) {
- if (block_unaligned_IO(inode, iocb, iter))
- return true;
- if (F2FS_IO_ALIGNED(sbi))
- return true;
- }
- if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED))
- return true;
-
- return false;
-}
-
static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
{
return fsverity_active(inode) &&
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index ce4905a073b3..82cda1258227 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -43,8 +43,8 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
ret = filemap_fault(vmf);
if (!ret)
- f2fs_update_iostat(F2FS_I_SB(inode), APP_MAPPED_READ_IO,
- F2FS_BLKSIZE);
+ f2fs_update_iostat(F2FS_I_SB(inode), inode,
+ APP_MAPPED_READ_IO, F2FS_BLKSIZE);
trace_f2fs_filemap_fault(inode, vmf->pgoff, (unsigned long)ret);
@@ -154,7 +154,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
if (!PageUptodate(page))
SetPageUptodate(page);
- f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE);
+ f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE);
f2fs_update_time(sbi, REQ_TIME);
trace_f2fs_vm_page_mkwrite(page, DATA);
@@ -808,6 +808,34 @@ int f2fs_truncate(struct inode *inode)
return 0;
}
+static bool f2fs_force_buffered_io(struct inode *inode, int rw)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+ if (!fscrypt_dio_supported(inode))
+ return true;
+ if (fsverity_active(inode))
+ return true;
+ if (f2fs_compressed_file(inode))
+ return true;
+
+ /* disallow direct IO if any of devices has unaligned blksize */
+ if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize)
+ return true;
+ /*
+ * for blkzoned device, fallback direct IO to buffered IO, so
+ * all IOs can be serialized by log-structured write.
+ */
+ if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE))
+ return true;
+ if (f2fs_lfs_mode(sbi) && rw == WRITE && F2FS_IO_ALIGNED(sbi))
+ return true;
+ if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
+ return true;
+
+ return false;
+}
+
int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
@@ -824,6 +852,24 @@ int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path,
stat->btime.tv_nsec = fi->i_crtime.tv_nsec;
}
+ /*
+ * Return the DIO alignment restrictions if requested. We only return
+ * this information when requested, since on encrypted files it might
+ * take a fair bit of work to get if the file wasn't opened recently.
+ *
+ * f2fs sometimes supports DIO reads but not DIO writes. STATX_DIOALIGN
+ * cannot represent that, so in that case we report no DIO support.
+ */
+ if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) {
+ unsigned int bsize = i_blocksize(inode);
+
+ stat->result_mask |= STATX_DIOALIGN;
+ if (!f2fs_force_buffered_io(inode, WRITE)) {
+ stat->dio_mem_align = bsize;
+ stat->dio_offset_align = bsize;
+ }
+ }
+
flags = fi->i_flags;
if (flags & F2FS_COMPR_FL)
stat->attributes |= STATX_ATTR_COMPRESSED;
@@ -871,9 +917,10 @@ static void __setattr_copy(struct user_namespace *mnt_userns,
inode->i_ctime = attr->ia_ctime;
if (ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;
- kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
+ vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
- if (!in_group_p(kgid) && !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
+ if (!vfsgid_in_group_p(vfsgid) &&
+ !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
mode &= ~S_ISGID;
set_acl_inode(inode, mode);
}
@@ -1155,6 +1202,7 @@ next_dnode:
!f2fs_is_valid_blkaddr(sbi, *blkaddr,
DATA_GENERIC_ENHANCE)) {
f2fs_put_dnode(&dn);
+ f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
return -EFSCORRUPTED;
}
@@ -1439,6 +1487,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr,
DATA_GENERIC_ENHANCE)) {
ret = -EFSCORRUPTED;
+ f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
break;
}
@@ -2048,9 +2097,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
}
f2fs_i_size_write(fi->cow_inode, i_size_read(inode));
- spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
- sbi->atomic_files++;
- spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+ stat_inc_atomic_inode(inode);
set_inode_flag(inode, FI_ATOMIC_FILE);
set_inode_flag(fi->cow_inode, FI_COW_FILE);
@@ -2144,7 +2191,8 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
if (ret) {
if (ret == -EROFS) {
ret = 0;
- f2fs_stop_checkpoint(sbi, false);
+ f2fs_stop_checkpoint(sbi, false,
+ STOP_CP_REASON_SHUTDOWN);
set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
trace_f2fs_shutdown(sbi, in, ret);
}
@@ -2157,7 +2205,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
ret = freeze_bdev(sb->s_bdev);
if (ret)
goto out;
- f2fs_stop_checkpoint(sbi, false);
+ f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
thaw_bdev(sb->s_bdev);
break;
@@ -2166,16 +2214,16 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
ret = f2fs_sync_fs(sb, 1);
if (ret)
goto out;
- f2fs_stop_checkpoint(sbi, false);
+ f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_NOSYNC:
- f2fs_stop_checkpoint(sbi, false);
+ f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_METAFLUSH:
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
- f2fs_stop_checkpoint(sbi, false);
+ f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_NEED_FSCK:
@@ -3321,8 +3369,10 @@ static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
if (!__is_valid_data_blkaddr(blkaddr))
continue;
if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
- DATA_GENERIC_ENHANCE)))
+ DATA_GENERIC_ENHANCE))) {
+ f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
return -EFSCORRUPTED;
+ }
}
while (count) {
@@ -3483,8 +3533,10 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
if (!__is_valid_data_blkaddr(blkaddr))
continue;
if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
- DATA_GENERIC_ENHANCE)))
+ DATA_GENERIC_ENHANCE))) {
+ f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
return -EFSCORRUPTED;
+ }
}
while (count) {
@@ -3756,6 +3808,8 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
DATA_GENERIC_ENHANCE)) {
ret = -EFSCORRUPTED;
f2fs_put_dnode(&dn);
+ f2fs_handle_error(sbi,
+ ERROR_INVALID_BLKADDR);
goto out;
}
@@ -4182,7 +4236,7 @@ static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb,
if (!(iocb->ki_flags & IOCB_DIRECT))
return false;
- if (f2fs_force_buffered_io(inode, iocb, iter))
+ if (f2fs_force_buffered_io(inode, iov_iter_rw(iter)))
return false;
/*
@@ -4212,7 +4266,7 @@ static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error,
dec_page_count(sbi, F2FS_DIO_READ);
if (error)
return error;
- f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, size);
+ f2fs_update_iostat(sbi, NULL, APP_DIRECT_READ_IO, size);
return 0;
}
@@ -4301,7 +4355,8 @@ skip_read_trace:
} else {
ret = filemap_read(iocb, to, 0);
if (ret > 0)
- f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_READ_IO, ret);
+ f2fs_update_iostat(F2FS_I_SB(inode), inode,
+ APP_BUFFERED_READ_IO, ret);
}
if (trace_f2fs_dataread_end_enabled())
trace_f2fs_dataread_end(inode, pos, ret);
@@ -4418,7 +4473,8 @@ static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
if (ret > 0) {
iocb->ki_pos += ret;
- f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_IO, ret);
+ f2fs_update_iostat(F2FS_I_SB(inode), inode,
+ APP_BUFFERED_IO, ret);
}
return ret;
}
@@ -4431,7 +4487,7 @@ static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error,
dec_page_count(sbi, F2FS_DIO_WRITE);
if (error)
return error;
- f2fs_update_iostat(sbi, APP_DIRECT_IO, size);
+ f2fs_update_iostat(sbi, NULL, APP_DIRECT_IO, size);
return 0;
}
@@ -4619,7 +4675,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
skip_write_trace:
/* Do the actual write. */
ret = dio ?
- f2fs_dio_write_iter(iocb, from, &may_need_sync):
+ f2fs_dio_write_iter(iocb, from, &may_need_sync) :
f2fs_buffered_write_iter(iocb, from);
if (trace_f2fs_datawrite_end_enabled())
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 6da21d405ce1..4546e01b2ee0 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -74,7 +74,8 @@ static int gc_thread_func(void *data)
if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
f2fs_show_injection_info(sbi, FAULT_CHECKPOINT);
- f2fs_stop_checkpoint(sbi, false);
+ f2fs_stop_checkpoint(sbi, false,
+ STOP_CP_REASON_FAULT_INJECT);
}
if (!sb_start_write_trylock(sbi->sb)) {
@@ -97,14 +98,10 @@ static int gc_thread_func(void *data)
*/
if (sbi->gc_mode == GC_URGENT_HIGH) {
spin_lock(&sbi->gc_urgent_high_lock);
- if (sbi->gc_urgent_high_limited) {
- if (!sbi->gc_urgent_high_remaining) {
- sbi->gc_urgent_high_limited = false;
- spin_unlock(&sbi->gc_urgent_high_lock);
- sbi->gc_mode = GC_NORMAL;
- continue;
- }
+ if (sbi->gc_urgent_high_remaining) {
sbi->gc_urgent_high_remaining--;
+ if (!sbi->gc_urgent_high_remaining)
+ sbi->gc_mode = GC_NORMAL;
}
spin_unlock(&sbi->gc_urgent_high_lock);
}
@@ -285,7 +282,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
/* let's select beginning hot/small space first in no_heap mode*/
if (f2fs_need_rand_seg(sbi))
- p->offset = prandom_u32() % (MAIN_SECS(sbi) * sbi->segs_per_sec);
+ p->offset = prandom_u32_max(MAIN_SECS(sbi) * sbi->segs_per_sec);
else if (test_opt(sbi, NOHEAP) &&
(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
p->offset = 0;
@@ -1082,7 +1079,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
{
struct page *node_page;
nid_t nid;
- unsigned int ofs_in_node;
+ unsigned int ofs_in_node, max_addrs;
block_t source_blkaddr;
nid = le32_to_cpu(sum->nid);
@@ -1108,6 +1105,14 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
return false;
}
+ max_addrs = IS_INODE(node_page) ? DEF_ADDRS_PER_INODE :
+ DEF_ADDRS_PER_BLOCK;
+ if (ofs_in_node >= max_addrs) {
+ f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%u, nid:%u, max:%u",
+ ofs_in_node, dni->ino, dni->nid, max_addrs);
+ return false;
+ }
+
*nofs = ofs_of_node(node_page);
source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node);
f2fs_put_page(node_page, 1);
@@ -1159,6 +1164,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
DATA_GENERIC_ENHANCE_READ))) {
err = -EFSCORRUPTED;
+ f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
goto put_page;
}
goto got_it;
@@ -1177,6 +1183,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
DATA_GENERIC_ENHANCE))) {
err = -EFSCORRUPTED;
+ f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
goto put_page;
}
got_it:
@@ -1206,8 +1213,8 @@ got_it:
f2fs_put_page(fio.encrypted_page, 0);
f2fs_put_page(page, 1);
- f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
- f2fs_update_iostat(sbi, FS_GDATA_READ_IO, F2FS_BLKSIZE);
+ f2fs_update_iostat(sbi, inode, FS_DATA_READ_IO, F2FS_BLKSIZE);
+ f2fs_update_iostat(sbi, NULL, FS_GDATA_READ_IO, F2FS_BLKSIZE);
return 0;
put_encrypted_page:
@@ -1307,8 +1314,10 @@ static int move_data_block(struct inode *inode, block_t bidx,
goto up_out;
}
- f2fs_update_iostat(fio.sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
- f2fs_update_iostat(fio.sbi, FS_GDATA_READ_IO, F2FS_BLKSIZE);
+ f2fs_update_iostat(fio.sbi, inode, FS_DATA_READ_IO,
+ F2FS_BLKSIZE);
+ f2fs_update_iostat(fio.sbi, NULL, FS_GDATA_READ_IO,
+ F2FS_BLKSIZE);
lock_page(mpage);
if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) ||
@@ -1360,7 +1369,7 @@ static int move_data_block(struct inode *inode, block_t bidx,
goto put_page_out;
}
- f2fs_update_iostat(fio.sbi, FS_GC_DATA_IO, F2FS_BLKSIZE);
+ f2fs_update_iostat(fio.sbi, NULL, FS_GC_DATA_IO, F2FS_BLKSIZE);
f2fs_update_data_blkaddr(&dn, newaddr);
set_inode_flag(inode, FI_APPEND_WRITE);
@@ -1706,7 +1715,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT",
segno, type, GET_SUM_TYPE((&sum->footer)));
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_stop_checkpoint(sbi, false);
+ f2fs_stop_checkpoint(sbi, false,
+ STOP_CP_REASON_CORRUPTED_SUMMARY);
goto skip;
}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index bf46a7dfbea2..21a495234ffd 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -64,7 +64,6 @@ bool f2fs_may_inline_dentry(struct inode *inode)
void f2fs_do_read_inline_data(struct page *page, struct page *ipage)
{
struct inode *inode = page->mapping->host;
- void *src_addr, *dst_addr;
if (PageUptodate(page))
return;
@@ -74,11 +73,8 @@ void f2fs_do_read_inline_data(struct page *page, struct page *ipage)
zero_user_segment(page, MAX_INLINE_DATA(inode), PAGE_SIZE);
/* Copy the whole inline data block */
- src_addr = inline_data_addr(inode, ipage);
- dst_addr = kmap_atomic(page);
- memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode));
- flush_dcache_page(page);
- kunmap_atomic(dst_addr);
+ memcpy_to_page(page, 0, inline_data_addr(inode, ipage),
+ MAX_INLINE_DATA(inode));
if (!PageUptodate(page))
SetPageUptodate(page);
}
@@ -164,6 +160,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
set_sbi_flag(fio.sbi, SBI_NEED_FSCK);
f2fs_warn(fio.sbi, "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.",
__func__, dn->inode->i_ino, dn->data_blkaddr);
+ f2fs_handle_error(fio.sbi, ERROR_INVALID_BLKADDR);
return -EFSCORRUPTED;
}
@@ -246,7 +243,6 @@ out:
int f2fs_write_inline_data(struct inode *inode, struct page *page)
{
- void *src_addr, *dst_addr;
struct dnode_of_data dn;
int err;
@@ -263,10 +259,8 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
f2fs_bug_on(F2FS_I_SB(inode), page->index);
f2fs_wait_on_page_writeback(dn.inode_page, NODE, true, true);
- src_addr = kmap_atomic(page);
- dst_addr = inline_data_addr(inode, dn.inode_page);
- memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode));
- kunmap_atomic(src_addr);
+ memcpy_from_page(inline_data_addr(inode, dn.inode_page),
+ page, 0, MAX_INLINE_DATA(inode));
set_page_dirty(dn.inode_page);
f2fs_clear_page_cache_dirty_tag(page);
@@ -419,6 +413,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK);
f2fs_warn(F2FS_P_SB(page), "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.",
__func__, dir->i_ino, dn.data_blkaddr);
+ f2fs_handle_error(F2FS_P_SB(page), ERROR_INVALID_BLKADDR);
err = -EFSCORRUPTED;
goto out;
}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 6d11c365d7b4..9f0d3864d9f1 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -81,8 +81,10 @@ static int __written_first_block(struct f2fs_sb_info *sbi,
if (!__is_valid_data_blkaddr(addr))
return 1;
- if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC_ENHANCE))
+ if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC_ENHANCE)) {
+ f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
return -EFSCORRUPTED;
+ }
return 0;
}
@@ -333,6 +335,16 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
return true;
}
+static void init_idisk_time(struct inode *inode)
+{
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+
+ fi->i_disk_time[0] = inode->i_atime;
+ fi->i_disk_time[1] = inode->i_ctime;
+ fi->i_disk_time[2] = inode->i_mtime;
+ fi->i_disk_time[3] = fi->i_crtime;
+}
+
static int do_read_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -405,6 +417,7 @@ static int do_read_inode(struct inode *inode)
if (!sanity_check_inode(inode, node_page)) {
f2fs_put_page(node_page, 1);
+ f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
return -EFSCORRUPTED;
}
@@ -465,10 +478,7 @@ static int do_read_inode(struct inode *inode)
}
}
- fi->i_disk_time[0] = inode->i_atime;
- fi->i_disk_time[1] = inode->i_ctime;
- fi->i_disk_time[2] = inode->i_mtime;
- fi->i_disk_time[3] = fi->i_crtime;
+ init_idisk_time(inode);
f2fs_put_page(node_page, 1);
stat_inc_inline_xattr(inode);
@@ -480,6 +490,12 @@ static int do_read_inode(struct inode *inode)
return 0;
}
+static bool is_meta_ino(struct f2fs_sb_info *sbi, unsigned int ino)
+{
+ return ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi) ||
+ ino == F2FS_COMPRESS_INO(sbi);
+}
+
struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -491,16 +507,22 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
return ERR_PTR(-ENOMEM);
if (!(inode->i_state & I_NEW)) {
+ if (is_meta_ino(sbi, ino)) {
+ f2fs_err(sbi, "inaccessible inode: %lu, run fsck to repair", ino);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ ret = -EFSCORRUPTED;
+ trace_f2fs_iget_exit(inode, ret);
+ iput(inode);
+ f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
+ return ERR_PTR(ret);
+ }
+
trace_f2fs_iget(inode);
return inode;
}
- if (ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi))
- goto make_now;
-#ifdef CONFIG_F2FS_FS_COMPRESSION
- if (ino == F2FS_COMPRESS_INO(sbi))
+ if (is_meta_ino(sbi, ino))
goto make_now;
-#endif
ret = do_read_inode(inode);
if (ret)
@@ -676,11 +698,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
if (inode->i_nlink == 0)
clear_page_private_inline(node_page);
- F2FS_I(inode)->i_disk_time[0] = inode->i_atime;
- F2FS_I(inode)->i_disk_time[1] = inode->i_ctime;
- F2FS_I(inode)->i_disk_time[2] = inode->i_mtime;
- F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime;
-
+ init_idisk_time(inode);
#ifdef CONFIG_F2FS_CHECK_FS
f2fs_inode_chksum_set(F2FS_I_SB(inode), node_page);
#endif
@@ -699,7 +717,8 @@ retry:
cond_resched();
goto retry;
} else if (err != -ENOENT) {
- f2fs_stop_checkpoint(sbi, false);
+ f2fs_stop_checkpoint(sbi, false,
+ STOP_CP_REASON_UPDATE_INODE);
}
return;
}
diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
index d84c5f6cc09d..3166a8939ed4 100644
--- a/fs/f2fs/iostat.c
+++ b/fs/f2fs/iostat.c
@@ -31,55 +31,65 @@ int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset)
/* print app write IOs */
seq_puts(seq, "[WRITE]\n");
- seq_printf(seq, "app buffered: %-16llu\n",
+ seq_printf(seq, "app buffered data: %-16llu\n",
sbi->rw_iostat[APP_BUFFERED_IO]);
- seq_printf(seq, "app direct: %-16llu\n",
+ seq_printf(seq, "app direct data: %-16llu\n",
sbi->rw_iostat[APP_DIRECT_IO]);
- seq_printf(seq, "app mapped: %-16llu\n",
+ seq_printf(seq, "app mapped data: %-16llu\n",
sbi->rw_iostat[APP_MAPPED_IO]);
+ seq_printf(seq, "app buffered cdata: %-16llu\n",
+ sbi->rw_iostat[APP_BUFFERED_CDATA_IO]);
+ seq_printf(seq, "app mapped cdata: %-16llu\n",
+ sbi->rw_iostat[APP_MAPPED_CDATA_IO]);
/* print fs write IOs */
- seq_printf(seq, "fs data: %-16llu\n",
+ seq_printf(seq, "fs data: %-16llu\n",
sbi->rw_iostat[FS_DATA_IO]);
- seq_printf(seq, "fs node: %-16llu\n",
+ seq_printf(seq, "fs cdata: %-16llu\n",
+ sbi->rw_iostat[FS_CDATA_IO]);
+ seq_printf(seq, "fs node: %-16llu\n",
sbi->rw_iostat[FS_NODE_IO]);
- seq_printf(seq, "fs meta: %-16llu\n",
+ seq_printf(seq, "fs meta: %-16llu\n",
sbi->rw_iostat[FS_META_IO]);
- seq_printf(seq, "fs gc data: %-16llu\n",
+ seq_printf(seq, "fs gc data: %-16llu\n",
sbi->rw_iostat[FS_GC_DATA_IO]);
- seq_printf(seq, "fs gc node: %-16llu\n",
+ seq_printf(seq, "fs gc node: %-16llu\n",
sbi->rw_iostat[FS_GC_NODE_IO]);
- seq_printf(seq, "fs cp data: %-16llu\n",
+ seq_printf(seq, "fs cp data: %-16llu\n",
sbi->rw_iostat[FS_CP_DATA_IO]);
- seq_printf(seq, "fs cp node: %-16llu\n",
+ seq_printf(seq, "fs cp node: %-16llu\n",
sbi->rw_iostat[FS_CP_NODE_IO]);
- seq_printf(seq, "fs cp meta: %-16llu\n",
+ seq_printf(seq, "fs cp meta: %-16llu\n",
sbi->rw_iostat[FS_CP_META_IO]);
/* print app read IOs */
seq_puts(seq, "[READ]\n");
- seq_printf(seq, "app buffered: %-16llu\n",
+ seq_printf(seq, "app buffered data: %-16llu\n",
sbi->rw_iostat[APP_BUFFERED_READ_IO]);
- seq_printf(seq, "app direct: %-16llu\n",
+ seq_printf(seq, "app direct data: %-16llu\n",
sbi->rw_iostat[APP_DIRECT_READ_IO]);
- seq_printf(seq, "app mapped: %-16llu\n",
+ seq_printf(seq, "app mapped data: %-16llu\n",
sbi->rw_iostat[APP_MAPPED_READ_IO]);
+ seq_printf(seq, "app buffered cdata: %-16llu\n",
+ sbi->rw_iostat[APP_BUFFERED_CDATA_READ_IO]);
+ seq_printf(seq, "app mapped cdata: %-16llu\n",
+ sbi->rw_iostat[APP_MAPPED_CDATA_READ_IO]);
/* print fs read IOs */
- seq_printf(seq, "fs data: %-16llu\n",
+ seq_printf(seq, "fs data: %-16llu\n",
sbi->rw_iostat[FS_DATA_READ_IO]);
- seq_printf(seq, "fs gc data: %-16llu\n",
+ seq_printf(seq, "fs gc data: %-16llu\n",
sbi->rw_iostat[FS_GDATA_READ_IO]);
- seq_printf(seq, "fs compr_data: %-16llu\n",
+ seq_printf(seq, "fs cdata: %-16llu\n",
sbi->rw_iostat[FS_CDATA_READ_IO]);
- seq_printf(seq, "fs node: %-16llu\n",
+ seq_printf(seq, "fs node: %-16llu\n",
sbi->rw_iostat[FS_NODE_READ_IO]);
- seq_printf(seq, "fs meta: %-16llu\n",
+ seq_printf(seq, "fs meta: %-16llu\n",
sbi->rw_iostat[FS_META_READ_IO]);
/* print other IOs */
seq_puts(seq, "[OTHER]\n");
- seq_printf(seq, "fs discard: %-16llu\n",
+ seq_printf(seq, "fs discard: %-16llu\n",
sbi->rw_iostat[FS_DISCARD]);
return 0;
@@ -159,7 +169,7 @@ void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
spin_unlock_irq(&sbi->iostat_lat_lock);
}
-void f2fs_update_iostat(struct f2fs_sb_info *sbi,
+void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
enum iostat_type type, unsigned long long io_bytes)
{
unsigned long flags;
@@ -176,6 +186,28 @@ void f2fs_update_iostat(struct f2fs_sb_info *sbi,
if (type == APP_BUFFERED_READ_IO || type == APP_DIRECT_READ_IO)
sbi->rw_iostat[APP_READ_IO] += io_bytes;
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ if (inode && f2fs_compressed_file(inode)) {
+ if (type == APP_BUFFERED_IO)
+ sbi->rw_iostat[APP_BUFFERED_CDATA_IO] += io_bytes;
+
+ if (type == APP_BUFFERED_READ_IO)
+ sbi->rw_iostat[APP_BUFFERED_CDATA_READ_IO] += io_bytes;
+
+ if (type == APP_MAPPED_READ_IO)
+ sbi->rw_iostat[APP_MAPPED_CDATA_READ_IO] += io_bytes;
+
+ if (type == APP_MAPPED_IO)
+ sbi->rw_iostat[APP_MAPPED_CDATA_IO] += io_bytes;
+
+ if (type == FS_DATA_READ_IO)
+ sbi->rw_iostat[FS_CDATA_READ_IO] += io_bytes;
+
+ if (type == FS_DATA_IO)
+ sbi->rw_iostat[FS_CDATA_IO] += io_bytes;
+ }
+#endif
+
spin_unlock_irqrestore(&sbi->iostat_lock, flags);
f2fs_record_iostat(sbi);
diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
index 22a2d01f57ef..2c048307b6e0 100644
--- a/fs/f2fs/iostat.h
+++ b/fs/f2fs/iostat.h
@@ -31,7 +31,7 @@ struct iostat_lat_info {
extern int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
void *offset);
extern void f2fs_reset_iostat(struct f2fs_sb_info *sbi);
-extern void f2fs_update_iostat(struct f2fs_sb_info *sbi,
+extern void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
enum iostat_type type, unsigned long long io_bytes);
struct bio_iostat_ctx {
@@ -65,7 +65,7 @@ extern void f2fs_destroy_iostat_processing(void);
extern int f2fs_init_iostat(struct f2fs_sb_info *sbi);
extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
#else
-static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
+static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
enum iostat_type type, unsigned long long io_bytes) {}
static inline void iostat_update_and_unbind_ctx(struct bio *bio, int rw) {}
static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index bf00d5057abb..a389772fd212 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -50,7 +50,7 @@ static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns,
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
F2FS_I(inode)->i_crtime = inode->i_mtime;
- inode->i_generation = prandom_u32();
+ inode->i_generation = get_random_u32();
if (S_ISDIR(inode->i_mode))
F2FS_I(inode)->i_current_depth = 1;
@@ -845,7 +845,7 @@ out:
}
static int __f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
- struct dentry *dentry, umode_t mode, bool is_whiteout,
+ struct file *file, umode_t mode, bool is_whiteout,
struct inode **new_inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
@@ -892,8 +892,8 @@ static int __f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
inode->i_state |= I_LINKABLE;
spin_unlock(&inode->i_lock);
} else {
- if (dentry)
- d_tmpfile(dentry, inode);
+ if (file)
+ d_tmpfile(file, inode);
else
f2fs_i_links_write(inode, false);
}
@@ -915,16 +915,19 @@ out:
}
static int f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
- struct dentry *dentry, umode_t mode)
+ struct file *file, umode_t mode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+ int err;
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
if (!f2fs_is_checkpoint_ready(sbi))
return -ENOSPC;
- return __f2fs_tmpfile(mnt_userns, dir, dentry, mode, false, NULL);
+ err = __f2fs_tmpfile(mnt_userns, dir, file, mode, false, NULL);
+
+ return finish_open_simple(file, err);
}
static int f2fs_create_whiteout(struct user_namespace *mnt_userns,
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index e06a0c478b39..983572f23896 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -36,6 +36,7 @@ int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: out-of-range nid=%x, run fsck to fix.",
__func__, nid);
+ f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
return -EFSCORRUPTED;
}
return 0;
@@ -585,7 +586,7 @@ retry:
ne = nat_in_journal(journal, i);
node_info_from_raw_nat(ni, &ne);
}
- up_read(&curseg->journal_rwsem);
+ up_read(&curseg->journal_rwsem);
if (i >= 0) {
f2fs_up_read(&nm_i->nat_tree_lock);
goto cache;
@@ -1295,6 +1296,7 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs)
if (unlikely(new_ni.blk_addr != NULL_ADDR)) {
err = -EFSCORRUPTED;
set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
goto fail;
}
#endif
@@ -1369,7 +1371,7 @@ static int read_node_page(struct page *page, blk_opf_t op_flags)
err = f2fs_submit_page_bio(&fio);
if (!err)
- f2fs_update_iostat(sbi, FS_NODE_READ_IO, F2FS_BLKSIZE);
+ f2fs_update_iostat(sbi, NULL, FS_NODE_READ_IO, F2FS_BLKSIZE);
return err;
}
@@ -2147,8 +2149,7 @@ static bool f2fs_dirty_node_folio(struct address_space *mapping,
if (IS_INODE(&folio->page))
f2fs_inode_chksum_set(F2FS_M_SB(mapping), &folio->page);
#endif
- if (!folio_test_dirty(folio)) {
- filemap_dirty_folio(mapping, folio);
+ if (filemap_dirty_folio(mapping, folio)) {
inc_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES);
set_page_private_reference(&folio->page);
return true;
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index dcd0a1e35095..dea95b48b647 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -474,7 +474,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
struct dnode_of_data tdn = *dn;
nid_t ino, nid;
struct inode *inode;
- unsigned int offset;
+ unsigned int offset, ofs_in_node, max_addrs;
block_t bidx;
int i;
@@ -501,15 +501,25 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
got_it:
/* Use the locked dnode page and inode */
nid = le32_to_cpu(sum.nid);
+ ofs_in_node = le16_to_cpu(sum.ofs_in_node);
+
+ max_addrs = ADDRS_PER_PAGE(dn->node_page, dn->inode);
+ if (ofs_in_node >= max_addrs) {
+ f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%lu, nid:%u, max:%u",
+ ofs_in_node, dn->inode->i_ino, nid, max_addrs);
+ f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUMMARY);
+ return -EFSCORRUPTED;
+ }
+
if (dn->inode->i_ino == nid) {
tdn.nid = nid;
if (!dn->inode_page_locked)
lock_page(dn->inode_page);
tdn.node_page = dn->inode_page;
- tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
+ tdn.ofs_in_node = ofs_in_node;
goto truncate_out;
} else if (dn->nid == nid) {
- tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
+ tdn.ofs_in_node = ofs_in_node;
goto truncate_out;
}
@@ -628,6 +638,7 @@ retry_dn:
inode->i_ino, ofs_of_node(dn.node_page),
ofs_of_node(page));
err = -EFSCORRUPTED;
+ f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER);
goto err;
}
@@ -640,12 +651,14 @@ retry_dn:
if (__is_valid_data_blkaddr(src) &&
!f2fs_is_valid_blkaddr(sbi, src, META_POR)) {
err = -EFSCORRUPTED;
+ f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
goto err;
}
if (__is_valid_data_blkaddr(dest) &&
!f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
err = -EFSCORRUPTED;
+ f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
goto err;
}
@@ -698,6 +711,16 @@ retry_prev:
goto err;
}
+ if (f2fs_is_valid_blkaddr(sbi, dest,
+ DATA_GENERIC_ENHANCE_UPDATE)) {
+ f2fs_err(sbi, "Inconsistent dest blkaddr:%u, ino:%lu, ofs:%u",
+ dest, inode->i_ino, dn.ofs_in_node);
+ err = -EFSCORRUPTED;
+ f2fs_handle_error(sbi,
+ ERROR_INVALID_BLKADDR);
+ goto err;
+ }
+
/* write dummy data page */
f2fs_replace_block(sbi, &dn, src, dest,
ni.version, false, false);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 0de21f82d7bc..acf3d3fa4363 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -187,7 +187,6 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
void f2fs_abort_atomic_write(struct inode *inode, bool clean)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
if (!f2fs_is_atomic_file(inode))
@@ -200,10 +199,7 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean)
fi->cow_inode = NULL;
release_atomic_write_cnt(inode);
clear_inode_flag(inode, FI_ATOMIC_FILE);
-
- spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
- sbi->atomic_files--;
- spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+ stat_dec_atomic_inode(inode);
}
static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
@@ -312,6 +308,8 @@ static int __f2fs_commit_atomic_write(struct inode *inode)
DATA_GENERIC_ENHANCE)) {
f2fs_put_dnode(&dn);
ret = -EFSCORRUPTED;
+ f2fs_handle_error(sbi,
+ ERROR_INVALID_BLKADDR);
goto out;
}
@@ -376,7 +374,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
f2fs_show_injection_info(sbi, FAULT_CHECKPOINT);
- f2fs_stop_checkpoint(sbi, false);
+ f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT);
}
/* balance_fs_bg is able to be pending */
@@ -476,12 +474,12 @@ do_sync:
mutex_lock(&sbi->flush_lock);
blk_start_plug(&plug);
- f2fs_sync_dirty_inodes(sbi, FILE_INODE);
+ f2fs_sync_dirty_inodes(sbi, FILE_INODE, false);
blk_finish_plug(&plug);
mutex_unlock(&sbi->flush_lock);
}
- f2fs_sync_fs(sbi->sb, true);
+ f2fs_sync_fs(sbi->sb, 1);
stat_inc_bg_cp_count(sbi->stat_info);
}
@@ -694,7 +692,8 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
} while (ret && --count);
if (ret) {
- f2fs_stop_checkpoint(sbi, false);
+ f2fs_stop_checkpoint(sbi, false,
+ STOP_CP_REASON_FLUSH_FAIL);
break;
}
@@ -1171,7 +1170,7 @@ submit:
atomic_inc(&dcc->issued_discard);
- f2fs_update_iostat(sbi, FS_DISCARD, 1);
+ f2fs_update_iostat(sbi, NULL, FS_DISCARD, 1);
lstart += len;
start += len;
@@ -2535,7 +2534,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
sanity_check_seg_type(sbi, seg_type);
if (f2fs_need_rand_seg(sbi))
- return prandom_u32() % (MAIN_SECS(sbi) * sbi->segs_per_sec);
+ return prandom_u32_max(MAIN_SECS(sbi) * sbi->segs_per_sec);
/* if segs_per_sec is large than 1, we need to keep original policy. */
if (__is_large_section(sbi))
@@ -2589,7 +2588,7 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
curseg->alloc_type = LFS;
if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
curseg->fragment_remained_chunk =
- prandom_u32() % sbi->max_fragment_chunk + 1;
+ prandom_u32_max(sbi->max_fragment_chunk) + 1;
}
static int __next_free_blkoff(struct f2fs_sb_info *sbi,
@@ -2626,9 +2625,9 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
/* To allocate block chunks in different sizes, use random number */
if (--seg->fragment_remained_chunk <= 0) {
seg->fragment_remained_chunk =
- prandom_u32() % sbi->max_fragment_chunk + 1;
+ prandom_u32_max(sbi->max_fragment_chunk) + 1;
seg->next_blkoff +=
- prandom_u32() % sbi->max_fragment_hole + 1;
+ prandom_u32_max(sbi->max_fragment_hole) + 1;
}
}
}
@@ -3388,7 +3387,7 @@ void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
f2fs_submit_page_write(&fio);
stat_inc_meta_count(sbi, page->index);
- f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
+ f2fs_update_iostat(sbi, NULL, io_type, F2FS_BLKSIZE);
}
void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
@@ -3398,7 +3397,7 @@ void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
set_summary(&sum, nid, 0, 0);
do_write_page(&sum, fio);
- f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
+ f2fs_update_iostat(fio->sbi, NULL, fio->io_type, F2FS_BLKSIZE);
}
void f2fs_outplace_write_data(struct dnode_of_data *dn,
@@ -3412,7 +3411,7 @@ void f2fs_outplace_write_data(struct dnode_of_data *dn,
do_write_page(&sum, fio);
f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
- f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE);
+ f2fs_update_iostat(sbi, dn->inode, fio->io_type, F2FS_BLKSIZE);
}
int f2fs_inplace_write_data(struct f2fs_io_info *fio)
@@ -3432,6 +3431,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
__func__, segno);
err = -EFSCORRUPTED;
+ f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
goto drop_bio;
}
@@ -3453,7 +3453,8 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
if (!err) {
f2fs_update_device_state(fio->sbi, fio->ino,
fio->new_blkaddr, 1);
- f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
+ f2fs_update_iostat(fio->sbi, fio->page->mapping->host,
+ fio->io_type, F2FS_BLKSIZE);
}
return err;
@@ -4379,6 +4380,8 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
if (se->type >= NR_PERSISTENT_LOG) {
f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
se->type, start);
+ f2fs_handle_error(sbi,
+ ERROR_INCONSISTENT_SUM_TYPE);
return -EFSCORRUPTED;
}
@@ -4415,6 +4418,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
f2fs_err(sbi, "Wrong journal entry on segno %u",
start);
err = -EFSCORRUPTED;
+ f2fs_handle_error(sbi, ERROR_CORRUPTED_JOURNAL);
break;
}
@@ -4434,6 +4438,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
se->type, start);
err = -EFSCORRUPTED;
+ f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
break;
}
@@ -4465,6 +4470,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
if (sit_valid_blocks[NODE] != valid_node_count(sbi)) {
f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
sit_valid_blocks[NODE], valid_node_count(sbi));
+ f2fs_handle_error(sbi, ERROR_INCONSISTENT_NODE_COUNT);
return -EFSCORRUPTED;
}
@@ -4473,6 +4479,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
f2fs_err(sbi, "SIT is corrupted data# %u %u vs %u",
sit_valid_blocks[DATA], sit_valid_blocks[NODE],
valid_user_blocks(sbi));
+ f2fs_handle_error(sbi, ERROR_INCONSISTENT_BLOCK_COUNT);
return -EFSCORRUPTED;
}
@@ -4623,6 +4630,7 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
f2fs_err(sbi,
"Current segment has invalid alloc_type:%d",
curseg->alloc_type);
+ f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
return -EFSCORRUPTED;
}
@@ -4640,6 +4648,7 @@ out:
"Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
i, curseg->segno, curseg->alloc_type,
curseg->next_blkoff, blkofs);
+ f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
return -EFSCORRUPTED;
}
}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index d1d63766f2c7..be8f2d7d007b 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -753,6 +753,7 @@ static inline int check_block_count(struct f2fs_sb_info *sbi,
f2fs_err(sbi, "Mismatch valid blocks %d vs. %d",
GET_SIT_VBLOCKS(raw_sit), valid_blocks);
set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_handle_error(sbi, ERROR_INCONSISTENT_SIT);
return -EFSCORRUPTED;
}
@@ -767,6 +768,7 @@ static inline int check_block_count(struct f2fs_sb_info *sbi,
f2fs_err(sbi, "Wrong valid blocks %d or segno %u",
GET_SIT_VBLOCKS(raw_sit), segno);
set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_handle_error(sbi, ERROR_INCONSISTENT_SIT);
return -EFSCORRUPTED;
}
return 0;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 2451623c05a7..3834ead04620 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -301,10 +301,10 @@ static void f2fs_destroy_casefold_cache(void) { }
static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
{
- block_t limit = min((sbi->user_block_count << 1) / 1000,
+ block_t limit = min((sbi->user_block_count >> 3),
sbi->user_block_count - sbi->reserved_blocks);
- /* limit is 0.2% */
+ /* limit is 12.5% */
if (test_opt(sbi, RESERVE_ROOT) &&
F2FS_OPTION(sbi).root_reserved_blocks > limit) {
F2FS_OPTION(sbi).root_reserved_blocks = limit;
@@ -1342,6 +1342,11 @@ default_check:
return -EINVAL;
}
+ if (test_opt(sbi, ATGC) && f2fs_lfs_mode(sbi)) {
+ f2fs_err(sbi, "LFS not compatible with ATGC");
+ return -EINVAL;
+ }
+
if (f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb)) {
f2fs_err(sbi, "Allow to mount readonly mode only");
return -EROFS;
@@ -1666,9 +1671,8 @@ static int f2fs_freeze(struct super_block *sb)
if (is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY))
return -EINVAL;
- /* ensure no checkpoint required */
- if (!llist_empty(&F2FS_SB(sb)->cprc_info.issue_list))
- return -EINVAL;
+ /* Let's flush checkpoints and stop the thread. */
+ f2fs_flush_ckpt_thread(F2FS_SB(sb));
/* to avoid deadlock on f2fs_evict_inode->SB_FREEZE_FS */
set_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING);
@@ -2181,6 +2185,9 @@ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
f2fs_up_write(&sbi->gc_lock);
f2fs_sync_fs(sbi->sb, 1);
+
+ /* Let's ensure there's no pending checkpoint anymore */
+ f2fs_flush_ckpt_thread(sbi);
}
static int f2fs_remount(struct super_block *sb, int *flags, char *data)
@@ -2346,6 +2353,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
f2fs_stop_ckpt_thread(sbi);
need_restart_ckpt = true;
} else {
+ /* Flush if the prevous checkpoint, if exists. */
+ f2fs_flush_ckpt_thread(sbi);
+
err = f2fs_start_ckpt_thread(sbi);
if (err) {
f2fs_err(sbi,
@@ -2465,7 +2475,6 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data,
size_t toread;
loff_t i_size = i_size_read(inode);
struct page *page;
- char *kaddr;
if (off > i_size)
return 0;
@@ -2498,9 +2507,7 @@ repeat:
return -EIO;
}
- kaddr = kmap_atomic(page);
- memcpy(data, kaddr + offset, tocopy);
- kunmap_atomic(kaddr);
+ memcpy_from_page(data, page, offset, tocopy);
f2fs_put_page(page, 1);
offset = 0;
@@ -2522,7 +2529,6 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type,
size_t towrite = len;
struct page *page;
void *fsdata = NULL;
- char *kaddr;
int err = 0;
int tocopy;
@@ -2541,10 +2547,7 @@ retry:
break;
}
- kaddr = kmap_atomic(page);
- memcpy(kaddr + offset, data, tocopy);
- kunmap_atomic(kaddr);
- flush_dcache_page(page);
+ memcpy_to_page(page, offset, data, tocopy);
a_ops->write_end(NULL, mapping, off, tocopy, tocopy,
page, fsdata);
@@ -3039,23 +3042,24 @@ static void f2fs_get_ino_and_lblk_bits(struct super_block *sb,
*lblk_bits_ret = 8 * sizeof(block_t);
}
-static int f2fs_get_num_devices(struct super_block *sb)
+static struct block_device **f2fs_get_devices(struct super_block *sb,
+ unsigned int *num_devs)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct block_device **devs;
+ int i;
- if (f2fs_is_multi_device(sbi))
- return sbi->s_ndevs;
- return 1;
-}
+ if (!f2fs_is_multi_device(sbi))
+ return NULL;
-static void f2fs_get_devices(struct super_block *sb,
- struct request_queue **devs)
-{
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
- int i;
+ devs = kmalloc_array(sbi->s_ndevs, sizeof(*devs), GFP_KERNEL);
+ if (!devs)
+ return ERR_PTR(-ENOMEM);
for (i = 0; i < sbi->s_ndevs; i++)
- devs[i] = bdev_get_queue(FDEV(i).bdev);
+ devs[i] = FDEV(i).bdev;
+ *num_devs = sbi->s_ndevs;
+ return devs;
}
static const struct fscrypt_operations f2fs_cryptops = {
@@ -3066,7 +3070,6 @@ static const struct fscrypt_operations f2fs_cryptops = {
.empty_dir = f2fs_empty_dir,
.has_stable_inodes = f2fs_has_stable_inodes,
.get_ino_and_lblk_bits = f2fs_get_ino_and_lblk_bits,
- .get_num_devices = f2fs_get_num_devices,
.get_devices = f2fs_get_devices,
};
#endif
@@ -3843,6 +3846,68 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
return err;
}
+void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason)
+{
+ struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
+ int err;
+
+ f2fs_down_write(&sbi->sb_lock);
+
+ if (raw_super->s_stop_reason[reason] < ((1 << BITS_PER_BYTE) - 1))
+ raw_super->s_stop_reason[reason]++;
+
+ err = f2fs_commit_super(sbi, false);
+ if (err)
+ f2fs_err(sbi, "f2fs_commit_super fails to record reason:%u err:%d",
+ reason, err);
+ f2fs_up_write(&sbi->sb_lock);
+}
+
+static void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
+{
+ spin_lock(&sbi->error_lock);
+ if (!test_bit(flag, (unsigned long *)sbi->errors)) {
+ set_bit(flag, (unsigned long *)sbi->errors);
+ sbi->error_dirty = true;
+ }
+ spin_unlock(&sbi->error_lock);
+}
+
+static bool f2fs_update_errors(struct f2fs_sb_info *sbi)
+{
+ bool need_update = false;
+
+ spin_lock(&sbi->error_lock);
+ if (sbi->error_dirty) {
+ memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors,
+ MAX_F2FS_ERRORS);
+ sbi->error_dirty = false;
+ need_update = true;
+ }
+ spin_unlock(&sbi->error_lock);
+
+ return need_update;
+}
+
+void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error)
+{
+ int err;
+
+ f2fs_save_errors(sbi, error);
+
+ f2fs_down_write(&sbi->sb_lock);
+
+ if (!f2fs_update_errors(sbi))
+ goto out_unlock;
+
+ err = f2fs_commit_super(sbi, false);
+ if (err)
+ f2fs_err(sbi, "f2fs_commit_super fails to record errors:%u, err:%d",
+ error, err);
+out_unlock:
+ f2fs_up_write(&sbi->sb_lock);
+}
+
static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
@@ -4190,6 +4255,9 @@ try_onemore:
goto free_devices;
}
+ spin_lock_init(&sbi->error_lock);
+ memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS);
+
sbi->total_valid_node_count =
le32_to_cpu(sbi->ckpt->valid_node_count);
percpu_counter_set(&sbi->total_valid_inode_count,
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index eba5fb1629d7..df27afd71ef4 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -128,6 +128,12 @@ static ssize_t sb_status_show(struct f2fs_attr *a,
return sprintf(buf, "%lx\n", sbi->s_flag);
}
+static ssize_t cp_status_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ return sprintf(buf, "%x\n", le32_to_cpu(F2FS_CKPT(sbi)->ckpt_flags));
+}
+
static ssize_t pending_discard_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
@@ -527,7 +533,6 @@ out:
if (!strcmp(a->attr.name, "gc_urgent_high_remaining")) {
spin_lock(&sbi->gc_urgent_high_lock);
- sbi->gc_urgent_high_limited = t != 0;
sbi->gc_urgent_high_remaining = t;
spin_unlock(&sbi->gc_urgent_high_lock);
@@ -1030,8 +1035,10 @@ static struct attribute *f2fs_feat_attrs[] = {
ATTRIBUTE_GROUPS(f2fs_feat);
F2FS_GENERAL_RO_ATTR(sb_status);
+F2FS_GENERAL_RO_ATTR(cp_status);
static struct attribute *f2fs_stat_attrs[] = {
ATTR_LIST(sb_status),
+ ATTR_LIST(cp_status),
NULL,
};
ATTRIBUTE_GROUPS(f2fs_stat);
diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c
index 7b8f2b41c29b..c352fff88a5e 100644
--- a/fs/f2fs/verity.c
+++ b/fs/f2fs/verity.c
@@ -47,16 +47,13 @@ static int pagecache_read(struct inode *inode, void *buf, size_t count,
size_t n = min_t(size_t, count,
PAGE_SIZE - offset_in_page(pos));
struct page *page;
- void *addr;
page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT,
NULL);
if (IS_ERR(page))
return PTR_ERR(page);
- addr = kmap_atomic(page);
- memcpy(buf, addr + offset_in_page(pos), n);
- kunmap_atomic(addr);
+ memcpy_from_page(buf, page, offset_in_page(pos), n);
put_page(page);
@@ -85,16 +82,13 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count,
PAGE_SIZE - offset_in_page(pos));
struct page *page;
void *fsdata;
- void *addr;
int res;
res = aops->write_begin(NULL, mapping, pos, n, &page, &fsdata);
if (res)
return res;
- addr = kmap_atomic(page);
- memcpy(addr + offset_in_page(pos), buf, n);
- kunmap_atomic(addr);
+ memcpy_to_page(page, offset_in_page(pos), buf, n);
res = aops->write_end(NULL, mapping, pos, n, n, page, fsdata);
if (res < 0)
@@ -246,6 +240,8 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf,
if (pos + size < pos || pos + size > inode->i_sb->s_maxbytes ||
pos < f2fs_verity_metadata_pos(inode) || size > INT_MAX) {
f2fs_warn(F2FS_I_SB(inode), "invalid verity xattr");
+ f2fs_handle_error(F2FS_I_SB(inode),
+ ERROR_CORRUPTED_VERITY_XATTR);
return -EFSCORRUPTED;
}
if (buf_size) {
@@ -262,13 +258,14 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
pgoff_t index,
unsigned long num_ra_pages)
{
- DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);
struct page *page;
index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT;
page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
if (!page || !PageUptodate(page)) {
+ DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);
+
if (page)
put_page(page);
else if (num_ra_pages > 1)
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index c76c15086e5f..dc2e8637189e 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -367,6 +367,8 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
inode->i_ino);
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
err = -EFSCORRUPTED;
+ f2fs_handle_error(F2FS_I_SB(inode),
+ ERROR_CORRUPTED_XATTR);
goto out;
}
check:
@@ -583,6 +585,8 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
inode->i_ino);
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
error = -EFSCORRUPTED;
+ f2fs_handle_error(F2FS_I_SB(inode),
+ ERROR_CORRUPTED_XATTR);
goto cleanup;
}
@@ -658,6 +662,8 @@ static int __f2fs_setxattr(struct inode *inode, int index,
inode->i_ino);
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
error = -EFSCORRUPTED;
+ f2fs_handle_error(F2FS_I_SB(inode),
+ ERROR_CORRUPTED_XATTR);
goto exit;
}
@@ -684,6 +690,8 @@ static int __f2fs_setxattr(struct inode *inode, int index,
inode->i_ino, ENTRY_SIZE(last));
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
error = -EFSCORRUPTED;
+ f2fs_handle_error(F2FS_I_SB(inode),
+ ERROR_CORRUPTED_XATTR);
goto exit;
}
last = XATTR_NEXT_ENTRY(last);
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 249825017da7..00235b8a1823 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -705,7 +705,7 @@ static int fat_readdir(struct file *file, struct dir_context *ctx)
}
#define FAT_IOCTL_FILLDIR_FUNC(func, dirent_type) \
-static int func(struct dir_context *ctx, const char *name, int name_len, \
+static bool func(struct dir_context *ctx, const char *name, int name_len, \
loff_t offset, u64 ino, unsigned int d_type) \
{ \
struct fat_ioctl_filldir_callback *buf = \
@@ -714,7 +714,7 @@ static int func(struct dir_context *ctx, const char *name, int name_len, \
struct dirent_type __user *d2 = d1 + 1; \
\
if (buf->result) \
- return -EINVAL; \
+ return false; \
buf->result++; \
\
if (name != NULL) { \
@@ -750,10 +750,10 @@ static int func(struct dir_context *ctx, const char *name, int name_len, \
put_user(short_len, &d1->d_reclen)) \
goto efault; \
} \
- return 0; \
+ return true; \
efault: \
buf->result = -EFAULT; \
- return -EFAULT; \
+ return false; \
}
FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, __fat_dirent)
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 3e4eb3467cb4..8a6b493b5b5f 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -461,8 +461,9 @@ static int fat_allow_set_time(struct user_namespace *mnt_userns,
{
umode_t allow_utime = sbi->options.allow_utime;
- if (!uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode))) {
- if (in_group_p(i_gid_into_mnt(mnt_userns, inode)))
+ if (!vfsuid_eq_kuid(i_uid_into_vfsuid(mnt_userns, inode),
+ current_fsuid())) {
+ if (vfsgid_in_group_p(i_gid_into_vfsgid(mnt_userns, inode)))
allow_utime >>= 3;
if (allow_utime & MAY_WRITE)
return 1;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index a38238d75c08..1cbcc4608dc7 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -523,7 +523,7 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
inode->i_uid = sbi->options.fs_uid;
inode->i_gid = sbi->options.fs_gid;
inode_inc_iversion(inode);
- inode->i_generation = prandom_u32();
+ inode->i_generation = get_random_u32();
if ((de->attr & ATTR_DIR) && !IS_FREE(de->name)) {
inode->i_generation &= ~1;
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 6630c69c23a2..f2bc27d1975e 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -14,7 +14,7 @@
#include "internal.h"
#include "mount.h"
-static long do_sys_name_to_handle(struct path *path,
+static long do_sys_name_to_handle(const struct path *path,
struct file_handle __user *ufh,
int __user *mnt_id)
{
diff --git a/fs/file.c b/fs/file.c
index 3bcc1ecc314a..5f9c802a5d8d 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -980,6 +980,7 @@ struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *ret
*ret_fd = fd;
return file;
}
+EXPORT_SYMBOL(task_lookup_next_fd_rcu);
/*
* Lightweight file lookup - no refcnt increment if fd table isn't shared.
diff --git a/fs/file_table.c b/fs/file_table.c
index 99c6796c9f28..dd88701e54a9 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -324,12 +324,7 @@ static void __fput(struct file *file)
}
fops_put(file->f_op);
put_pid(file->f_owner.pid);
- if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
- i_readcount_dec(inode);
- if (mode & FMODE_WRITER) {
- put_write_access(inode);
- __mnt_drop_write(mnt);
- }
+ put_file_access(file);
dput(dentry);
if (unlikely(mode & FMODE_NEED_UNMOUNT))
dissolve_on_fput(mnt);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 08a1993ab7fd..443f83382b9b 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1718,9 +1718,14 @@ static int writeback_single_inode(struct inode *inode,
*/
if (!(inode->i_state & I_DIRTY_ALL))
inode_cgwb_move_to_attached(inode, wb);
- else if (!(inode->i_state & I_SYNC_QUEUED) &&
- (inode->i_state & I_DIRTY))
- redirty_tail_locked(inode, wb);
+ else if (!(inode->i_state & I_SYNC_QUEUED)) {
+ if ((inode->i_state & I_DIRTY))
+ redirty_tail_locked(inode, wb);
+ else if (inode->i_state & I_DIRTY_TIME) {
+ inode->dirtied_when = jiffies;
+ inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
+ }
+ }
spin_unlock(&wb->list_lock);
inode_sync_complete(inode);
@@ -2370,6 +2375,20 @@ void __mark_inode_dirty(struct inode *inode, int flags)
if (flags & I_DIRTY_INODE) {
/*
+ * Inode timestamp update will piggback on this dirtying.
+ * We tell ->dirty_inode callback that timestamps need to
+ * be updated by setting I_DIRTY_TIME in flags.
+ */
+ if (inode->i_state & I_DIRTY_TIME) {
+ spin_lock(&inode->i_lock);
+ if (inode->i_state & I_DIRTY_TIME) {
+ inode->i_state &= ~I_DIRTY_TIME;
+ flags |= I_DIRTY_TIME;
+ }
+ spin_unlock(&inode->i_lock);
+ }
+
+ /*
* Notify the filesystem about the inode being dirtied, so that
* (if needed) it can update on-disk fields and journal the
* inode. This is only needed when the inode itself is being
@@ -2378,7 +2397,8 @@ void __mark_inode_dirty(struct inode *inode, int flags)
*/
trace_writeback_dirty_inode_start(inode, flags);
if (sb->s_op->dirty_inode)
- sb->s_op->dirty_inode(inode, flags & I_DIRTY_INODE);
+ sb->s_op->dirty_inode(inode,
+ flags & (I_DIRTY_INODE | I_DIRTY_TIME));
trace_writeback_dirty_inode(inode, flags);
/* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
@@ -2399,21 +2419,15 @@ void __mark_inode_dirty(struct inode *inode, int flags)
*/
smp_mb();
- if (((inode->i_state & flags) == flags) ||
- (dirtytime && (inode->i_state & I_DIRTY_INODE)))
+ if ((inode->i_state & flags) == flags)
return;
spin_lock(&inode->i_lock);
- if (dirtytime && (inode->i_state & I_DIRTY_INODE))
- goto out_unlock_inode;
if ((inode->i_state & flags) != flags) {
const int was_dirty = inode->i_state & I_DIRTY;
inode_attach_wb(inode, NULL);
- /* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
- if (flags & I_DIRTY_INODE)
- inode->i_state &= ~I_DIRTY_TIME;
inode->i_state |= flags;
/*
@@ -2486,7 +2500,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
out_unlock:
if (wb)
spin_unlock(&wb->list_lock);
-out_unlock_inode:
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(__mark_inode_dirty);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 51897427a534..b4a6e0a1b945 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -776,7 +776,8 @@ static int fuse_check_page(struct page *page)
1 << PG_active |
1 << PG_workingset |
1 << PG_reclaim |
- 1 << PG_waiters))) {
+ 1 << PG_waiters |
+ LRU_GEN_MASK | LRU_REFS_MASK))) {
dump_page(page, "fuse: trying to steal weird page");
return 1;
}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b585b04e815e..bb97a384dc5d 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -529,7 +529,7 @@ out_err:
*/
static int fuse_create_open(struct inode *dir, struct dentry *entry,
struct file *file, unsigned int flags,
- umode_t mode)
+ umode_t mode, u32 opcode)
{
int err;
struct inode *inode;
@@ -573,7 +573,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
}
- args.opcode = FUSE_CREATE;
+ args.opcode = opcode;
args.nodeid = get_node_id(dir);
args.in_numargs = 2;
args.in_args[0].size = sizeof(inarg);
@@ -676,7 +676,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
if (fc->no_create)
goto mknod;
- err = fuse_create_open(dir, entry, file, flags, mode);
+ err = fuse_create_open(dir, entry, file, flags, mode, FUSE_CREATE);
if (err == -ENOSYS) {
fc->no_create = 1;
goto mknod;
@@ -802,6 +802,23 @@ static int fuse_create(struct user_namespace *mnt_userns, struct inode *dir,
return fuse_mknod(&init_user_ns, dir, entry, mode, 0);
}
+static int fuse_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+ struct file *file, umode_t mode)
+{
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ int err;
+
+ if (fc->no_tmpfile)
+ return -EOPNOTSUPP;
+
+ err = fuse_create_open(dir, file->f_path.dentry, file, file->f_flags, mode, FUSE_TMPFILE);
+ if (err == -ENOSYS) {
+ fc->no_tmpfile = 1;
+ err = -EOPNOTSUPP;
+ }
+ return err;
+}
+
static int fuse_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
struct dentry *entry, umode_t mode)
{
@@ -1913,6 +1930,7 @@ static const struct inode_operations fuse_dir_inode_operations = {
.setattr = fuse_setattr,
.create = fuse_create,
.atomic_open = fuse_atomic_open,
+ .tmpfile = fuse_tmpfile,
.mknod = fuse_mknod,
.permission = fuse_permission,
.getattr = fuse_getattr,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 1a3afd469e3a..71bfb663aac5 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -3001,6 +3001,10 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
goto out;
}
+ err = file_modified(file);
+ if (err)
+ goto out;
+
if (!(mode & FALLOC_FL_KEEP_SIZE))
set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 488b460e046f..98a9cf531873 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -784,6 +784,9 @@ struct fuse_conn {
/* Does the filesystem support per inode DAX? */
unsigned int inode_dax:1;
+ /* Is tmpfile not implemented by fs? */
+ unsigned int no_tmpfile:1;
+
/** The number of requests waiting for completion */
atomic_t num_waiting;
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index b4e565711045..e8deaacf1832 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -77,8 +77,10 @@ static void fuse_add_dirent_to_cache(struct file *file,
goto unlock;
addr = kmap_local_page(page);
- if (!offset)
+ if (!offset) {
clear_page(addr);
+ SetPageUptodate(page);
+ }
memcpy(addr + offset, dirent, reclen);
kunmap_local(addr);
fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
@@ -516,6 +518,12 @@ retry_locked:
page = find_get_page_flags(file->f_mapping, index,
FGP_ACCESSED | FGP_LOCK);
+ /* Page gone missing, then re-added to cache, but not initialized? */
+ if (page && !PageUptodate(page)) {
+ unlock_page(page);
+ put_page(page);
+ page = NULL;
+ }
spin_lock(&fi->rdc.lock);
if (!page) {
/*
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 756d05779200..cf40895233f5 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -66,7 +66,7 @@ struct get_name_filldir {
char *name;
};
-static int get_name_filldir(struct dir_context *ctx, const char *name,
+static bool get_name_filldir(struct dir_context *ctx, const char *name,
int length, loff_t offset, u64 inum,
unsigned int type)
{
@@ -74,12 +74,12 @@ static int get_name_filldir(struct dir_context *ctx, const char *name,
container_of(ctx, struct get_name_filldir, ctx);
if (inum != gnfd->inum.no_addr)
- return 0;
+ return true;
memcpy(gnfd->name, name, length);
gnfd->name[length] = 0;
- return 1;
+ return false;
}
static int gfs2_get_name(struct dentry *parent, char *name,
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 892006fbbb09..60c6fb91fb58 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1443,6 +1443,22 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl);
}
+static void __flock_holder_uninit(struct file *file, struct gfs2_holder *fl_gh)
+{
+ struct gfs2_glock *gl = fl_gh->gh_gl;
+
+ /*
+ * Make sure gfs2_glock_put() won't sleep under the file->f_lock
+ * spinlock.
+ */
+
+ gfs2_glock_hold(gl);
+ spin_lock(&file->f_lock);
+ gfs2_holder_uninit(fl_gh);
+ spin_unlock(&file->f_lock);
+ gfs2_glock_put(gl);
+}
+
static int do_flock(struct file *file, int cmd, struct file_lock *fl)
{
struct gfs2_file *fp = file->private_data;
@@ -1455,7 +1471,9 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
int sleeptime;
state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
- flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY_1CB) | GL_EXACT;
+ flags = GL_EXACT | GL_NOPID;
+ if (!IS_SETLKW(cmd))
+ flags |= LM_FLAG_TRY_1CB;
mutex_lock(&fp->f_fl_mutex);
@@ -1474,18 +1492,21 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
&gfs2_flock_glops, CREATE, &gl);
if (error)
goto out;
+ spin_lock(&file->f_lock);
gfs2_holder_init(gl, state, flags, fl_gh);
+ spin_unlock(&file->f_lock);
gfs2_glock_put(gl);
}
for (sleeptime = 1; sleeptime <= 4; sleeptime <<= 1) {
error = gfs2_glock_nq(fl_gh);
if (error != GLR_TRYFAILED)
break;
- fl_gh->gh_flags = LM_FLAG_TRY | GL_EXACT;
+ fl_gh->gh_flags &= ~LM_FLAG_TRY_1CB;
+ fl_gh->gh_flags |= LM_FLAG_TRY;
msleep(sleeptime);
}
if (error) {
- gfs2_holder_uninit(fl_gh);
+ __flock_holder_uninit(file, fl_gh);
if (error == GLR_TRYFAILED)
error = -EAGAIN;
} else {
@@ -1507,7 +1528,7 @@ static void do_unflock(struct file *file, struct file_lock *fl)
locks_lock_file_wait(file, fl);
if (gfs2_holder_initialized(fl_gh)) {
gfs2_glock_dq(fl_gh);
- gfs2_holder_uninit(fl_gh);
+ __flock_holder_uninit(file, fl_gh);
}
mutex_unlock(&fp->f_fl_mutex);
}
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 41b6c89e4bf7..df335c258eb0 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -33,6 +33,9 @@
#include <linux/list_sort.h>
#include <linux/lockref.h>
#include <linux/rhashtable.h>
+#include <linux/pid_namespace.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
#include "gfs2.h"
#include "incore.h"
@@ -59,6 +62,8 @@ typedef void (*glock_examiner) (struct gfs2_glock * gl);
static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
static void __gfs2_glock_dq(struct gfs2_holder *gh);
+static void handle_callback(struct gfs2_glock *gl, unsigned int state,
+ unsigned long delay, bool remote);
static struct dentry *gfs2_root;
static struct workqueue_struct *glock_workqueue;
@@ -730,7 +735,8 @@ static bool is_system_glock(struct gfs2_glock *gl)
*
*/
-static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target)
+static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh,
+ unsigned int target)
__releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
@@ -741,7 +747,8 @@ __acquires(&gl->gl_lockref.lock)
if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) &&
gh && !(gh->gh_flags & LM_FLAG_NOEXP))
- return;
+ goto skip_inval;
+
lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
LM_FLAG_PRIORITY);
GLOCK_BUG_ON(gl, gl->gl_state == target);
@@ -826,6 +833,20 @@ skip_inval:
(target != LM_ST_UNLOCKED ||
test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags))) {
if (!is_system_glock(gl)) {
+ handle_callback(gl, LM_ST_UNLOCKED, 0, false); /* sets demote */
+ /*
+ * Ordinarily, we would call dlm and its callback would call
+ * finish_xmote, which would call state_change() to the new state.
+ * Since we withdrew, we won't call dlm, so call state_change
+ * manually, but to the UNLOCKED state we desire.
+ */
+ state_change(gl, LM_ST_UNLOCKED);
+ /*
+ * We skip telling dlm to do the locking, so we won't get a
+ * reply that would otherwise clear GLF_LOCK. So we clear it here.
+ */
+ clear_bit(GLF_LOCK, &gl->gl_flags);
+ clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD);
goto out;
} else {
@@ -1018,16 +1039,18 @@ static void delete_work_func(struct work_struct *work)
if (gfs2_queue_delete_work(gl, 5 * HZ))
return;
}
- goto out;
}
inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino,
GFS2_BLKST_UNLINKED);
- if (!IS_ERR_OR_NULL(inode)) {
+ if (IS_ERR(inode)) {
+ if (PTR_ERR(inode) == -EAGAIN &&
+ (gfs2_queue_delete_work(gl, 5 * HZ)))
+ return;
+ } else {
d_prune_aliases(inode);
iput(inode);
}
-out:
gfs2_glock_put(gl);
}
@@ -1436,6 +1459,15 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
va_end(args);
}
+static inline bool pid_is_meaningful(const struct gfs2_holder *gh)
+{
+ if (!(gh->gh_flags & GL_NOPID))
+ return true;
+ if (gh->gh_state == LM_ST_UNLOCKED)
+ return true;
+ return false;
+}
+
/**
* add_to_queue - Add a holder to the wait queue (but look for recursion)
* @gh: the holder structure to add
@@ -1472,10 +1504,17 @@ __acquires(&gl->gl_lockref.lock)
}
list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
- if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid &&
- (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK) &&
- !test_bit(HIF_MAY_DEMOTE, &gh2->gh_iflags)))
- goto trap_recursive;
+ if (likely(gh2->gh_owner_pid != gh->gh_owner_pid))
+ continue;
+ if (gh->gh_gl->gl_ops->go_type == LM_TYPE_FLOCK)
+ continue;
+ if (test_bit(HIF_MAY_DEMOTE, &gh2->gh_iflags))
+ continue;
+ if (!pid_is_meaningful(gh2))
+ continue;
+ goto trap_recursive;
+ }
+ list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
if (try_futile &&
!(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
fail:
@@ -2194,6 +2233,20 @@ static void dump_glock_func(struct gfs2_glock *gl)
dump_glock(NULL, gl, true);
}
+static void withdraw_dq(struct gfs2_glock *gl)
+{
+ spin_lock(&gl->gl_lockref.lock);
+ if (!__lockref_is_dead(&gl->gl_lockref) &&
+ glock_blocked_by_withdraw(gl))
+ do_error(gl, LM_OUT_ERROR); /* remove pending waiters */
+ spin_unlock(&gl->gl_lockref.lock);
+}
+
+void gfs2_gl_dq_holders(struct gfs2_sbd *sdp)
+{
+ glock_hash_walk(withdraw_dq, sdp);
+}
+
/**
* gfs2_gl_hash_clear - Empty out the glock hash table
* @sdp: the filesystem
@@ -2272,19 +2325,24 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh,
const char *fs_id_buf)
{
- struct task_struct *gh_owner = NULL;
+ const char *comm = "(none)";
+ pid_t owner_pid = 0;
char flags_buf[32];
rcu_read_lock();
- if (gh->gh_owner_pid)
+ if (pid_is_meaningful(gh)) {
+ struct task_struct *gh_owner;
+
+ comm = "(ended)";
+ owner_pid = pid_nr(gh->gh_owner_pid);
gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
+ if (gh_owner)
+ comm = gh_owner->comm;
+ }
gfs2_print_dbg(seq, "%s H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
fs_id_buf, state2str(gh->gh_state),
hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
- gh->gh_error,
- gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
- gh_owner ? gh_owner->comm : "(ended)",
- (void *)gh->gh_ip);
+ gh->gh_error, (long)owner_pid, comm, (void *)gh->gh_ip);
rcu_read_unlock();
}
@@ -2699,6 +2757,172 @@ static const struct file_operations gfs2_glstats_fops = {
.release = gfs2_glocks_release,
};
+struct gfs2_glockfd_iter {
+ struct super_block *sb;
+ unsigned int tgid;
+ struct task_struct *task;
+ unsigned int fd;
+ struct file *file;
+};
+
+static struct task_struct *gfs2_glockfd_next_task(struct gfs2_glockfd_iter *i)
+{
+ struct pid_namespace *ns = task_active_pid_ns(current);
+ struct pid *pid;
+
+ if (i->task)
+ put_task_struct(i->task);
+
+ rcu_read_lock();
+retry:
+ i->task = NULL;
+ pid = find_ge_pid(i->tgid, ns);
+ if (pid) {
+ i->tgid = pid_nr_ns(pid, ns);
+ i->task = pid_task(pid, PIDTYPE_TGID);
+ if (!i->task) {
+ i->tgid++;
+ goto retry;
+ }
+ get_task_struct(i->task);
+ }
+ rcu_read_unlock();
+ return i->task;
+}
+
+static struct file *gfs2_glockfd_next_file(struct gfs2_glockfd_iter *i)
+{
+ if (i->file) {
+ fput(i->file);
+ i->file = NULL;
+ }
+
+ rcu_read_lock();
+ for(;; i->fd++) {
+ struct inode *inode;
+
+ i->file = task_lookup_next_fd_rcu(i->task, &i->fd);
+ if (!i->file) {
+ i->fd = 0;
+ break;
+ }
+ inode = file_inode(i->file);
+ if (inode->i_sb != i->sb)
+ continue;
+ if (get_file_rcu(i->file))
+ break;
+ }
+ rcu_read_unlock();
+ return i->file;
+}
+
+static void *gfs2_glockfd_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct gfs2_glockfd_iter *i = seq->private;
+
+ if (*pos)
+ return NULL;
+ while (gfs2_glockfd_next_task(i)) {
+ if (gfs2_glockfd_next_file(i))
+ return i;
+ i->tgid++;
+ }
+ return NULL;
+}
+
+static void *gfs2_glockfd_seq_next(struct seq_file *seq, void *iter_ptr,
+ loff_t *pos)
+{
+ struct gfs2_glockfd_iter *i = seq->private;
+
+ (*pos)++;
+ i->fd++;
+ do {
+ if (gfs2_glockfd_next_file(i))
+ return i;
+ i->tgid++;
+ } while (gfs2_glockfd_next_task(i));
+ return NULL;
+}
+
+static void gfs2_glockfd_seq_stop(struct seq_file *seq, void *iter_ptr)
+{
+ struct gfs2_glockfd_iter *i = seq->private;
+
+ if (i->file)
+ fput(i->file);
+ if (i->task)
+ put_task_struct(i->task);
+}
+
+static void gfs2_glockfd_seq_show_flock(struct seq_file *seq,
+ struct gfs2_glockfd_iter *i)
+{
+ struct gfs2_file *fp = i->file->private_data;
+ struct gfs2_holder *fl_gh = &fp->f_fl_gh;
+ struct lm_lockname gl_name = { .ln_type = LM_TYPE_RESERVED };
+
+ if (!READ_ONCE(fl_gh->gh_gl))
+ return;
+
+ spin_lock(&i->file->f_lock);
+ if (gfs2_holder_initialized(fl_gh))
+ gl_name = fl_gh->gh_gl->gl_name;
+ spin_unlock(&i->file->f_lock);
+
+ if (gl_name.ln_type != LM_TYPE_RESERVED) {
+ seq_printf(seq, "%d %u %u/%llx\n",
+ i->tgid, i->fd, gl_name.ln_type,
+ (unsigned long long)gl_name.ln_number);
+ }
+}
+
+static int gfs2_glockfd_seq_show(struct seq_file *seq, void *iter_ptr)
+{
+ struct gfs2_glockfd_iter *i = seq->private;
+ struct inode *inode = file_inode(i->file);
+ struct gfs2_glock *gl;
+
+ inode_lock_shared(inode);
+ gl = GFS2_I(inode)->i_iopen_gh.gh_gl;
+ if (gl) {
+ seq_printf(seq, "%d %u %u/%llx\n",
+ i->tgid, i->fd, gl->gl_name.ln_type,
+ (unsigned long long)gl->gl_name.ln_number);
+ }
+ gfs2_glockfd_seq_show_flock(seq, i);
+ inode_unlock_shared(inode);
+ return 0;
+}
+
+static const struct seq_operations gfs2_glockfd_seq_ops = {
+ .start = gfs2_glockfd_seq_start,
+ .next = gfs2_glockfd_seq_next,
+ .stop = gfs2_glockfd_seq_stop,
+ .show = gfs2_glockfd_seq_show,
+};
+
+static int gfs2_glockfd_open(struct inode *inode, struct file *file)
+{
+ struct gfs2_glockfd_iter *i;
+ struct gfs2_sbd *sdp = inode->i_private;
+
+ i = __seq_open_private(file, &gfs2_glockfd_seq_ops,
+ sizeof(struct gfs2_glockfd_iter));
+ if (!i)
+ return -ENOMEM;
+ i->sb = sdp->sd_vfs;
+ return 0;
+}
+
+static const struct file_operations gfs2_glockfd_fops = {
+ .owner = THIS_MODULE,
+ .open = gfs2_glockfd_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
DEFINE_SEQ_ATTRIBUTE(gfs2_sbstats);
void gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
@@ -2708,6 +2932,9 @@ void gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
debugfs_create_file("glocks", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
&gfs2_glocks_fops);
+ debugfs_create_file("glockfd", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
+ &gfs2_glockfd_fops);
+
debugfs_create_file("glstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
&gfs2_glstats_fops);
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 5aed8b500cf5..0d068f4fd7d6 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -91,6 +91,7 @@ enum {
#define GL_ASYNC 0x0040
#define GL_EXACT 0x0080
#define GL_SKIP 0x0100
+#define GL_NOPID 0x0200
#define GL_NOCACHE 0x0400
/*
@@ -274,6 +275,7 @@ extern void gfs2_cancel_delete_work(struct gfs2_glock *gl);
extern bool gfs2_delete_work_queued(const struct gfs2_glock *gl);
extern void gfs2_flush_delete_work(struct gfs2_sbd *sdp);
extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
+extern void gfs2_gl_dq_holders(struct gfs2_sbd *sdp);
extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl);
extern void gfs2_glock_free(struct gfs2_glock *gl);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index c8ec876f33ea..04a201584fa7 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -130,6 +130,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
if (inode->i_state & I_NEW) {
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_glock *io_gl;
+ int extra_flags = 0;
error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE,
&ip->i_gl);
@@ -141,9 +142,12 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
if (unlikely(error))
goto fail;
- if (blktype != GFS2_BLKST_UNLINKED)
+ if (blktype == GFS2_BLKST_UNLINKED)
+ extra_flags |= LM_FLAG_TRY;
+ else
gfs2_cancel_delete_work(io_gl);
- error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT,
+ error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED,
+ GL_EXACT | GL_NOPID | extra_flags,
&ip->i_iopen_gh);
gfs2_glock_put(io_gl);
if (unlikely(error))
@@ -210,6 +214,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
return inode;
fail:
+ if (error == GLR_TRYFAILED)
+ error = -EAGAIN;
if (gfs2_holder_initialized(&ip->i_iopen_gh))
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
if (gfs2_holder_initialized(&i_gh))
@@ -720,7 +726,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
error = insert_inode_locked4(inode, ip->i_no_addr, iget_test, &ip->i_no_addr);
BUG_ON(error);
- error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
+ error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT | GL_NOPID,
+ &ip->i_iopen_gh);
if (error)
goto fail_gunlock2;
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 6ce369b096d4..71911bf9ab34 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -1302,7 +1302,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
memcpy(cluster, table, strlen(table) - strlen(fsname));
fsname++;
- flags = DLM_LSFL_FS | DLM_LSFL_NEWEXCL;
+ flags = DLM_LSFL_NEWEXCL;
/*
* create/join lockspace
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 14ae9de76277..afcb32854f14 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -151,14 +151,6 @@ static int __init init_gfs2_fs(void)
if (error)
goto fail_shrinker;
- error = register_filesystem(&gfs2_fs_type);
- if (error)
- goto fail_fs1;
-
- error = register_filesystem(&gfs2meta_fs_type);
- if (error)
- goto fail_fs2;
-
error = -ENOMEM;
gfs_recovery_wq = alloc_workqueue("gfs_recovery",
WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
@@ -180,11 +172,23 @@ static int __init init_gfs2_fs(void)
goto fail_mempool;
gfs2_register_debugfs();
+ error = register_filesystem(&gfs2_fs_type);
+ if (error)
+ goto fail_fs1;
+
+ error = register_filesystem(&gfs2meta_fs_type);
+ if (error)
+ goto fail_fs2;
+
pr_info("GFS2 installed\n");
return 0;
+fail_fs2:
+ unregister_filesystem(&gfs2_fs_type);
+fail_fs1:
+ mempool_destroy(gfs2_page_pool);
fail_mempool:
destroy_workqueue(gfs2_freeze_wq);
fail_wq3:
@@ -192,10 +196,6 @@ fail_wq3:
fail_wq2:
destroy_workqueue(gfs_recovery_wq);
fail_wq1:
- unregister_filesystem(&gfs2meta_fs_type);
-fail_fs2:
- unregister_filesystem(&gfs2_fs_type);
-fail_fs1:
unregister_shrinker(&gfs2_qd_shrinker);
fail_shrinker:
kmem_cache_destroy(gfs2_trans_cachep);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 7e70e0ba5a6c..6ed728aae9a5 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -525,8 +525,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
if (buffer_uptodate(first_bh))
goto out;
- if (!buffer_locked(first_bh))
- ll_rw_block(REQ_OP_READ | REQ_META | REQ_PRIO, 1, &first_bh);
+ bh_read_nowait(first_bh, REQ_META | REQ_PRIO);
dblock++;
extlen--;
@@ -534,9 +533,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
while (extlen) {
bh = gfs2_getbuf(gl, dblock, CREATE);
- if (!buffer_uptodate(bh) && !buffer_locked(bh))
- ll_rw_block(REQ_OP_READ | REQ_RAHEAD | REQ_META |
- REQ_PRIO, 1, &bh);
+ bh_readahead(bh, REQ_RAHEAD | REQ_META | REQ_PRIO);
brelse(bh);
dblock++;
extlen--;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 549879929c84..c0cf1d2d0ef5 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -178,7 +178,10 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
pr_warn("Invalid block size\n");
return -EINVAL;
}
-
+ if (sb->sb_bsize_shift != ffs(sb->sb_bsize) - 1) {
+ pr_warn("Invalid block size shift\n");
+ return -EINVAL;
+ }
return 0;
}
@@ -381,8 +384,10 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
if (!table[0])
table = sdp->sd_vfs->s_id;
- strlcpy(sdp->sd_proto_name, proto, GFS2_FSNAME_LEN);
- strlcpy(sdp->sd_table_name, table, GFS2_FSNAME_LEN);
+ BUILD_BUG_ON(GFS2_LOCKNAME_LEN > GFS2_FSNAME_LEN);
+
+ strscpy(sdp->sd_proto_name, proto, GFS2_LOCKNAME_LEN);
+ strscpy(sdp->sd_table_name, table, GFS2_LOCKNAME_LEN);
table = sdp->sd_table_name;
while ((table = strchr(table, '/')))
@@ -401,7 +406,8 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
error = gfs2_glock_nq_num(sdp,
GFS2_MOUNT_LOCK, &gfs2_nondisk_glops,
- LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOCACHE,
+ LM_ST_EXCLUSIVE,
+ LM_FLAG_NOEXP | GL_NOCACHE | GL_NOPID,
mount_gh);
if (error) {
fs_err(sdp, "can't acquire mount glock: %d\n", error);
@@ -411,7 +417,7 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
error = gfs2_glock_nq_num(sdp,
GFS2_LIVE_LOCK, &gfs2_nondisk_glops,
LM_ST_SHARED,
- LM_FLAG_NOEXP | GL_EXACT,
+ LM_FLAG_NOEXP | GL_EXACT | GL_NOPID,
&sdp->sd_live_gh);
if (error) {
fs_err(sdp, "can't acquire live glock: %d\n", error);
@@ -687,7 +693,7 @@ static int init_statfs(struct gfs2_sbd *sdp)
iput(pn);
pn = NULL;
ip = GFS2_I(sdp->sd_sc_inode);
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0,
+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_NOPID,
&sdp->sd_sc_gh);
if (error) {
fs_err(sdp, "can't lock local \"sc\" file: %d\n", error);
@@ -776,7 +782,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
error = gfs2_glock_nq_num(sdp, sdp->sd_lockstruct.ls_jid,
&gfs2_journal_glops,
LM_ST_EXCLUSIVE,
- LM_FLAG_NOEXP | GL_NOCACHE,
+ LM_FLAG_NOEXP | GL_NOCACHE | GL_NOPID,
&sdp->sd_journal_gh);
if (error) {
fs_err(sdp, "can't acquire journal glock: %d\n", error);
@@ -786,7 +792,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
ip = GFS2_I(sdp->sd_jdesc->jd_inode);
sdp->sd_jinode_gl = ip->i_gl;
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
- LM_FLAG_NOEXP | GL_EXACT | GL_NOCACHE,
+ LM_FLAG_NOEXP | GL_EXACT |
+ GL_NOCACHE | GL_NOPID,
&sdp->sd_jinode_gh);
if (error) {
fs_err(sdp, "can't acquire journal inode glock: %d\n",
@@ -957,7 +964,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo)
pn = NULL;
ip = GFS2_I(sdp->sd_qc_inode);
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0,
+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_NOPID,
&sdp->sd_qc_gh);
if (error) {
fs_err(sdp, "can't lock local \"qc\" file: %d\n", error);
@@ -1439,13 +1446,13 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
switch (o) {
case Opt_lockproto:
- strlcpy(args->ar_lockproto, param->string, GFS2_LOCKNAME_LEN);
+ strscpy(args->ar_lockproto, param->string, GFS2_LOCKNAME_LEN);
break;
case Opt_locktable:
- strlcpy(args->ar_locktable, param->string, GFS2_LOCKNAME_LEN);
+ strscpy(args->ar_locktable, param->string, GFS2_LOCKNAME_LEN);
break;
case Opt_hostdata:
- strlcpy(args->ar_hostdata, param->string, GFS2_LOCKNAME_LEN);
+ strscpy(args->ar_hostdata, param->string, GFS2_LOCKNAME_LEN);
break;
case Opt_spectator:
args->ar_spectator = 1;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index f201eaf59d0d..1ed17226d9ed 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -745,12 +745,8 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index,
}
if (PageUptodate(page))
set_buffer_uptodate(bh);
- if (!buffer_uptodate(bh)) {
- ll_rw_block(REQ_OP_READ | REQ_META | REQ_PRIO, 1, &bh);
- wait_on_buffer(bh);
- if (!buffer_uptodate(bh))
- goto unlock_out;
- }
+ if (bh_read(bh, REQ_META | REQ_PRIO) < 0)
+ goto unlock_out;
if (gfs2_is_jdata(ip))
gfs2_trans_add_data(ip->i_gl, bh);
else
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index b5b0f285b27f..b018957a1bb2 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -346,7 +346,8 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp)
}
error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE,
- LM_FLAG_NOEXP, &sdp->sd_freeze_gh);
+ LM_FLAG_NOEXP | GL_NOPID,
+ &sdp->sd_freeze_gh);
if (error)
goto out;
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 8241029a2a5d..7a6aeffcdf5c 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -164,6 +164,11 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
}
if (!ret)
gfs2_make_fs_ro(sdp);
+ /*
+ * Dequeue any pending non-system glock holders that can no
+ * longer be granted because the file system is withdrawn.
+ */
+ gfs2_gl_dq_holders(sdp);
gfs2_freeze_unlock(&freeze_gh);
}
@@ -204,6 +209,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
* exception code in glock_dq.
*/
iput(inode);
+ sdp->sd_jdesc->jd_inode = NULL;
/*
* Wait until the journal inode's glock is freed. This allows try locks
* on other nodes to be successful, otherwise we remain the owner of
@@ -226,7 +232,8 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
*/
fs_warn(sdp, "Requesting recovery of jid %d.\n",
sdp->sd_lockstruct.ls_jid);
- gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | LM_FLAG_NOEXP,
+ gfs2_holder_reinit(LM_ST_EXCLUSIVE,
+ LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | GL_NOPID,
&sdp->sd_live_gh);
msleep(GL_GLOCK_MAX_HOLD);
/*
@@ -251,7 +258,8 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
fs_warn(sdp, "Unable to recover our journal jid %d.\n",
sdp->sd_lockstruct.ls_jid);
gfs2_glock_dq_wait(&sdp->sd_live_gh);
- gfs2_holder_reinit(LM_ST_SHARED, LM_FLAG_NOEXP | GL_EXACT,
+ gfs2_holder_reinit(LM_ST_SHARED,
+ LM_FLAG_NOEXP | GL_EXACT | GL_NOPID,
&sdp->sd_live_gh);
gfs2_glock_nq(&sdp->sd_live_gh);
}
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index c83fd0e8404d..2015e42e752a 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -21,7 +21,6 @@ void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len)
int pagenum;
int bytes_read;
int bytes_to_read;
- void *vaddr;
off += node->page_offset;
pagenum = off >> PAGE_SHIFT;
@@ -33,9 +32,7 @@ void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len)
page = node->page[pagenum];
bytes_to_read = min_t(int, len - bytes_read, PAGE_SIZE - off);
- vaddr = kmap_atomic(page);
- memcpy(buf + bytes_read, vaddr + off, bytes_to_read);
- kunmap_atomic(vaddr);
+ memcpy_from_page(buf + bytes_read, page, off, bytes_to_read);
pagenum++;
off = 0; /* page offset only applies to the first page */
@@ -80,8 +77,7 @@ void hfs_bnode_write(struct hfs_bnode *node, void *buf, int off, int len)
off += node->page_offset;
page = node->page[0];
- memcpy(kmap(page) + off, buf, len);
- kunmap(page);
+ memcpy_to_page(page, off, buf, len);
set_page_dirty(page);
}
@@ -105,8 +101,7 @@ void hfs_bnode_clear(struct hfs_bnode *node, int off, int len)
off += node->page_offset;
page = node->page[0];
- memset(kmap(page) + off, 0, len);
- kunmap(page);
+ memzero_page(page, off, len);
set_page_dirty(page);
}
@@ -123,9 +118,7 @@ void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst,
src_page = src_node->page[0];
dst_page = dst_node->page[0];
- memcpy(kmap(dst_page) + dst, kmap(src_page) + src, len);
- kunmap(src_page);
- kunmap(dst_page);
+ memcpy_page(dst_page, dst, src_page, src, len);
set_page_dirty(dst_page);
}
@@ -140,9 +133,9 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len)
src += node->page_offset;
dst += node->page_offset;
page = node->page[0];
- ptr = kmap(page);
+ ptr = kmap_local_page(page);
memmove(ptr + dst, ptr + src, len);
- kunmap(page);
+ kunmap_local(ptr);
set_page_dirty(page);
}
@@ -346,13 +339,14 @@ struct hfs_bnode *hfs_bnode_find(struct hfs_btree *tree, u32 num)
if (!test_bit(HFS_BNODE_NEW, &node->flags))
return node;
- desc = (struct hfs_bnode_desc *)(kmap(node->page[0]) + node->page_offset);
+ desc = (struct hfs_bnode_desc *)(kmap_local_page(node->page[0]) +
+ node->page_offset);
node->prev = be32_to_cpu(desc->prev);
node->next = be32_to_cpu(desc->next);
node->num_recs = be16_to_cpu(desc->num_recs);
node->type = desc->type;
node->height = desc->height;
- kunmap(node->page[0]);
+ kunmap_local(desc);
switch (node->type) {
case HFS_NODE_HEADER:
@@ -436,14 +430,12 @@ struct hfs_bnode *hfs_bnode_create(struct hfs_btree *tree, u32 num)
}
pagep = node->page;
- memset(kmap(*pagep) + node->page_offset, 0,
- min((int)PAGE_SIZE, (int)tree->node_size));
+ memzero_page(*pagep, node->page_offset,
+ min((int)PAGE_SIZE, (int)tree->node_size));
set_page_dirty(*pagep);
- kunmap(*pagep);
for (i = 1; i < tree->pages_per_bnode; i++) {
- memset(kmap(*++pagep), 0, PAGE_SIZE);
+ memzero_page(*++pagep, 0, PAGE_SIZE);
set_page_dirty(*pagep);
- kunmap(*pagep);
}
clear_bit(HFS_BNODE_NEW, &node->flags);
wake_up(&node->lock_wq);
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 19017d296173..2fa4b1f8cc7f 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -80,7 +80,8 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
goto free_inode;
/* Load the header */
- head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc));
+ head = (struct hfs_btree_header_rec *)(kmap_local_page(page) +
+ sizeof(struct hfs_bnode_desc));
tree->root = be32_to_cpu(head->root);
tree->leaf_count = be32_to_cpu(head->leaf_count);
tree->leaf_head = be32_to_cpu(head->leaf_head);
@@ -119,11 +120,12 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
tree->node_size_shift = ffs(size) - 1;
tree->pages_per_bnode = (tree->node_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
- kunmap(page);
+ kunmap_local(head);
put_page(page);
return tree;
fail_page:
+ kunmap_local(head);
put_page(page);
free_inode:
tree->inode->i_mapping->a_ops = &hfs_aops;
@@ -169,7 +171,8 @@ void hfs_btree_write(struct hfs_btree *tree)
return;
/* Load the header */
page = node->page[0];
- head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc));
+ head = (struct hfs_btree_header_rec *)(kmap_local_page(page) +
+ sizeof(struct hfs_bnode_desc));
head->root = cpu_to_be32(tree->root);
head->leaf_count = cpu_to_be32(tree->leaf_count);
@@ -180,7 +183,7 @@ void hfs_btree_write(struct hfs_btree *tree)
head->attributes = cpu_to_be32(tree->attributes);
head->depth = cpu_to_be16(tree->depth);
- kunmap(page);
+ kunmap_local(head);
set_page_dirty(page);
hfs_bnode_put(node);
}
@@ -268,7 +271,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
off += node->page_offset;
pagep = node->page + (off >> PAGE_SHIFT);
- data = kmap(*pagep);
+ data = kmap_local_page(*pagep);
off &= ~PAGE_MASK;
idx = 0;
@@ -281,7 +284,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
idx += i;
data[off] |= m;
set_page_dirty(*pagep);
- kunmap(*pagep);
+ kunmap_local(data);
tree->free_nodes--;
mark_inode_dirty(tree->inode);
hfs_bnode_put(node);
@@ -290,14 +293,14 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
}
}
if (++off >= PAGE_SIZE) {
- kunmap(*pagep);
- data = kmap(*++pagep);
+ kunmap_local(data);
+ data = kmap_local_page(*++pagep);
off = 0;
}
idx += 8;
len--;
}
- kunmap(*pagep);
+ kunmap_local(data);
nidx = node->next;
if (!nidx) {
printk(KERN_DEBUG "create new bmap node...\n");
@@ -313,7 +316,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
off = off16;
off += node->page_offset;
pagep = node->page + (off >> PAGE_SHIFT);
- data = kmap(*pagep);
+ data = kmap_local_page(*pagep);
off &= ~PAGE_MASK;
}
}
@@ -360,20 +363,20 @@ void hfs_bmap_free(struct hfs_bnode *node)
}
off += node->page_offset + nidx / 8;
page = node->page[off >> PAGE_SHIFT];
- data = kmap(page);
+ data = kmap_local_page(page);
off &= ~PAGE_MASK;
m = 1 << (~nidx & 7);
byte = data[off];
if (!(byte & m)) {
pr_crit("trying to free free bnode %u(%d)\n",
node->this, node->type);
- kunmap(page);
+ kunmap_local(data);
hfs_bnode_put(node);
return;
}
data[off] = byte & ~m;
set_page_dirty(page);
- kunmap(page);
+ kunmap_local(data);
hfs_bnode_put(node);
tree->free_nodes++;
mark_inode_dirty(tree->inode);
diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c
index cebce0cfe340..bd8dcea85588 100644
--- a/fs/hfsplus/bitmap.c
+++ b/fs/hfsplus/bitmap.c
@@ -39,7 +39,7 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size,
start = size;
goto out;
}
- pptr = kmap(page);
+ pptr = kmap_local_page(page);
curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32;
i = offset % 32;
offset &= ~(PAGE_CACHE_BITS - 1);
@@ -74,7 +74,7 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size,
}
curr++;
}
- kunmap(page);
+ kunmap_local(pptr);
offset += PAGE_CACHE_BITS;
if (offset >= size)
break;
@@ -84,7 +84,7 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size,
start = size;
goto out;
}
- curr = pptr = kmap(page);
+ curr = pptr = kmap_local_page(page);
if ((size ^ offset) / PAGE_CACHE_BITS)
end = pptr + PAGE_CACHE_BITS / 32;
else
@@ -127,7 +127,7 @@ found:
len -= 32;
}
set_page_dirty(page);
- kunmap(page);
+ kunmap_local(pptr);
offset += PAGE_CACHE_BITS;
page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS,
NULL);
@@ -135,7 +135,7 @@ found:
start = size;
goto out;
}
- pptr = kmap(page);
+ pptr = kmap_local_page(page);
curr = pptr;
end = pptr + PAGE_CACHE_BITS / 32;
}
@@ -151,7 +151,7 @@ last:
done:
*curr = cpu_to_be32(n);
set_page_dirty(page);
- kunmap(page);
+ kunmap_local(pptr);
*max = offset + (curr - pptr) * 32 + i - start;
sbi->free_blocks -= *max;
hfsplus_mark_mdb_dirty(sb);
@@ -185,7 +185,7 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count)
page = read_mapping_page(mapping, pnr, NULL);
if (IS_ERR(page))
goto kaboom;
- pptr = kmap(page);
+ pptr = kmap_local_page(page);
curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32;
end = pptr + PAGE_CACHE_BITS / 32;
len = count;
@@ -215,11 +215,11 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count)
if (!count)
break;
set_page_dirty(page);
- kunmap(page);
+ kunmap_local(pptr);
page = read_mapping_page(mapping, ++pnr, NULL);
if (IS_ERR(page))
goto kaboom;
- pptr = kmap(page);
+ pptr = kmap_local_page(page);
curr = pptr;
end = pptr + PAGE_CACHE_BITS / 32;
}
@@ -231,7 +231,7 @@ done:
}
out:
set_page_dirty(page);
- kunmap(page);
+ kunmap_local(pptr);
sbi->free_blocks += len;
hfsplus_mark_mdb_dirty(sb);
mutex_unlock(&sbi->alloc_mutex);
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index a5ab00e54220..87974d5e6791 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -29,14 +29,12 @@ void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len)
off &= ~PAGE_MASK;
l = min_t(int, len, PAGE_SIZE - off);
- memcpy(buf, kmap(*pagep) + off, l);
- kunmap(*pagep);
+ memcpy_from_page(buf, *pagep, off, l);
while ((len -= l) != 0) {
buf += l;
l = min_t(int, len, PAGE_SIZE);
- memcpy(buf, kmap(*++pagep), l);
- kunmap(*pagep);
+ memcpy_from_page(buf, *++pagep, 0, l);
}
}
@@ -82,16 +80,14 @@ void hfs_bnode_write(struct hfs_bnode *node, void *buf, int off, int len)
off &= ~PAGE_MASK;
l = min_t(int, len, PAGE_SIZE - off);
- memcpy(kmap(*pagep) + off, buf, l);
+ memcpy_to_page(*pagep, off, buf, l);
set_page_dirty(*pagep);
- kunmap(*pagep);
while ((len -= l) != 0) {
buf += l;
l = min_t(int, len, PAGE_SIZE);
- memcpy(kmap(*++pagep), buf, l);
+ memcpy_to_page(*++pagep, 0, buf, l);
set_page_dirty(*pagep);
- kunmap(*pagep);
}
}
@@ -112,15 +108,13 @@ void hfs_bnode_clear(struct hfs_bnode *node, int off, int len)
off &= ~PAGE_MASK;
l = min_t(int, len, PAGE_SIZE - off);
- memset(kmap(*pagep) + off, 0, l);
+ memzero_page(*pagep, off, l);
set_page_dirty(*pagep);
- kunmap(*pagep);
while ((len -= l) != 0) {
l = min_t(int, len, PAGE_SIZE);
- memset(kmap(*++pagep), 0, l);
+ memzero_page(*++pagep, 0, l);
set_page_dirty(*pagep);
- kunmap(*pagep);
}
}
@@ -142,24 +136,20 @@ void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst,
if (src == dst) {
l = min_t(int, len, PAGE_SIZE - src);
- memcpy(kmap(*dst_page) + src, kmap(*src_page) + src, l);
- kunmap(*src_page);
+ memcpy_page(*dst_page, src, *src_page, src, l);
set_page_dirty(*dst_page);
- kunmap(*dst_page);
while ((len -= l) != 0) {
l = min_t(int, len, PAGE_SIZE);
- memcpy(kmap(*++dst_page), kmap(*++src_page), l);
- kunmap(*src_page);
+ memcpy_page(*++dst_page, 0, *++src_page, 0, l);
set_page_dirty(*dst_page);
- kunmap(*dst_page);
}
} else {
void *src_ptr, *dst_ptr;
do {
- src_ptr = kmap(*src_page) + src;
- dst_ptr = kmap(*dst_page) + dst;
+ dst_ptr = kmap_local_page(*dst_page) + dst;
+ src_ptr = kmap_local_page(*src_page) + src;
if (PAGE_SIZE - src < PAGE_SIZE - dst) {
l = PAGE_SIZE - src;
src = 0;
@@ -171,9 +161,9 @@ void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst,
}
l = min(len, l);
memcpy(dst_ptr, src_ptr, l);
- kunmap(*src_page);
+ kunmap_local(src_ptr);
set_page_dirty(*dst_page);
- kunmap(*dst_page);
+ kunmap_local(dst_ptr);
if (!dst)
dst_page++;
else
@@ -185,6 +175,7 @@ void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst,
void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len)
{
struct page **src_page, **dst_page;
+ void *src_ptr, *dst_ptr;
int l;
hfs_dbg(BNODE_MOD, "movebytes: %u,%u,%u\n", dst, src, len);
@@ -202,27 +193,28 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len)
if (src == dst) {
while (src < len) {
- memmove(kmap(*dst_page), kmap(*src_page), src);
- kunmap(*src_page);
+ dst_ptr = kmap_local_page(*dst_page);
+ src_ptr = kmap_local_page(*src_page);
+ memmove(dst_ptr, src_ptr, src);
+ kunmap_local(src_ptr);
set_page_dirty(*dst_page);
- kunmap(*dst_page);
+ kunmap_local(dst_ptr);
len -= src;
src = PAGE_SIZE;
src_page--;
dst_page--;
}
src -= len;
- memmove(kmap(*dst_page) + src,
- kmap(*src_page) + src, len);
- kunmap(*src_page);
+ dst_ptr = kmap_local_page(*dst_page);
+ src_ptr = kmap_local_page(*src_page);
+ memmove(dst_ptr + src, src_ptr + src, len);
+ kunmap_local(src_ptr);
set_page_dirty(*dst_page);
- kunmap(*dst_page);
+ kunmap_local(dst_ptr);
} else {
- void *src_ptr, *dst_ptr;
-
do {
- src_ptr = kmap(*src_page) + src;
- dst_ptr = kmap(*dst_page) + dst;
+ dst_ptr = kmap_local_page(*dst_page) + dst;
+ src_ptr = kmap_local_page(*src_page) + src;
if (src < dst) {
l = src;
src = PAGE_SIZE;
@@ -234,9 +226,9 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len)
}
l = min(len, l);
memmove(dst_ptr - l, src_ptr - l, l);
- kunmap(*src_page);
+ kunmap_local(src_ptr);
set_page_dirty(*dst_page);
- kunmap(*dst_page);
+ kunmap_local(dst_ptr);
if (dst == PAGE_SIZE)
dst_page--;
else
@@ -251,26 +243,27 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len)
if (src == dst) {
l = min_t(int, len, PAGE_SIZE - src);
- memmove(kmap(*dst_page) + src,
- kmap(*src_page) + src, l);
- kunmap(*src_page);
+
+ dst_ptr = kmap_local_page(*dst_page) + src;
+ src_ptr = kmap_local_page(*src_page) + src;
+ memmove(dst_ptr, src_ptr, l);
+ kunmap_local(src_ptr);
set_page_dirty(*dst_page);
- kunmap(*dst_page);
+ kunmap_local(dst_ptr);
while ((len -= l) != 0) {
l = min_t(int, len, PAGE_SIZE);
- memmove(kmap(*++dst_page),
- kmap(*++src_page), l);
- kunmap(*src_page);
+ dst_ptr = kmap_local_page(*++dst_page);
+ src_ptr = kmap_local_page(*++src_page);
+ memmove(dst_ptr, src_ptr, l);
+ kunmap_local(src_ptr);
set_page_dirty(*dst_page);
- kunmap(*dst_page);
+ kunmap_local(dst_ptr);
}
} else {
- void *src_ptr, *dst_ptr;
-
do {
- src_ptr = kmap(*src_page) + src;
- dst_ptr = kmap(*dst_page) + dst;
+ dst_ptr = kmap_local_page(*dst_page) + dst;
+ src_ptr = kmap_local_page(*src_page) + src;
if (PAGE_SIZE - src <
PAGE_SIZE - dst) {
l = PAGE_SIZE - src;
@@ -283,9 +276,9 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len)
}
l = min(len, l);
memmove(dst_ptr, src_ptr, l);
- kunmap(*src_page);
+ kunmap_local(src_ptr);
set_page_dirty(*dst_page);
- kunmap(*dst_page);
+ kunmap_local(dst_ptr);
if (!dst)
dst_page++;
else
@@ -498,14 +491,14 @@ struct hfs_bnode *hfs_bnode_find(struct hfs_btree *tree, u32 num)
if (!test_bit(HFS_BNODE_NEW, &node->flags))
return node;
- desc = (struct hfs_bnode_desc *)(kmap(node->page[0]) +
- node->page_offset);
+ desc = (struct hfs_bnode_desc *)(kmap_local_page(node->page[0]) +
+ node->page_offset);
node->prev = be32_to_cpu(desc->prev);
node->next = be32_to_cpu(desc->next);
node->num_recs = be16_to_cpu(desc->num_recs);
node->type = desc->type;
node->height = desc->height;
- kunmap(node->page[0]);
+ kunmap_local(desc);
switch (node->type) {
case HFS_NODE_HEADER:
@@ -589,14 +582,12 @@ struct hfs_bnode *hfs_bnode_create(struct hfs_btree *tree, u32 num)
}
pagep = node->page;
- memset(kmap(*pagep) + node->page_offset, 0,
- min_t(int, PAGE_SIZE, tree->node_size));
+ memzero_page(*pagep, node->page_offset,
+ min_t(int, PAGE_SIZE, tree->node_size));
set_page_dirty(*pagep);
- kunmap(*pagep);
for (i = 1; i < tree->pages_per_bnode; i++) {
- memset(kmap(*++pagep), 0, PAGE_SIZE);
+ memzero_page(*++pagep, 0, PAGE_SIZE);
set_page_dirty(*pagep);
- kunmap(*pagep);
}
clear_bit(HFS_BNODE_NEW, &node->flags);
wake_up(&node->lock_wq);
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index 66774f4cb4fd..9e1732a2b92a 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -163,7 +163,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
goto free_inode;
/* Load the header */
- head = (struct hfs_btree_header_rec *)(kmap(page) +
+ head = (struct hfs_btree_header_rec *)(kmap_local_page(page) +
sizeof(struct hfs_bnode_desc));
tree->root = be32_to_cpu(head->root);
tree->leaf_count = be32_to_cpu(head->leaf_count);
@@ -240,11 +240,12 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
(tree->node_size + PAGE_SIZE - 1) >>
PAGE_SHIFT;
- kunmap(page);
+ kunmap_local(head);
put_page(page);
return tree;
fail_page:
+ kunmap_local(head);
put_page(page);
free_inode:
tree->inode->i_mapping->a_ops = &hfsplus_aops;
@@ -291,7 +292,7 @@ int hfs_btree_write(struct hfs_btree *tree)
return -EIO;
/* Load the header */
page = node->page[0];
- head = (struct hfs_btree_header_rec *)(kmap(page) +
+ head = (struct hfs_btree_header_rec *)(kmap_local_page(page) +
sizeof(struct hfs_bnode_desc));
head->root = cpu_to_be32(tree->root);
@@ -303,7 +304,7 @@ int hfs_btree_write(struct hfs_btree *tree)
head->attributes = cpu_to_be32(tree->attributes);
head->depth = cpu_to_be16(tree->depth);
- kunmap(page);
+ kunmap_local(head);
set_page_dirty(page);
hfs_bnode_put(node);
return 0;
@@ -394,7 +395,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
off += node->page_offset;
pagep = node->page + (off >> PAGE_SHIFT);
- data = kmap(*pagep);
+ data = kmap_local_page(*pagep);
off &= ~PAGE_MASK;
idx = 0;
@@ -407,7 +408,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
idx += i;
data[off] |= m;
set_page_dirty(*pagep);
- kunmap(*pagep);
+ kunmap_local(data);
tree->free_nodes--;
mark_inode_dirty(tree->inode);
hfs_bnode_put(node);
@@ -417,14 +418,14 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
}
}
if (++off >= PAGE_SIZE) {
- kunmap(*pagep);
- data = kmap(*++pagep);
+ kunmap_local(data);
+ data = kmap_local_page(*++pagep);
off = 0;
}
idx += 8;
len--;
}
- kunmap(*pagep);
+ kunmap_local(data);
nidx = node->next;
if (!nidx) {
hfs_dbg(BNODE_MOD, "create new bmap node\n");
@@ -440,7 +441,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
off = off16;
off += node->page_offset;
pagep = node->page + (off >> PAGE_SHIFT);
- data = kmap(*pagep);
+ data = kmap_local_page(*pagep);
off &= ~PAGE_MASK;
}
}
@@ -490,7 +491,7 @@ void hfs_bmap_free(struct hfs_bnode *node)
}
off += node->page_offset + nidx / 8;
page = node->page[off >> PAGE_SHIFT];
- data = kmap(page);
+ data = kmap_local_page(page);
off &= ~PAGE_MASK;
m = 1 << (~nidx & 7);
byte = data[off];
@@ -498,13 +499,13 @@ void hfs_bmap_free(struct hfs_bnode *node)
pr_crit("trying to free free bnode "
"%u(%d)\n",
node->this, node->type);
- kunmap(page);
+ kunmap_local(data);
hfs_bnode_put(node);
return;
}
data[off] = byte & ~m;
set_page_dirty(page);
- kunmap(page);
+ kunmap_local(data);
hfs_bnode_put(node);
tree->free_nodes++;
mark_inode_dirty(tree->inode);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 07881b76d42f..277468783fee 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -103,7 +103,7 @@ static char *__dentry_name(struct dentry *dentry, char *name)
*/
BUG_ON(p + strlen(p) + 1 != name + PATH_MAX);
- strlcpy(name, root, PATH_MAX);
+ strscpy(name, root, PATH_MAX);
if (len > p - name) {
__putname(name);
return NULL;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index f7a5b5124d8a..dd54f67e47fd 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -364,13 +364,155 @@ static int hugetlbfs_write_end(struct file *file, struct address_space *mapping,
return -EINVAL;
}
-static void remove_huge_page(struct page *page)
+static void hugetlb_delete_from_page_cache(struct page *page)
{
ClearPageDirty(page);
ClearPageUptodate(page);
delete_from_page_cache(page);
}
+/*
+ * Called with i_mmap_rwsem held for inode based vma maps. This makes
+ * sure vma (and vm_mm) will not go away. We also hold the hugetlb fault
+ * mutex for the page in the mapping. So, we can not race with page being
+ * faulted into the vma.
+ */
+static bool hugetlb_vma_maps_page(struct vm_area_struct *vma,
+ unsigned long addr, struct page *page)
+{
+ pte_t *ptep, pte;
+
+ ptep = huge_pte_offset(vma->vm_mm, addr,
+ huge_page_size(hstate_vma(vma)));
+
+ if (!ptep)
+ return false;
+
+ pte = huge_ptep_get(ptep);
+ if (huge_pte_none(pte) || !pte_present(pte))
+ return false;
+
+ if (pte_page(pte) == page)
+ return true;
+
+ return false;
+}
+
+/*
+ * Can vma_offset_start/vma_offset_end overflow on 32-bit arches?
+ * No, because the interval tree returns us only those vmas
+ * which overlap the truncated area starting at pgoff,
+ * and no vma on a 32-bit arch can span beyond the 4GB.
+ */
+static unsigned long vma_offset_start(struct vm_area_struct *vma, pgoff_t start)
+{
+ if (vma->vm_pgoff < start)
+ return (start - vma->vm_pgoff) << PAGE_SHIFT;
+ else
+ return 0;
+}
+
+static unsigned long vma_offset_end(struct vm_area_struct *vma, pgoff_t end)
+{
+ unsigned long t_end;
+
+ if (!end)
+ return vma->vm_end;
+
+ t_end = ((end - vma->vm_pgoff) << PAGE_SHIFT) + vma->vm_start;
+ if (t_end > vma->vm_end)
+ t_end = vma->vm_end;
+ return t_end;
+}
+
+/*
+ * Called with hugetlb fault mutex held. Therefore, no more mappings to
+ * this folio can be created while executing the routine.
+ */
+static void hugetlb_unmap_file_folio(struct hstate *h,
+ struct address_space *mapping,
+ struct folio *folio, pgoff_t index)
+{
+ struct rb_root_cached *root = &mapping->i_mmap;
+ struct hugetlb_vma_lock *vma_lock;
+ struct page *page = &folio->page;
+ struct vm_area_struct *vma;
+ unsigned long v_start;
+ unsigned long v_end;
+ pgoff_t start, end;
+
+ start = index * pages_per_huge_page(h);
+ end = (index + 1) * pages_per_huge_page(h);
+
+ i_mmap_lock_write(mapping);
+retry:
+ vma_lock = NULL;
+ vma_interval_tree_foreach(vma, root, start, end - 1) {
+ v_start = vma_offset_start(vma, start);
+ v_end = vma_offset_end(vma, end);
+
+ if (!hugetlb_vma_maps_page(vma, vma->vm_start + v_start, page))
+ continue;
+
+ if (!hugetlb_vma_trylock_write(vma)) {
+ vma_lock = vma->vm_private_data;
+ /*
+ * If we can not get vma lock, we need to drop
+ * immap_sema and take locks in order. First,
+ * take a ref on the vma_lock structure so that
+ * we can be guaranteed it will not go away when
+ * dropping immap_sema.
+ */
+ kref_get(&vma_lock->refs);
+ break;
+ }
+
+ unmap_hugepage_range(vma, vma->vm_start + v_start, v_end,
+ NULL, ZAP_FLAG_DROP_MARKER);
+ hugetlb_vma_unlock_write(vma);
+ }
+
+ i_mmap_unlock_write(mapping);
+
+ if (vma_lock) {
+ /*
+ * Wait on vma_lock. We know it is still valid as we have
+ * a reference. We must 'open code' vma locking as we do
+ * not know if vma_lock is still attached to vma.
+ */
+ down_write(&vma_lock->rw_sema);
+ i_mmap_lock_write(mapping);
+
+ vma = vma_lock->vma;
+ if (!vma) {
+ /*
+ * If lock is no longer attached to vma, then just
+ * unlock, drop our reference and retry looking for
+ * other vmas.
+ */
+ up_write(&vma_lock->rw_sema);
+ kref_put(&vma_lock->refs, hugetlb_vma_lock_release);
+ goto retry;
+ }
+
+ /*
+ * vma_lock is still attached to vma. Check to see if vma
+ * still maps page and if so, unmap.
+ */
+ v_start = vma_offset_start(vma, start);
+ v_end = vma_offset_end(vma, end);
+ if (hugetlb_vma_maps_page(vma, vma->vm_start + v_start, page))
+ unmap_hugepage_range(vma, vma->vm_start + v_start,
+ v_end, NULL,
+ ZAP_FLAG_DROP_MARKER);
+
+ kref_put(&vma_lock->refs, hugetlb_vma_lock_release);
+ hugetlb_vma_unlock_write(vma);
+
+ goto retry;
+ }
+}
+
static void
hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end,
zap_flags_t zap_flags)
@@ -383,32 +525,66 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end,
* an inclusive "last".
*/
vma_interval_tree_foreach(vma, root, start, end ? end - 1 : ULONG_MAX) {
- unsigned long v_offset;
+ unsigned long v_start;
unsigned long v_end;
+ if (!hugetlb_vma_trylock_write(vma))
+ continue;
+
+ v_start = vma_offset_start(vma, start);
+ v_end = vma_offset_end(vma, end);
+
+ unmap_hugepage_range(vma, vma->vm_start + v_start, v_end,
+ NULL, zap_flags);
+
/*
- * Can the expression below overflow on 32-bit arches?
- * No, because the interval tree returns us only those vmas
- * which overlap the truncated area starting at pgoff,
- * and no vma on a 32-bit arch can span beyond the 4GB.
+ * Note that vma lock only exists for shared/non-private
+ * vmas. Therefore, lock is not held when calling
+ * unmap_hugepage_range for private vmas.
*/
- if (vma->vm_pgoff < start)
- v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
- else
- v_offset = 0;
-
- if (!end)
- v_end = vma->vm_end;
- else {
- v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT)
- + vma->vm_start;
- if (v_end > vma->vm_end)
- v_end = vma->vm_end;
- }
+ hugetlb_vma_unlock_write(vma);
+ }
+}
- unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
- NULL, zap_flags);
+/*
+ * Called with hugetlb fault mutex held.
+ * Returns true if page was actually removed, false otherwise.
+ */
+static bool remove_inode_single_folio(struct hstate *h, struct inode *inode,
+ struct address_space *mapping,
+ struct folio *folio, pgoff_t index,
+ bool truncate_op)
+{
+ bool ret = false;
+
+ /*
+ * If folio is mapped, it was faulted in after being
+ * unmapped in caller. Unmap (again) while holding
+ * the fault mutex. The mutex will prevent faults
+ * until we finish removing the folio.
+ */
+ if (unlikely(folio_mapped(folio)))
+ hugetlb_unmap_file_folio(h, mapping, folio, index);
+
+ folio_lock(folio);
+ /*
+ * We must remove the folio from page cache before removing
+ * the region/ reserve map (hugetlb_unreserve_pages). In
+ * rare out of memory conditions, removal of the region/reserve
+ * map could fail. Correspondingly, the subpool and global
+ * reserve usage count can need to be adjusted.
+ */
+ VM_BUG_ON(HPageRestoreReserve(&folio->page));
+ hugetlb_delete_from_page_cache(&folio->page);
+ ret = true;
+ if (!truncate_op) {
+ if (unlikely(hugetlb_unreserve_pages(inode, index,
+ index + 1, 1)))
+ hugetlb_fix_reserve_counts(inode);
}
+
+ folio_unlock(folio);
+ return ret;
}
/*
@@ -418,10 +594,10 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end,
* truncation is indicated by end of range being LLONG_MAX
* In this case, we first scan the range and release found pages.
* After releasing pages, hugetlb_unreserve_pages cleans up region/reserve
- * maps and global counts. Page faults can not race with truncation
- * in this routine. hugetlb_no_page() holds i_mmap_rwsem and prevents
- * page faults in the truncated range by checking i_size. i_size is
- * modified while holding i_mmap_rwsem.
+ * maps and global counts. Page faults can race with truncation.
+ * During faults, hugetlb_no_page() checks i_size before page allocation,
+ * and again after obtaining page table lock. It will 'back out'
+ * allocations in the truncated range.
* hole punch is indicated if end is not LLONG_MAX
* In the hole punch case we scan the range and release found pages.
* Only when releasing a page is the associated region/reserve map
@@ -451,61 +627,17 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
u32 hash = 0;
index = folio->index;
- if (!truncate_op) {
- /*
- * Only need to hold the fault mutex in the
- * hole punch case. This prevents races with
- * page faults. Races are not possible in the
- * case of truncation.
- */
- hash = hugetlb_fault_mutex_hash(mapping, index);
- mutex_lock(&hugetlb_fault_mutex_table[hash]);
- }
+ hash = hugetlb_fault_mutex_hash(mapping, index);
+ mutex_lock(&hugetlb_fault_mutex_table[hash]);
/*
- * If folio is mapped, it was faulted in after being
- * unmapped in caller. Unmap (again) now after taking
- * the fault mutex. The mutex will prevent faults
- * until we finish removing the folio.
- *
- * This race can only happen in the hole punch case.
- * Getting here in a truncate operation is a bug.
+ * Remove folio that was part of folio_batch.
*/
- if (unlikely(folio_mapped(folio))) {
- BUG_ON(truncate_op);
-
- mutex_unlock(&hugetlb_fault_mutex_table[hash]);
- i_mmap_lock_write(mapping);
- mutex_lock(&hugetlb_fault_mutex_table[hash]);
- hugetlb_vmdelete_list(&mapping->i_mmap,
- index * pages_per_huge_page(h),
- (index + 1) * pages_per_huge_page(h),
- ZAP_FLAG_DROP_MARKER);
- i_mmap_unlock_write(mapping);
- }
-
- folio_lock(folio);
- /*
- * We must free the huge page and remove from page
- * cache (remove_huge_page) BEFORE removing the
- * region/reserve map (hugetlb_unreserve_pages). In
- * rare out of memory conditions, removal of the
- * region/reserve map could fail. Correspondingly,
- * the subpool and global reserve usage count can need
- * to be adjusted.
- */
- VM_BUG_ON(HPageRestoreReserve(&folio->page));
- remove_huge_page(&folio->page);
- freed++;
- if (!truncate_op) {
- if (unlikely(hugetlb_unreserve_pages(inode,
- index, index + 1, 1)))
- hugetlb_fix_reserve_counts(inode);
- }
-
- folio_unlock(folio);
- if (!truncate_op)
- mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+ if (remove_inode_single_folio(h, inode, mapping, folio,
+ index, truncate_op))
+ freed++;
+
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
}
folio_batch_release(&fbatch);
cond_resched();
@@ -543,8 +675,8 @@ static void hugetlb_vmtruncate(struct inode *inode, loff_t offset)
BUG_ON(offset & ~huge_page_mask(h));
pgoff = offset >> PAGE_SHIFT;
- i_mmap_lock_write(mapping);
i_size_write(inode, offset);
+ i_mmap_lock_write(mapping);
if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0,
ZAP_FLAG_DROP_MARKER);
@@ -703,11 +835,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
/* addr is the offset within the file (zero based) */
addr = index * hpage_size;
- /*
- * fault mutex taken here, protects against fault path
- * and hole punch. inode_lock previously taken protects
- * against truncation.
- */
+ /* mutex taken here, fault path and hole punch */
hash = hugetlb_fault_mutex_hash(mapping, index);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
@@ -737,7 +865,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
}
clear_huge_page(page, addr, pages_per_huge_page(h));
__SetPageUptodate(page);
- error = huge_add_to_page_cache(page, mapping, index);
+ error = hugetlb_add_to_page_cache(page, mapping, index);
if (unlikely(error)) {
restore_reserve_on_error(h, &pseudo_vma, addr, page);
put_page(page);
@@ -749,7 +877,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
SetHPageMigratable(page);
/*
- * unlock_page because locked by huge_add_to_page_cache()
+ * unlock_page because locked by hugetlb_add_to_page_cache()
* put_page() due to reference from alloc_huge_page()
*/
unlock_page(page);
@@ -885,33 +1013,18 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
/*
* File creation. Allocate an inode, and we're done..
*/
-static int do_hugetlbfs_mknod(struct inode *dir,
- struct dentry *dentry,
- umode_t mode,
- dev_t dev,
- bool tmpfile)
+static int hugetlbfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t dev)
{
struct inode *inode;
- int error = -ENOSPC;
inode = hugetlbfs_get_inode(dir->i_sb, dir, mode, dev);
- if (inode) {
- dir->i_ctime = dir->i_mtime = current_time(dir);
- if (tmpfile) {
- d_tmpfile(dentry, inode);
- } else {
- d_instantiate(dentry, inode);
- dget(dentry);/* Extra count - pin the dentry in core */
- }
- error = 0;
- }
- return error;
-}
-
-static int hugetlbfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
- struct dentry *dentry, umode_t mode, dev_t dev)
-{
- return do_hugetlbfs_mknod(dir, dentry, mode, dev, false);
+ if (!inode)
+ return -ENOSPC;
+ dir->i_ctime = dir->i_mtime = current_time(dir);
+ d_instantiate(dentry, inode);
+ dget(dentry);/* Extra count - pin the dentry in core */
+ return 0;
}
static int hugetlbfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
@@ -932,10 +1045,17 @@ static int hugetlbfs_create(struct user_namespace *mnt_userns,
}
static int hugetlbfs_tmpfile(struct user_namespace *mnt_userns,
- struct inode *dir, struct dentry *dentry,
+ struct inode *dir, struct file *file,
umode_t mode)
{
- return do_hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0, true);
+ struct inode *inode;
+
+ inode = hugetlbfs_get_inode(dir->i_sb, dir, mode | S_IFREG, 0);
+ if (!inode)
+ return -ENOSPC;
+ dir->i_ctime = dir->i_mtime = current_time(dir);
+ d_tmpfile(file, inode);
+ return finish_open_simple(file, 0);
}
static int hugetlbfs_symlink(struct user_namespace *mnt_userns,
@@ -994,7 +1114,7 @@ static int hugetlbfs_error_remove_page(struct address_space *mapping,
struct inode *inode = mapping->host;
pgoff_t index = page->index;
- remove_huge_page(page);
+ hugetlb_delete_from_page_cache(page);
if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1)))
hugetlb_fix_reserve_counts(inode);
diff --git a/fs/inode.c b/fs/inode.c
index ba1de23c13c1..b608528efd3a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -192,8 +192,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_wb_frn_history = 0;
#endif
- if (security_inode_alloc(inode))
- goto out;
spin_lock_init(&inode->i_lock);
lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
@@ -228,11 +226,12 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_fsnotify_mask = 0;
#endif
inode->i_flctx = NULL;
+
+ if (unlikely(security_inode_alloc(inode)))
+ return -ENOMEM;
this_cpu_inc(nr_inodes);
return 0;
-out:
- return -ENOMEM;
}
EXPORT_SYMBOL(inode_init_always);
diff --git a/fs/internal.h b/fs/internal.h
index 87e96b9024ce..6f0386b34fae 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -16,6 +16,7 @@ struct shrink_control;
struct fs_context;
struct user_namespace;
struct pipe_inode_info;
+struct iov_iter;
/*
* block/bdev.c
@@ -62,7 +63,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
const char *, unsigned int, struct path *);
int do_rmdir(int dfd, struct filename *name);
int do_unlinkat(int dfd, struct filename *name);
-int may_linkat(struct user_namespace *mnt_userns, struct path *link);
+int may_linkat(struct user_namespace *mnt_userns, const struct path *link);
int do_renameat2(int olddfd, struct filename *oldname, int newdfd,
struct filename *newname, unsigned int flags);
int do_mkdirat(int dfd, struct filename *name, umode_t mode);
@@ -101,6 +102,16 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
extern struct file *alloc_empty_file(int, const struct cred *);
extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
+static inline void put_file_access(struct file *file)
+{
+ if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) {
+ i_readcount_dec(file->f_inode);
+ } else if (file->f_mode & FMODE_WRITER) {
+ put_write_access(file->f_inode);
+ __mnt_drop_write(file->f_path.mnt);
+ }
+}
+
/*
* super.c
*/
@@ -221,3 +232,5 @@ ssize_t do_getxattr(struct user_namespace *mnt_userns,
int setxattr_copy(const char __user *name, struct xattr_ctx *ctx);
int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
struct xattr_ctx *ctx);
+
+ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos);
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index ca5c62901541..91ee0b308e13 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1360,6 +1360,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
error = wpc->ops->map_blocks(wpc, inode, pos);
if (error)
break;
+ trace_iomap_writepage_map(inode, &wpc->iomap);
if (WARN_ON_ONCE(wpc->iomap.type == IOMAP_INLINE))
continue;
if (wpc->iomap.type == IOMAP_HOLE)
@@ -1421,7 +1422,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
if (!count)
folio_end_writeback(folio);
done:
- mapping_set_error(folio->mapping, error);
+ mapping_set_error(inode->i_mapping, error);
return error;
}
diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h
index d48868fc40d7..f6ea9540d082 100644
--- a/fs/iomap/trace.h
+++ b/fs/iomap/trace.h
@@ -148,6 +148,7 @@ DEFINE_EVENT(iomap_class, name, \
TP_ARGS(inode, iomap))
DEFINE_IOMAP_EVENT(iomap_iter_dstmap);
DEFINE_IOMAP_EVENT(iomap_iter_srcmap);
+DEFINE_IOMAP_EVENT(iomap_writepage_map);
TRACE_EVENT(iomap_iter,
TP_PROTO(struct iomap_iter *iter, const void *ops,
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index b466172eec25..c4da3f634b92 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -67,8 +67,7 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
for ( i = 0 ; i < pcount ; i++ ) {
if (!pages[i])
continue;
- memset(page_address(pages[i]), 0, PAGE_SIZE);
- flush_dcache_page(pages[i]);
+ memzero_page(pages[i], 0, PAGE_SIZE);
SetPageUptodate(pages[i]);
}
return ((loff_t)pcount) << PAGE_SHIFT;
@@ -82,7 +81,7 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
return 0;
}
haveblocks = isofs_get_blocks(inode, blocknum, bhs, needblocks);
- ll_rw_block(REQ_OP_READ, haveblocks, bhs);
+ bh_read_batch(haveblocks, bhs);
curbh = 0;
curpage = 0;
@@ -120,7 +119,7 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
zerr != Z_STREAM_END) {
if (!stream.avail_out) {
if (pages[curpage]) {
- stream.next_out = page_address(pages[curpage])
+ stream.next_out = kmap_local_page(pages[curpage])
+ poffset;
stream.avail_out = PAGE_SIZE - poffset;
poffset = 0;
@@ -176,6 +175,10 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
flush_dcache_page(pages[curpage]);
SetPageUptodate(pages[curpage]);
}
+ if (stream.next_out != (unsigned char *)zisofs_sink_page) {
+ kunmap_local(stream.next_out);
+ stream.next_out = NULL;
+ }
curpage++;
}
if (!stream.avail_in)
@@ -183,6 +186,8 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
}
inflate_out:
zlib_inflateEnd(&stream);
+ if (stream.next_out && stream.next_out != (unsigned char *)zisofs_sink_page)
+ kunmap_local(stream.next_out);
z_eio:
mutex_unlock(&zisofs_zlib_lock);
@@ -283,9 +288,7 @@ static int zisofs_fill_pages(struct inode *inode, int full_page, int pcount,
}
if (poffset && *pages) {
- memset(page_address(*pages) + poffset, 0,
- PAGE_SIZE - poffset);
- flush_dcache_page(*pages);
+ memzero_page(*pages, poffset, PAGE_SIZE - poffset);
SetPageUptodate(*pages);
}
return 0;
@@ -343,10 +346,8 @@ static int zisofs_read_folio(struct file *file, struct folio *folio)
for (i = 0; i < pcount; i++, index++) {
if (i != full_page)
pages[i] = grab_cache_page_nowait(mapping, index);
- if (pages[i]) {
+ if (pages[i])
ClearPageError(pages[i]);
- kmap(pages[i]);
- }
}
err = zisofs_fill_pages(inode, full_page, pcount, pages);
@@ -357,7 +358,6 @@ static int zisofs_read_folio(struct file *file, struct folio *folio)
flush_dcache_page(pages[i]);
if (i == full_page && err)
SetPageError(pages[i]);
- kunmap(pages[i]);
unlock_page(pages[i]);
if (i != full_page)
put_page(pages[i]);
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 88bf20303466..df9d70588b60 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1277,13 +1277,11 @@ static int isofs_read_level3_size(struct inode *inode)
} while (more_entries);
out:
kfree(tmpde);
- if (bh)
- brelse(bh);
+ brelse(bh);
return 0;
out_nomem:
- if (bh)
- brelse(bh);
+ brelse(bh);
return -ENOMEM;
out_noread:
@@ -1486,8 +1484,7 @@ static int isofs_read_inode(struct inode *inode, int relocated)
ret = 0;
out:
kfree(tmpde);
- if (bh)
- brelse(bh);
+ brelse(bh);
return ret;
out_badread:
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index b2b2bc9b88d9..885a7a6cc53e 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -122,8 +122,8 @@ static int journal_submit_commit_record(journal_t *journal,
{
struct commit_header *tmp;
struct buffer_head *bh;
- int ret;
struct timespec64 now;
+ blk_opf_t write_flags = REQ_OP_WRITE | REQ_SYNC;
*cbh = NULL;
@@ -155,13 +155,11 @@ static int journal_submit_commit_record(journal_t *journal,
if (journal->j_flags & JBD2_BARRIER &&
!jbd2_has_feature_async_commit(journal))
- ret = submit_bh(REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH |
- REQ_FUA, bh);
- else
- ret = submit_bh(REQ_OP_WRITE | REQ_SYNC, bh);
+ write_flags |= REQ_PREFLUSH | REQ_FUA;
+ submit_bh(write_flags, bh);
*cbh = bh;
- return ret;
+ return 0;
}
/*
@@ -570,7 +568,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
journal->j_running_transaction = NULL;
start_time = ktime_get();
commit_transaction->t_log_start = journal->j_head;
- wake_up(&journal->j_wait_transaction_locked);
+ wake_up_all(&journal->j_wait_transaction_locked);
write_unlock(&journal->j_state_lock);
jbd2_debug(3, "JBD2: commit phase 2a\n");
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 6350d3857c89..2696f43e7239 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -923,10 +923,16 @@ int jbd2_fc_wait_bufs(journal_t *journal, int num_blks)
for (i = j_fc_off - 1; i >= j_fc_off - num_blks; i--) {
bh = journal->j_fc_wbuf[i];
wait_on_buffer(bh);
+ /*
+ * Update j_fc_off so jbd2_fc_release_bufs can release remain
+ * buffer head.
+ */
+ if (unlikely(!buffer_uptodate(bh))) {
+ journal->j_fc_off = i + 1;
+ return -EIO;
+ }
put_bh(bh);
journal->j_fc_wbuf[i] = NULL;
- if (unlikely(!buffer_uptodate(bh)))
- return -EIO;
}
return 0;
@@ -1606,7 +1612,7 @@ static int jbd2_write_superblock(journal_t *journal, blk_opf_t write_flags)
{
struct buffer_head *bh = journal->j_sb_buffer;
journal_superblock_t *sb = journal->j_superblock;
- int ret;
+ int ret = 0;
/* Buffer got discarded which means block device got invalidated */
if (!buffer_mapped(bh)) {
@@ -1636,7 +1642,7 @@ static int jbd2_write_superblock(journal_t *journal, blk_opf_t write_flags)
sb->s_checksum = jbd2_superblock_csum(journal, sb);
get_bh(bh);
bh->b_end_io = end_buffer_write_sync;
- ret = submit_bh(REQ_OP_WRITE | write_flags, bh);
+ submit_bh(REQ_OP_WRITE | write_flags, bh);
wait_on_buffer(bh);
if (buffer_write_io_error(bh)) {
clear_buffer_write_io_error(bh);
@@ -1644,9 +1650,8 @@ static int jbd2_write_superblock(journal_t *journal, blk_opf_t write_flags)
ret = -EIO;
}
if (ret) {
- printk(KERN_ERR "JBD2: Error %d detected when updating "
- "journal superblock for %s.\n", ret,
- journal->j_devname);
+ printk(KERN_ERR "JBD2: I/O error when updating journal superblock for %s.\n",
+ journal->j_devname);
if (!is_journal_aborted(journal))
jbd2_journal_abort(journal, ret);
}
@@ -1893,19 +1898,16 @@ static int journal_get_superblock(journal_t *journal)
{
struct buffer_head *bh;
journal_superblock_t *sb;
- int err = -EIO;
+ int err;
bh = journal->j_sb_buffer;
J_ASSERT(bh != NULL);
- if (!buffer_uptodate(bh)) {
- ll_rw_block(REQ_OP_READ, 1, &bh);
- wait_on_buffer(bh);
- if (!buffer_uptodate(bh)) {
- printk(KERN_ERR
- "JBD2: IO error reading journal superblock\n");
- goto out;
- }
+ err = bh_read(bh, 0);
+ if (err < 0) {
+ printk(KERN_ERR
+ "JBD2: IO error reading journal superblock\n");
+ goto out;
}
if (buffer_verified(bh))
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index f548479615c6..8286a9ec122f 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -100,7 +100,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
bufs[nbufs++] = bh;
if (nbufs == MAXBUF) {
- ll_rw_block(REQ_OP_READ, nbufs, bufs);
+ bh_readahead_batch(nbufs, bufs, 0);
journal_brelse_array(bufs, nbufs);
nbufs = 0;
}
@@ -109,7 +109,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
}
if (nbufs)
- ll_rw_block(REQ_OP_READ, nbufs, bufs);
+ bh_readahead_batch(nbufs, bufs, 0);
err = 0;
failed:
@@ -152,9 +152,14 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
return -ENOMEM;
if (!buffer_uptodate(bh)) {
- /* If this is a brand new buffer, start readahead.
- Otherwise, we assume we are already reading it. */
- if (!buffer_req(bh))
+ /*
+ * If this is a brand new buffer, start readahead.
+ * Otherwise, we assume we are already reading it.
+ */
+ bool need_readahead = !buffer_req(bh);
+
+ bh_read_nowait(bh, 0);
+ if (need_readahead)
do_readahead(journal, offset);
wait_on_buffer(bh);
}
@@ -256,6 +261,7 @@ static int fc_do_one_pass(journal_t *journal,
err = journal->j_fc_replay_callback(journal, bh, pass,
next_fc_block - journal->j_fc_first,
expected_commit_id);
+ brelse(bh);
next_fc_block++;
if (err < 0 || err == JBD2_FC_REPLAY_STOP)
break;
@@ -687,7 +693,6 @@ static int do_one_pass(journal_t *journal,
mark_buffer_dirty(nbh);
BUFFER_TRACE(nbh, "marking uptodate");
++info->nr_replays;
- /* ll_rw_block(WRITE, 1, &nbh); */
unlock_buffer(nbh);
brelse(obh);
brelse(nbh);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e1be93ccd81c..6a404ac1c178 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -168,7 +168,7 @@ static void wait_transaction_locked(journal_t *journal)
int need_to_start;
tid_t tid = journal->j_running_transaction->t_tid;
- prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
+ prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait,
TASK_UNINTERRUPTIBLE);
need_to_start = !tid_geq(journal->j_commit_request, tid);
read_unlock(&journal->j_state_lock);
@@ -194,7 +194,7 @@ static void wait_transaction_switching(journal_t *journal)
read_unlock(&journal->j_state_lock);
return;
}
- prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
+ prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait,
TASK_UNINTERRUPTIBLE);
read_unlock(&journal->j_state_lock);
/*
@@ -920,7 +920,7 @@ void jbd2_journal_unlock_updates (journal_t *journal)
write_lock(&journal->j_state_lock);
--journal->j_barrier_count;
write_unlock(&journal->j_state_lock);
- wake_up(&journal->j_wait_transaction_locked);
+ wake_up_all(&journal->j_wait_transaction_locked);
}
static void warn_dirty_buffer(struct buffer_head *bh)
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index c6821a509481..4061e0ba7010 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1035,7 +1035,7 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c,
{
int i, ret;
int cmlen = min_t(int, c->oobavail, OOB_CM_SIZE);
- struct mtd_oob_ops ops;
+ struct mtd_oob_ops ops = { };
ops.mode = MTD_OPS_AUTO_OOB;
ops.ooblen = NR_OOB_SCAN_PAGES * c->oobavail;
@@ -1076,7 +1076,7 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c,
int jffs2_check_nand_cleanmarker(struct jffs2_sb_info *c,
struct jffs2_eraseblock *jeb)
{
- struct mtd_oob_ops ops;
+ struct mtd_oob_ops ops = { };
int ret, cmlen = min_t(int, c->oobavail, OOB_CM_SIZE);
ops.mode = MTD_OPS_AUTO_OOB;
@@ -1101,7 +1101,7 @@ int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c,
struct jffs2_eraseblock *jeb)
{
int ret;
- struct mtd_oob_ops ops;
+ struct mtd_oob_ops ops = { };
int cmlen = min_t(int, c->oobavail, OOB_CM_SIZE);
ops.mode = MTD_OPS_AUTO_OOB;
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 1cc88ba6de90..3990f3e270cb 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -472,6 +472,16 @@ static void kernfs_drain(struct kernfs_node *kn)
lockdep_assert_held_write(&root->kernfs_rwsem);
WARN_ON_ONCE(kernfs_active(kn));
+ /*
+ * Skip draining if already fully drained. This avoids draining and its
+ * lockdep annotations for nodes which have never been activated
+ * allowing embedding kernfs_remove() in create error paths without
+ * worrying about draining.
+ */
+ if (atomic_read(&kn->active) == KN_DEACTIVATED_BIAS &&
+ !kernfs_should_drain_open_files(kn))
+ return;
+
up_write(&root->kernfs_rwsem);
if (kernfs_lockdep(kn)) {
@@ -480,7 +490,6 @@ static void kernfs_drain(struct kernfs_node *kn)
lock_contended(&kn->dep_map, _RET_IP_);
}
- /* but everyone should wait for draining */
wait_event(root->deactivate_waitq,
atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);
@@ -489,7 +498,8 @@ static void kernfs_drain(struct kernfs_node *kn)
rwsem_release(&kn->dep_map, _RET_IP_);
}
- kernfs_drain_open_files(kn);
+ if (kernfs_should_drain_open_files(kn))
+ kernfs_drain_open_files(kn);
down_write(&root->kernfs_rwsem);
}
@@ -695,13 +705,7 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
goto err_unlock;
}
- /*
- * ACTIVATED is protected with kernfs_mutex but it was clear when
- * @kn was added to idr and we just wanna see it set. No need to
- * grab kernfs_mutex.
- */
- if (unlikely(!(kn->flags & KERNFS_ACTIVATED) ||
- !atomic_inc_not_zero(&kn->count)))
+ if (unlikely(!kernfs_active(kn) || !atomic_inc_not_zero(&kn->count)))
goto err_unlock;
spin_unlock(&kernfs_idr_lock);
@@ -743,10 +747,7 @@ int kernfs_add_one(struct kernfs_node *kn)
goto out_unlock;
ret = -ENOENT;
- if (parent->flags & KERNFS_EMPTY_DIR)
- goto out_unlock;
-
- if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent))
+ if (parent->flags & (KERNFS_REMOVING | KERNFS_EMPTY_DIR))
goto out_unlock;
kn->hash = kernfs_name_hash(kn->name, kn->ns);
@@ -1304,6 +1305,21 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
return pos->parent;
}
+static void kernfs_activate_one(struct kernfs_node *kn)
+{
+ lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem);
+
+ kn->flags |= KERNFS_ACTIVATED;
+
+ if (kernfs_active(kn) || (kn->flags & (KERNFS_HIDDEN | KERNFS_REMOVING)))
+ return;
+
+ WARN_ON_ONCE(kn->parent && RB_EMPTY_NODE(&kn->rb));
+ WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
+
+ atomic_sub(KN_DEACTIVATED_BIAS, &kn->active);
+}
+
/**
* kernfs_activate - activate a node which started deactivated
* @kn: kernfs_node whose subtree is to be activated
@@ -1325,15 +1341,42 @@ void kernfs_activate(struct kernfs_node *kn)
down_write(&root->kernfs_rwsem);
pos = NULL;
- while ((pos = kernfs_next_descendant_post(pos, kn))) {
- if (pos->flags & KERNFS_ACTIVATED)
- continue;
+ while ((pos = kernfs_next_descendant_post(pos, kn)))
+ kernfs_activate_one(pos);
- WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb));
- WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS);
+ up_write(&root->kernfs_rwsem);
+}
- atomic_sub(KN_DEACTIVATED_BIAS, &pos->active);
- pos->flags |= KERNFS_ACTIVATED;
+/**
+ * kernfs_show - show or hide a node
+ * @kn: kernfs_node to show or hide
+ * @show: whether to show or hide
+ *
+ * If @show is %false, @kn is marked hidden and deactivated. A hidden node is
+ * ignored in future activaitons. If %true, the mark is removed and activation
+ * state is restored. This function won't implicitly activate a new node in a
+ * %KERNFS_ROOT_CREATE_DEACTIVATED root which hasn't been activated yet.
+ *
+ * To avoid recursion complexities, directories aren't supported for now.
+ */
+void kernfs_show(struct kernfs_node *kn, bool show)
+{
+ struct kernfs_root *root = kernfs_root(kn);
+
+ if (WARN_ON_ONCE(kernfs_type(kn) == KERNFS_DIR))
+ return;
+
+ down_write(&root->kernfs_rwsem);
+
+ if (show) {
+ kn->flags &= ~KERNFS_HIDDEN;
+ if (kn->flags & KERNFS_ACTIVATED)
+ kernfs_activate_one(kn);
+ } else {
+ kn->flags |= KERNFS_HIDDEN;
+ if (kernfs_active(kn))
+ atomic_add(KN_DEACTIVATED_BIAS, &kn->active);
+ kernfs_drain(kn);
}
up_write(&root->kernfs_rwsem);
@@ -1358,34 +1401,27 @@ static void __kernfs_remove(struct kernfs_node *kn)
pr_debug("kernfs %s: removing\n", kn->name);
- /* prevent any new usage under @kn by deactivating all nodes */
+ /* prevent new usage by marking all nodes removing and deactivating */
pos = NULL;
- while ((pos = kernfs_next_descendant_post(pos, kn)))
+ while ((pos = kernfs_next_descendant_post(pos, kn))) {
+ pos->flags |= KERNFS_REMOVING;
if (kernfs_active(pos))
atomic_add(KN_DEACTIVATED_BIAS, &pos->active);
+ }
/* deactivate and unlink the subtree node-by-node */
do {
pos = kernfs_leftmost_descendant(kn);
/*
- * kernfs_drain() drops kernfs_rwsem temporarily and @pos's
+ * kernfs_drain() may drop kernfs_rwsem temporarily and @pos's
* base ref could have been put by someone else by the time
* the function returns. Make sure it doesn't go away
* underneath us.
*/
kernfs_get(pos);
- /*
- * Drain iff @kn was activated. This avoids draining and
- * its lockdep annotations for nodes which have never been
- * activated and allows embedding kernfs_remove() in create
- * error paths without worrying about draining.
- */
- if (kn->flags & KERNFS_ACTIVATED)
- kernfs_drain(pos);
- else
- WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
+ kernfs_drain(pos);
/*
* kernfs_unlink_sibling() succeeds once per node. Use it
@@ -1585,8 +1621,11 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
down_write(&root->kernfs_rwsem);
kn = kernfs_find_ns(parent, name, ns);
- if (kn)
+ if (kn) {
+ kernfs_get(kn);
__kernfs_remove(kn);
+ kernfs_put(kn);
+ }
up_write(&root->kernfs_rwsem);
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index b3ec34386b43..9ab6c92e02da 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -23,6 +23,8 @@ struct kernfs_open_node {
atomic_t event;
wait_queue_head_t poll;
struct list_head files; /* goes through kernfs_open_file.list */
+ unsigned int nr_mmapped;
+ unsigned int nr_to_release;
};
/*
@@ -57,31 +59,17 @@ static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn)
}
/**
- * kernfs_deref_open_node - Get kernfs_open_node corresponding to @kn.
- *
- * @of: associated kernfs_open_file instance.
- * @kn: target kernfs_node.
- *
- * Fetch and return ->attr.open of @kn if @of->list is non empty.
- * If @of->list is not empty we can safely assume that @of is on
- * @kn->attr.open->files list and this guarantees that @kn->attr.open
- * will not vanish i.e. dereferencing outside RCU read-side critical
- * section is safe here.
- *
- * The caller needs to make sure that @of->list is not empty.
+ * of_on - Return the kernfs_open_node of the specified kernfs_open_file
+ * @of: taret kernfs_open_file
*/
-static struct kernfs_open_node *
-kernfs_deref_open_node(struct kernfs_open_file *of, struct kernfs_node *kn)
+static struct kernfs_open_node *of_on(struct kernfs_open_file *of)
{
- struct kernfs_open_node *on;
-
- on = rcu_dereference_check(kn->attr.open, !list_empty(&of->list));
-
- return on;
+ return rcu_dereference_protected(of->kn->attr.open,
+ !list_empty(&of->list));
}
/**
- * kernfs_deref_open_node_protected - Get kernfs_open_node corresponding to @kn
+ * kernfs_deref_open_node_locked - Get kernfs_open_node corresponding to @kn
*
* @kn: target kernfs_node.
*
@@ -96,7 +84,7 @@ kernfs_deref_open_node(struct kernfs_open_file *of, struct kernfs_node *kn)
* The caller needs to make sure that kernfs_open_file_mutex is held.
*/
static struct kernfs_open_node *
-kernfs_deref_open_node_protected(struct kernfs_node *kn)
+kernfs_deref_open_node_locked(struct kernfs_node *kn)
{
return rcu_dereference_protected(kn->attr.open,
lockdep_is_held(kernfs_open_file_mutex_ptr(kn)));
@@ -207,12 +195,8 @@ static void kernfs_seq_stop(struct seq_file *sf, void *v)
static int kernfs_seq_show(struct seq_file *sf, void *v)
{
struct kernfs_open_file *of = sf->private;
- struct kernfs_open_node *on = kernfs_deref_open_node(of, of->kn);
-
- if (!on)
- return -EINVAL;
- of->event = atomic_read(&on->event);
+ of->event = atomic_read(&of_on(of)->event);
return of->kn->attr.ops->seq_show(sf, v);
}
@@ -235,7 +219,6 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE);
const struct kernfs_ops *ops;
- struct kernfs_open_node *on;
char *buf;
buf = of->prealloc_buf;
@@ -257,14 +240,7 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
goto out_free;
}
- on = kernfs_deref_open_node(of, of->kn);
- if (!on) {
- len = -EINVAL;
- mutex_unlock(&of->mutex);
- goto out_free;
- }
-
- of->event = atomic_read(&on->event);
+ of->event = atomic_read(&of_on(of)->event);
ops = kernfs_ops(of->kn);
if (ops->read)
@@ -553,6 +529,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
rc = 0;
of->mmapped = true;
+ of_on(of)->nr_mmapped++;
of->vm_ops = vma->vm_ops;
vma->vm_ops = &kernfs_vm_ops;
out_put:
@@ -580,31 +557,30 @@ out_unlock:
static int kernfs_get_open_node(struct kernfs_node *kn,
struct kernfs_open_file *of)
{
- struct kernfs_open_node *on, *new_on = NULL;
- struct mutex *mutex = NULL;
+ struct kernfs_open_node *on;
+ struct mutex *mutex;
mutex = kernfs_open_file_mutex_lock(kn);
- on = kernfs_deref_open_node_protected(kn);
+ on = kernfs_deref_open_node_locked(kn);
- if (on) {
- list_add_tail(&of->list, &on->files);
- mutex_unlock(mutex);
- return 0;
- } else {
+ if (!on) {
/* not there, initialize a new one */
- new_on = kmalloc(sizeof(*new_on), GFP_KERNEL);
- if (!new_on) {
+ on = kzalloc(sizeof(*on), GFP_KERNEL);
+ if (!on) {
mutex_unlock(mutex);
return -ENOMEM;
}
- atomic_set(&new_on->event, 1);
- init_waitqueue_head(&new_on->poll);
- INIT_LIST_HEAD(&new_on->files);
- list_add_tail(&of->list, &new_on->files);
- rcu_assign_pointer(kn->attr.open, new_on);
+ atomic_set(&on->event, 1);
+ init_waitqueue_head(&on->poll);
+ INIT_LIST_HEAD(&on->files);
+ rcu_assign_pointer(kn->attr.open, on);
}
- mutex_unlock(mutex);
+ list_add_tail(&of->list, &on->files);
+ if (kn->flags & KERNFS_HAS_RELEASE)
+ on->nr_to_release++;
+
+ mutex_unlock(mutex);
return 0;
}
@@ -613,6 +589,7 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
*
* @kn: target kernfs_node
* @of: associated kernfs_open_file
+ * @open_failed: ->open() failed, cancel ->release()
*
* Unlink @of from list of @kn's associated open files. If list of
* associated open files becomes empty, disassociate and free
@@ -622,21 +599,30 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
* None.
*/
static void kernfs_unlink_open_file(struct kernfs_node *kn,
- struct kernfs_open_file *of)
+ struct kernfs_open_file *of,
+ bool open_failed)
{
struct kernfs_open_node *on;
- struct mutex *mutex = NULL;
+ struct mutex *mutex;
mutex = kernfs_open_file_mutex_lock(kn);
- on = kernfs_deref_open_node_protected(kn);
+ on = kernfs_deref_open_node_locked(kn);
if (!on) {
mutex_unlock(mutex);
return;
}
- if (of)
+ if (of) {
+ if (kn->flags & KERNFS_HAS_RELEASE) {
+ WARN_ON_ONCE(of->released == open_failed);
+ if (open_failed)
+ on->nr_to_release--;
+ }
+ if (of->mmapped)
+ on->nr_mmapped--;
list_del(&of->list);
+ }
if (list_empty(&on->files)) {
rcu_assign_pointer(kn->attr.open, NULL);
@@ -763,7 +749,7 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
return 0;
err_put_node:
- kernfs_unlink_open_file(kn, of);
+ kernfs_unlink_open_file(kn, of, true);
err_seq_release:
seq_release(inode, file);
err_free:
@@ -795,6 +781,7 @@ static void kernfs_release_file(struct kernfs_node *kn,
*/
kn->attr.ops->release(of);
of->released = true;
+ of_on(of)->nr_to_release--;
}
}
@@ -802,15 +789,16 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
{
struct kernfs_node *kn = inode->i_private;
struct kernfs_open_file *of = kernfs_of(filp);
- struct mutex *mutex = NULL;
if (kn->flags & KERNFS_HAS_RELEASE) {
+ struct mutex *mutex;
+
mutex = kernfs_open_file_mutex_lock(kn);
kernfs_release_file(kn, of);
mutex_unlock(mutex);
}
- kernfs_unlink_open_file(kn, of);
+ kernfs_unlink_open_file(kn, of, false);
seq_release(inode, filp);
kfree(of->prealloc_buf);
kfree(of);
@@ -818,28 +806,33 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
return 0;
}
-void kernfs_drain_open_files(struct kernfs_node *kn)
+bool kernfs_should_drain_open_files(struct kernfs_node *kn)
{
struct kernfs_open_node *on;
- struct kernfs_open_file *of;
- struct mutex *mutex = NULL;
-
- if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
- return;
+ bool ret;
/*
- * lockless opportunistic check is safe below because no one is adding to
- * ->attr.open at this point of time. This check allows early bail out
- * if ->attr.open is already NULL. kernfs_unlink_open_file makes
- * ->attr.open NULL only while holding kernfs_open_file_mutex so below
- * check under kernfs_open_file_mutex_ptr(kn) will ensure bailing out if
- * ->attr.open became NULL while waiting for the mutex.
+ * @kn being deactivated guarantees that @kn->attr.open can't change
+ * beneath us making the lockless test below safe.
*/
- if (!rcu_access_pointer(kn->attr.open))
- return;
+ WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
+
+ rcu_read_lock();
+ on = rcu_dereference(kn->attr.open);
+ ret = on && (on->nr_mmapped || on->nr_to_release);
+ rcu_read_unlock();
+
+ return ret;
+}
+
+void kernfs_drain_open_files(struct kernfs_node *kn)
+{
+ struct kernfs_open_node *on;
+ struct kernfs_open_file *of;
+ struct mutex *mutex;
mutex = kernfs_open_file_mutex_lock(kn);
- on = kernfs_deref_open_node_protected(kn);
+ on = kernfs_deref_open_node_locked(kn);
if (!on) {
mutex_unlock(mutex);
return;
@@ -848,13 +841,17 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
list_for_each_entry(of, &on->files, list) {
struct inode *inode = file_inode(of->file);
- if (kn->flags & KERNFS_HAS_MMAP)
+ if (of->mmapped) {
unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+ of->mmapped = false;
+ on->nr_mmapped--;
+ }
if (kn->flags & KERNFS_HAS_RELEASE)
kernfs_release_file(kn, of);
}
+ WARN_ON_ONCE(on->nr_mmapped || on->nr_to_release);
mutex_unlock(mutex);
}
@@ -874,11 +871,7 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
*/
__poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait)
{
- struct kernfs_node *kn = kernfs_dentry_node(of->file->f_path.dentry);
- struct kernfs_open_node *on = kernfs_deref_open_node(of, kn);
-
- if (!on)
- return EPOLLERR;
+ struct kernfs_open_node *on = of_on(of);
poll_wait(of->file, &on->poll, wait);
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 3ae214d02d44..fc5821effd97 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -157,6 +157,7 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
*/
extern const struct file_operations kernfs_file_fops;
+bool kernfs_should_drain_open_files(struct kernfs_node *kn);
void kernfs_drain_open_files(struct kernfs_node *kn);
/*
diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c
index c5a5c7b90d72..2a39ffb8423b 100644
--- a/fs/ksmbd/auth.c
+++ b/fs/ksmbd/auth.c
@@ -424,6 +424,9 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
NTLMSSP_NEGOTIATE_56);
}
+ if (cflags & NTLMSSP_NEGOTIATE_SEAL && smb3_encryption_negotiated(conn))
+ flags |= NTLMSSP_NEGOTIATE_SEAL;
+
if (cflags & NTLMSSP_NEGOTIATE_ALWAYS_SIGN)
flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
@@ -984,13 +987,16 @@ out:
return rc;
}
-static int ksmbd_get_encryption_key(struct ksmbd_conn *conn, __u64 ses_id,
+static int ksmbd_get_encryption_key(struct ksmbd_work *work, __u64 ses_id,
int enc, u8 *key)
{
struct ksmbd_session *sess;
u8 *ses_enc_key;
- sess = ksmbd_session_lookup_all(conn, ses_id);
+ if (enc)
+ sess = work->sess;
+ else
+ sess = ksmbd_session_lookup_all(work->conn, ses_id);
if (!sess)
return -EINVAL;
@@ -1078,9 +1084,10 @@ static struct scatterlist *ksmbd_init_sg(struct kvec *iov, unsigned int nvec,
return sg;
}
-int ksmbd_crypt_message(struct ksmbd_conn *conn, struct kvec *iov,
+int ksmbd_crypt_message(struct ksmbd_work *work, struct kvec *iov,
unsigned int nvec, int enc)
{
+ struct ksmbd_conn *conn = work->conn;
struct smb2_transform_hdr *tr_hdr = smb2_get_msg(iov[0].iov_base);
unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20;
int rc;
@@ -1094,7 +1101,7 @@ int ksmbd_crypt_message(struct ksmbd_conn *conn, struct kvec *iov,
unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize);
struct ksmbd_crypto_ctx *ctx;
- rc = ksmbd_get_encryption_key(conn,
+ rc = ksmbd_get_encryption_key(work,
le64_to_cpu(tr_hdr->SessionId),
enc,
key);
diff --git a/fs/ksmbd/auth.h b/fs/ksmbd/auth.h
index 25b772653de0..362b6159a6cf 100644
--- a/fs/ksmbd/auth.h
+++ b/fs/ksmbd/auth.h
@@ -33,9 +33,10 @@
struct ksmbd_session;
struct ksmbd_conn;
+struct ksmbd_work;
struct kvec;
-int ksmbd_crypt_message(struct ksmbd_conn *conn, struct kvec *iov,
+int ksmbd_crypt_message(struct ksmbd_work *work, struct kvec *iov,
unsigned int nvec, int enc);
void ksmbd_copy_gss_neg_header(void *buf);
int ksmbd_auth_ntlmv2(struct ksmbd_conn *conn, struct ksmbd_session *sess,
diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c
index 756ad631c019..12be8386446a 100644
--- a/fs/ksmbd/connection.c
+++ b/fs/ksmbd/connection.c
@@ -60,6 +60,12 @@ struct ksmbd_conn *ksmbd_conn_alloc(void)
conn->local_nls = load_nls("utf8");
if (!conn->local_nls)
conn->local_nls = load_nls_default();
+ if (IS_ENABLED(CONFIG_UNICODE))
+ conn->um = utf8_load(UNICODE_AGE(12, 1, 0));
+ else
+ conn->um = ERR_PTR(-EOPNOTSUPP);
+ if (IS_ERR(conn->um))
+ conn->um = NULL;
atomic_set(&conn->req_running, 0);
atomic_set(&conn->r_count, 0);
conn->total_credits = 1;
@@ -350,6 +356,8 @@ out:
wait_event(conn->r_count_q, atomic_read(&conn->r_count) == 0);
+ if (IS_ENABLED(CONFIG_UNICODE))
+ utf8_unload(conn->um);
unload_nls(conn->local_nls);
if (default_conn_ops.terminate_fn)
default_conn_ops.terminate_fn(conn);
diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h
index e7f7d5707951..3643354a3fa7 100644
--- a/fs/ksmbd/connection.h
+++ b/fs/ksmbd/connection.h
@@ -14,6 +14,7 @@
#include <net/request_sock.h>
#include <linux/kthread.h>
#include <linux/nls.h>
+#include <linux/unicode.h>
#include "smb_common.h"
#include "ksmbd_work.h"
@@ -46,6 +47,7 @@ struct ksmbd_conn {
char *request_buf;
struct ksmbd_transport *transport;
struct nls_table *local_nls;
+ struct unicode_map *um;
struct list_head conns_list;
/* smb session 1 per user */
struct xarray sessions;
diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h
index e0cbcfa98c7e..ff07c67f4565 100644
--- a/fs/ksmbd/ksmbd_netlink.h
+++ b/fs/ksmbd/ksmbd_netlink.h
@@ -163,7 +163,8 @@ struct ksmbd_share_config_response {
__u16 force_directory_mode;
__u16 force_uid;
__u16 force_gid;
- __u32 reserved[128]; /* Reserved room */
+ __s8 share_name[KSMBD_REQ_MAX_SHARE_NAME];
+ __u32 reserved[112]; /* Reserved room */
__u32 veto_list_sz;
__s8 ____payload[];
};
diff --git a/fs/ksmbd/mgmt/share_config.c b/fs/ksmbd/mgmt/share_config.c
index c9bca1c2c834..328a412259dc 100644
--- a/fs/ksmbd/mgmt/share_config.c
+++ b/fs/ksmbd/mgmt/share_config.c
@@ -16,6 +16,7 @@
#include "user_config.h"
#include "user_session.h"
#include "../transport_ipc.h"
+#include "../misc.h"
#define SHARE_HASH_BITS 3
static DEFINE_HASHTABLE(shares_table, SHARE_HASH_BITS);
@@ -26,7 +27,7 @@ struct ksmbd_veto_pattern {
struct list_head list;
};
-static unsigned int share_name_hash(char *name)
+static unsigned int share_name_hash(const char *name)
{
return jhash(name, strlen(name), 0);
}
@@ -72,7 +73,7 @@ __get_share_config(struct ksmbd_share_config *share)
return share;
}
-static struct ksmbd_share_config *__share_lookup(char *name)
+static struct ksmbd_share_config *__share_lookup(const char *name)
{
struct ksmbd_share_config *share;
unsigned int key = share_name_hash(name);
@@ -119,7 +120,8 @@ static int parse_veto_list(struct ksmbd_share_config *share,
return 0;
}
-static struct ksmbd_share_config *share_config_request(char *name)
+static struct ksmbd_share_config *share_config_request(struct unicode_map *um,
+ const char *name)
{
struct ksmbd_share_config_response *resp;
struct ksmbd_share_config *share = NULL;
@@ -133,6 +135,19 @@ static struct ksmbd_share_config *share_config_request(char *name)
if (resp->flags == KSMBD_SHARE_FLAG_INVALID)
goto out;
+ if (*resp->share_name) {
+ char *cf_resp_name;
+ bool equal;
+
+ cf_resp_name = ksmbd_casefold_sharename(um, resp->share_name);
+ if (IS_ERR(cf_resp_name))
+ goto out;
+ equal = !strcmp(cf_resp_name, name);
+ kfree(cf_resp_name);
+ if (!equal)
+ goto out;
+ }
+
share = kzalloc(sizeof(struct ksmbd_share_config), GFP_KERNEL);
if (!share)
goto out;
@@ -190,20 +205,11 @@ out:
return share;
}
-static void strtolower(char *share_name)
-{
- while (*share_name) {
- *share_name = tolower(*share_name);
- share_name++;
- }
-}
-
-struct ksmbd_share_config *ksmbd_share_config_get(char *name)
+struct ksmbd_share_config *ksmbd_share_config_get(struct unicode_map *um,
+ const char *name)
{
struct ksmbd_share_config *share;
- strtolower(name);
-
down_read(&shares_table_lock);
share = __share_lookup(name);
if (share)
@@ -212,7 +218,7 @@ struct ksmbd_share_config *ksmbd_share_config_get(char *name)
if (share)
return share;
- return share_config_request(name);
+ return share_config_request(um, name);
}
bool ksmbd_share_veto_filename(struct ksmbd_share_config *share,
diff --git a/fs/ksmbd/mgmt/share_config.h b/fs/ksmbd/mgmt/share_config.h
index 902f2cb1963a..3fd338293942 100644
--- a/fs/ksmbd/mgmt/share_config.h
+++ b/fs/ksmbd/mgmt/share_config.h
@@ -9,6 +9,7 @@
#include <linux/workqueue.h>
#include <linux/hashtable.h>
#include <linux/path.h>
+#include <linux/unicode.h>
struct ksmbd_share_config {
char *name;
@@ -74,7 +75,8 @@ static inline void ksmbd_share_config_put(struct ksmbd_share_config *share)
__ksmbd_share_config_put(share);
}
-struct ksmbd_share_config *ksmbd_share_config_get(char *name);
+struct ksmbd_share_config *ksmbd_share_config_get(struct unicode_map *um,
+ const char *name);
bool ksmbd_share_veto_filename(struct ksmbd_share_config *share,
const char *filename);
#endif /* __SHARE_CONFIG_MANAGEMENT_H__ */
diff --git a/fs/ksmbd/mgmt/tree_connect.c b/fs/ksmbd/mgmt/tree_connect.c
index 97ab7987df6e..8ce17b3fb8da 100644
--- a/fs/ksmbd/mgmt/tree_connect.c
+++ b/fs/ksmbd/mgmt/tree_connect.c
@@ -17,7 +17,7 @@
struct ksmbd_tree_conn_status
ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess,
- char *share_name)
+ const char *share_name)
{
struct ksmbd_tree_conn_status status = {-ENOENT, NULL};
struct ksmbd_tree_connect_response *resp = NULL;
@@ -26,7 +26,7 @@ ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess,
struct sockaddr *peer_addr;
int ret;
- sc = ksmbd_share_config_get(share_name);
+ sc = ksmbd_share_config_get(conn->um, share_name);
if (!sc)
return status;
@@ -61,7 +61,7 @@ ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess,
struct ksmbd_share_config *new_sc;
ksmbd_share_config_del(sc);
- new_sc = ksmbd_share_config_get(share_name);
+ new_sc = ksmbd_share_config_get(conn->um, share_name);
if (!new_sc) {
pr_err("Failed to update stale share config\n");
status.ret = -ESTALE;
diff --git a/fs/ksmbd/mgmt/tree_connect.h b/fs/ksmbd/mgmt/tree_connect.h
index 71e50271dccf..0f97ddc1e39c 100644
--- a/fs/ksmbd/mgmt/tree_connect.h
+++ b/fs/ksmbd/mgmt/tree_connect.h
@@ -42,7 +42,7 @@ struct ksmbd_session;
struct ksmbd_tree_conn_status
ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess,
- char *share_name);
+ const char *share_name);
int ksmbd_tree_conn_disconnect(struct ksmbd_session *sess,
struct ksmbd_tree_connect *tree_conn);
diff --git a/fs/ksmbd/misc.c b/fs/ksmbd/misc.c
index df991107ad2c..9e8afaa686e3 100644
--- a/fs/ksmbd/misc.c
+++ b/fs/ksmbd/misc.c
@@ -7,6 +7,7 @@
#include <linux/kernel.h>
#include <linux/xattr.h>
#include <linux/fs.h>
+#include <linux/unicode.h>
#include "misc.h"
#include "smb_common.h"
@@ -159,7 +160,7 @@ out:
*/
char *convert_to_nt_pathname(struct ksmbd_share_config *share,
- struct path *path)
+ const struct path *path)
{
char *pathname, *ab_pathname, *nt_pathname;
int share_path_len = share->path_sz;
@@ -226,26 +227,53 @@ void ksmbd_conv_path_to_windows(char *path)
strreplace(path, '/', '\\');
}
+char *ksmbd_casefold_sharename(struct unicode_map *um, const char *name)
+{
+ char *cf_name;
+ int cf_len;
+
+ cf_name = kzalloc(KSMBD_REQ_MAX_SHARE_NAME, GFP_KERNEL);
+ if (!cf_name)
+ return ERR_PTR(-ENOMEM);
+
+ if (IS_ENABLED(CONFIG_UNICODE) && um) {
+ const struct qstr q_name = {.name = name, .len = strlen(name)};
+
+ cf_len = utf8_casefold(um, &q_name, cf_name,
+ KSMBD_REQ_MAX_SHARE_NAME);
+ if (cf_len < 0)
+ goto out_ascii;
+
+ return cf_name;
+ }
+
+out_ascii:
+ cf_len = strscpy(cf_name, name, KSMBD_REQ_MAX_SHARE_NAME);
+ if (cf_len < 0) {
+ kfree(cf_name);
+ return ERR_PTR(-E2BIG);
+ }
+
+ for (; *cf_name; ++cf_name)
+ *cf_name = isascii(*cf_name) ? tolower(*cf_name) : *cf_name;
+ return cf_name - cf_len;
+}
+
/**
* ksmbd_extract_sharename() - get share name from tree connect request
* @treename: buffer containing tree name and share name
*
* Return: share name on success, otherwise error
*/
-char *ksmbd_extract_sharename(char *treename)
+char *ksmbd_extract_sharename(struct unicode_map *um, const char *treename)
{
- char *name = treename;
- char *dst;
- char *pos = strrchr(name, '\\');
+ const char *name = treename, *pos = strrchr(name, '\\');
if (pos)
name = (pos + 1);
/* caller has to free the memory */
- dst = kstrdup(name, GFP_KERNEL);
- if (!dst)
- return ERR_PTR(-ENOMEM);
- return dst;
+ return ksmbd_casefold_sharename(um, name);
}
/**
diff --git a/fs/ksmbd/misc.h b/fs/ksmbd/misc.h
index aae2a252945f..1facfcd21200 100644
--- a/fs/ksmbd/misc.h
+++ b/fs/ksmbd/misc.h
@@ -15,12 +15,13 @@ int match_pattern(const char *str, size_t len, const char *pattern);
int ksmbd_validate_filename(char *filename);
int parse_stream_name(char *filename, char **stream_name, int *s_type);
char *convert_to_nt_pathname(struct ksmbd_share_config *share,
- struct path *path);
+ const struct path *path);
int get_nlink(struct kstat *st);
void ksmbd_conv_path_to_unix(char *path);
void ksmbd_strip_last_slash(char *path);
void ksmbd_conv_path_to_windows(char *path);
-char *ksmbd_extract_sharename(char *treename);
+char *ksmbd_casefold_sharename(struct unicode_map *um, const char *name);
+char *ksmbd_extract_sharename(struct unicode_map *um, const char *treename);
char *convert_to_unix_name(struct ksmbd_share_config *share, const char *name);
#define KSMBD_DIR_INFO_ALIGNMENT 8
diff --git a/fs/ksmbd/ndr.c b/fs/ksmbd/ndr.c
index 5052be9261d9..0ae8d08d85a8 100644
--- a/fs/ksmbd/ndr.c
+++ b/fs/ksmbd/ndr.c
@@ -345,6 +345,8 @@ int ndr_encode_posix_acl(struct ndr *n,
{
unsigned int ref_id = 0x00020000;
int ret;
+ vfsuid_t vfsuid;
+ vfsgid_t vfsgid;
n->offset = 0;
n->length = 1024;
@@ -372,10 +374,12 @@ int ndr_encode_posix_acl(struct ndr *n,
if (ret)
return ret;
- ret = ndr_write_int64(n, from_kuid(&init_user_ns, i_uid_into_mnt(user_ns, inode)));
+ vfsuid = i_uid_into_vfsuid(user_ns, inode);
+ ret = ndr_write_int64(n, from_kuid(&init_user_ns, vfsuid_into_kuid(vfsuid)));
if (ret)
return ret;
- ret = ndr_write_int64(n, from_kgid(&init_user_ns, i_gid_into_mnt(user_ns, inode)));
+ vfsgid = i_gid_into_vfsgid(user_ns, inode);
+ ret = ndr_write_int64(n, from_kgid(&init_user_ns, vfsgid_into_kgid(vfsgid)));
if (ret)
return ret;
ret = ndr_write_int32(n, inode->i_mode);
diff --git a/fs/ksmbd/oplock.c b/fs/ksmbd/oplock.c
index 9046cff4374b..d7d47b82451d 100644
--- a/fs/ksmbd/oplock.c
+++ b/fs/ksmbd/oplock.c
@@ -1609,12 +1609,18 @@ void create_posix_rsp_buf(char *cc, struct ksmbd_file *fp)
struct create_posix_rsp *buf;
struct inode *inode = file_inode(fp->filp);
struct user_namespace *user_ns = file_mnt_user_ns(fp->filp);
+ vfsuid_t vfsuid = i_uid_into_vfsuid(user_ns, inode);
+ vfsgid_t vfsgid = i_gid_into_vfsgid(user_ns, inode);
buf = (struct create_posix_rsp *)cc;
memset(buf, 0, sizeof(struct create_posix_rsp));
buf->ccontext.DataOffset = cpu_to_le16(offsetof
(struct create_posix_rsp, nlink));
- buf->ccontext.DataLength = cpu_to_le32(52);
+ /*
+ * DataLength = nlink(4) + reparse_tag(4) + mode(4) +
+ * domain sid(28) + unix group sid(16).
+ */
+ buf->ccontext.DataLength = cpu_to_le32(56);
buf->ccontext.NameOffset = cpu_to_le16(offsetof
(struct create_posix_rsp, Name));
buf->ccontext.NameLength = cpu_to_le16(POSIX_CTXT_DATA_LEN);
@@ -1638,13 +1644,18 @@ void create_posix_rsp_buf(char *cc, struct ksmbd_file *fp)
buf->nlink = cpu_to_le32(inode->i_nlink);
buf->reparse_tag = cpu_to_le32(fp->volatile_id);
- buf->mode = cpu_to_le32(inode->i_mode);
- id_to_sid(from_kuid_munged(&init_user_ns,
- i_uid_into_mnt(user_ns, inode)),
- SIDNFS_USER, (struct smb_sid *)&buf->SidBuffer[0]);
- id_to_sid(from_kgid_munged(&init_user_ns,
- i_gid_into_mnt(user_ns, inode)),
- SIDNFS_GROUP, (struct smb_sid *)&buf->SidBuffer[20]);
+ buf->mode = cpu_to_le32(inode->i_mode & 0777);
+ /*
+ * SidBuffer(44) contain two sids(Domain sid(28), UNIX group sid(16)).
+ * Domain sid(28) = revision(1) + num_subauth(1) + authority(6) +
+ * sub_auth(4 * 4(num_subauth)) + RID(4).
+ * UNIX group id(16) = revision(1) + num_subauth(1) + authority(6) +
+ * sub_auth(4 * 1(num_subauth)) + RID(4).
+ */
+ id_to_sid(from_kuid_munged(&init_user_ns, vfsuid_into_kuid(vfsuid)),
+ SIDOWNER, (struct smb_sid *)&buf->SidBuffer[0]);
+ id_to_sid(from_kgid_munged(&init_user_ns, vfsgid_into_kgid(vfsgid)),
+ SIDUNIX_GROUP, (struct smb_sid *)&buf->SidBuffer[28]);
}
/*
diff --git a/fs/ksmbd/server.c b/fs/ksmbd/server.c
index ce42bff42ef9..a0d635304754 100644
--- a/fs/ksmbd/server.c
+++ b/fs/ksmbd/server.c
@@ -235,10 +235,8 @@ send:
if (work->sess && work->sess->enc && work->encrypted &&
conn->ops->encrypt_resp) {
rc = conn->ops->encrypt_resp(work);
- if (rc < 0) {
+ if (rc < 0)
conn->ops->set_rsp_status(work, STATUS_DATA_ERROR);
- goto send;
- }
}
ksmbd_conn_write(work);
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index 19412ac701a6..b2fc85d440d0 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -925,7 +925,7 @@ static void decode_encrypt_ctxt(struct ksmbd_conn *conn,
*
* Return: true if connection should be encrypted, else false
*/
-static bool smb3_encryption_negotiated(struct ksmbd_conn *conn)
+bool smb3_encryption_negotiated(struct ksmbd_conn *conn)
{
if (!conn->ops->generate_encryptionkey)
return false;
@@ -1883,7 +1883,7 @@ int smb2_tree_connect(struct ksmbd_work *work)
goto out_err1;
}
- name = ksmbd_extract_sharename(treename);
+ name = ksmbd_extract_sharename(conn->um, treename);
if (IS_ERR(name)) {
status.ret = KSMBD_TREE_CONN_STATUS_ERROR;
goto out_err1;
@@ -2185,7 +2185,7 @@ out:
* Return: 0 on success, otherwise error
*/
static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len,
- struct path *path)
+ const struct path *path)
{
struct user_namespace *user_ns = mnt_user_ns(path->mnt);
char *attr_name = NULL, *value;
@@ -2272,7 +2272,7 @@ next:
return rc;
}
-static noinline int smb2_set_stream_name_xattr(struct path *path,
+static noinline int smb2_set_stream_name_xattr(const struct path *path,
struct ksmbd_file *fp,
char *stream_name, int s_type)
{
@@ -2311,7 +2311,7 @@ static noinline int smb2_set_stream_name_xattr(struct path *path,
return 0;
}
-static int smb2_remove_smb_xattrs(struct path *path)
+static int smb2_remove_smb_xattrs(const struct path *path)
{
struct user_namespace *user_ns = mnt_user_ns(path->mnt);
char *name, *xattr_list = NULL;
@@ -2345,7 +2345,7 @@ out:
return err;
}
-static int smb2_create_truncate(struct path *path)
+static int smb2_create_truncate(const struct path *path)
{
int rc = vfs_truncate(path, 0);
@@ -2364,7 +2364,7 @@ static int smb2_create_truncate(struct path *path)
return rc;
}
-static void smb2_new_xattrs(struct ksmbd_tree_connect *tcon, struct path *path,
+static void smb2_new_xattrs(struct ksmbd_tree_connect *tcon, const struct path *path,
struct ksmbd_file *fp)
{
struct xattr_dos_attrib da = {0};
@@ -2387,7 +2387,7 @@ static void smb2_new_xattrs(struct ksmbd_tree_connect *tcon, struct path *path,
}
static void smb2_update_xattrs(struct ksmbd_tree_connect *tcon,
- struct path *path, struct ksmbd_file *fp)
+ const struct path *path, struct ksmbd_file *fp)
{
struct xattr_dos_attrib da;
int rc;
@@ -2447,7 +2447,7 @@ static int smb2_creat(struct ksmbd_work *work, struct path *path, char *name,
static int smb2_create_sd_buffer(struct ksmbd_work *work,
struct smb2_create_req *req,
- struct path *path)
+ const struct path *path)
{
struct create_context *context;
struct create_sd_buf_req *sd_buf;
@@ -2477,8 +2477,11 @@ static void ksmbd_acls_fattr(struct smb_fattr *fattr,
struct user_namespace *mnt_userns,
struct inode *inode)
{
- fattr->cf_uid = i_uid_into_mnt(mnt_userns, inode);
- fattr->cf_gid = i_gid_into_mnt(mnt_userns, inode);
+ vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
+ vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
+
+ fattr->cf_uid = vfsuid_into_kuid(vfsuid);
+ fattr->cf_gid = vfsgid_into_kgid(vfsgid);
fattr->cf_mode = inode->i_mode;
fattr->cf_acls = NULL;
fattr->cf_dacls = NULL;
@@ -2761,7 +2764,6 @@ int smb2_open(struct ksmbd_work *work)
} else {
file_present = true;
user_ns = mnt_user_ns(path.mnt);
- generic_fillattr(user_ns, d_inode(path.dentry), &stat);
}
if (stream_name) {
if (req->CreateOptions & FILE_DIRECTORY_FILE_LE) {
@@ -2770,7 +2772,8 @@ int smb2_open(struct ksmbd_work *work)
rsp->hdr.Status = STATUS_NOT_A_DIRECTORY;
}
} else {
- if (S_ISDIR(stat.mode) && s_type == DATA_STREAM) {
+ if (file_present && S_ISDIR(d_inode(path.dentry)->i_mode) &&
+ s_type == DATA_STREAM) {
rc = -EIO;
rsp->hdr.Status = STATUS_FILE_IS_A_DIRECTORY;
}
@@ -2787,7 +2790,8 @@ int smb2_open(struct ksmbd_work *work)
}
if (file_present && req->CreateOptions & FILE_NON_DIRECTORY_FILE_LE &&
- S_ISDIR(stat.mode) && !(req->CreateOptions & FILE_DELETE_ON_CLOSE_LE)) {
+ S_ISDIR(d_inode(path.dentry)->i_mode) &&
+ !(req->CreateOptions & FILE_DELETE_ON_CLOSE_LE)) {
ksmbd_debug(SMB, "open() argument is a directory: %s, %x\n",
name, req->CreateOptions);
rsp->hdr.Status = STATUS_FILE_IS_A_DIRECTORY;
@@ -2797,7 +2801,7 @@ int smb2_open(struct ksmbd_work *work)
if (file_present && (req->CreateOptions & FILE_DIRECTORY_FILE_LE) &&
!(req->CreateDisposition == FILE_CREATE_LE) &&
- !S_ISDIR(stat.mode)) {
+ !S_ISDIR(d_inode(path.dentry)->i_mode)) {
rsp->hdr.Status = STATUS_NOT_A_DIRECTORY;
rc = -EIO;
goto err_out;
@@ -3561,17 +3565,22 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
posix_info->AllocationSize = cpu_to_le64(ksmbd_kstat->kstat->blocks << 9);
posix_info->DeviceId = cpu_to_le32(ksmbd_kstat->kstat->rdev);
posix_info->HardLinks = cpu_to_le32(ksmbd_kstat->kstat->nlink);
- posix_info->Mode = cpu_to_le32(ksmbd_kstat->kstat->mode);
+ posix_info->Mode = cpu_to_le32(ksmbd_kstat->kstat->mode & 0777);
posix_info->Inode = cpu_to_le64(ksmbd_kstat->kstat->ino);
posix_info->DosAttributes =
S_ISDIR(ksmbd_kstat->kstat->mode) ?
FILE_ATTRIBUTE_DIRECTORY_LE : FILE_ATTRIBUTE_ARCHIVE_LE;
if (d_info->hide_dot_file && d_info->name[0] == '.')
posix_info->DosAttributes |= FILE_ATTRIBUTE_HIDDEN_LE;
+ /*
+ * SidBuffer(32) contain two sids(Domain sid(16), UNIX group sid(16)).
+ * UNIX sid(16) = revision(1) + num_subauth(1) + authority(6) +
+ * sub_auth(4 * 1(num_subauth)) + RID(4).
+ */
id_to_sid(from_kuid_munged(&init_user_ns, ksmbd_kstat->kstat->uid),
- SIDNFS_USER, (struct smb_sid *)&posix_info->SidBuffer[0]);
+ SIDUNIX_USER, (struct smb_sid *)&posix_info->SidBuffer[0]);
id_to_sid(from_kgid_munged(&init_user_ns, ksmbd_kstat->kstat->gid),
- SIDNFS_GROUP, (struct smb_sid *)&posix_info->SidBuffer[20]);
+ SIDUNIX_GROUP, (struct smb_sid *)&posix_info->SidBuffer[16]);
memcpy(posix_info->name, conv_name, conv_len);
posix_info->name_len = cpu_to_le32(conv_len);
posix_info->NextEntryOffset = cpu_to_le32(next_entry_offset);
@@ -3776,7 +3785,7 @@ static int reserve_populate_dentry(struct ksmbd_dir_info *d_info,
return 0;
}
-static int __query_dir(struct dir_context *ctx, const char *name, int namlen,
+static bool __query_dir(struct dir_context *ctx, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type)
{
struct ksmbd_readdir_data *buf;
@@ -3790,27 +3799,20 @@ static int __query_dir(struct dir_context *ctx, const char *name, int namlen,
/* dot and dotdot entries are already reserved */
if (!strcmp(".", name) || !strcmp("..", name))
- return 0;
+ return true;
if (ksmbd_share_veto_filename(priv->work->tcon->share_conf, name))
- return 0;
+ return true;
if (!match_pattern(name, namlen, priv->search_pattern))
- return 0;
+ return true;
d_info->name = name;
d_info->name_len = namlen;
rc = reserve_populate_dentry(d_info, priv->info_level);
if (rc)
- return rc;
- if (d_info->flags & SMB2_RETURN_SINGLE_ENTRY) {
+ return false;
+ if (d_info->flags & SMB2_RETURN_SINGLE_ENTRY)
d_info->out_buf_len = 0;
- return 0;
- }
- return 0;
-}
-
-static void restart_ctx(struct dir_context *ctx)
-{
- ctx->pos = 0;
+ return true;
}
static int verify_info_level(int info_level)
@@ -3894,8 +3896,7 @@ int smb2_query_dir(struct ksmbd_work *work)
inode_permission(file_mnt_user_ns(dir_fp->filp),
file_inode(dir_fp->filp),
MAY_READ | MAY_EXEC)) {
- pr_err("no right to enumerate directory (%pd)\n",
- dir_fp->filp->f_path.dentry);
+ pr_err("no right to enumerate directory (%pD)\n", dir_fp->filp);
rc = -EACCES;
goto err_out2;
}
@@ -3921,7 +3922,6 @@ int smb2_query_dir(struct ksmbd_work *work)
if (srch_flag & SMB2_REOPEN || srch_flag & SMB2_RESTART_SCANS) {
ksmbd_debug(SMB, "Restart directory scan\n");
generic_file_llseek(dir_fp->filp, 0, SEEK_SET);
- restart_ctx(&dir_fp->readdir_data.ctx);
}
memset(&d_info, 0, sizeof(struct ksmbd_dir_info));
@@ -3968,11 +3968,9 @@ int smb2_query_dir(struct ksmbd_work *work)
*/
if (!d_info.out_buf_len && !d_info.num_entry)
goto no_buf_len;
- if (rc == 0)
- restart_ctx(&dir_fp->readdir_data.ctx);
- if (rc == -ENOSPC)
+ if (rc > 0 || rc == -ENOSPC)
rc = 0;
- if (rc)
+ else if (rc)
goto err_out;
d_info.wptr = d_info.rptr;
@@ -4029,6 +4027,8 @@ err_out2:
rsp->hdr.Status = STATUS_NO_MEMORY;
else if (rc == -EFAULT)
rsp->hdr.Status = STATUS_INVALID_INFO_CLASS;
+ else if (rc == -EIO)
+ rsp->hdr.Status = STATUS_FILE_CORRUPT_ERROR;
if (!rsp->hdr.Status)
rsp->hdr.Status = STATUS_UNEXPECTED_IO_ERROR;
@@ -4158,7 +4158,7 @@ static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp,
int rc, name_len, value_len, xattr_list_len, idx;
ssize_t buf_free_len, alignment_bytes, next_offset, rsp_data_cnt = 0;
struct smb2_ea_info_req *ea_req = NULL;
- struct path *path;
+ const struct path *path;
struct user_namespace *user_ns = file_mnt_user_ns(fp->filp);
if (!(fp->daccess & FILE_READ_EA_LE)) {
@@ -4495,7 +4495,7 @@ static void get_file_stream_info(struct ksmbd_work *work,
struct smb2_file_stream_info *file_info;
char *stream_name, *xattr_list = NULL, *stream_buf;
struct kstat stat;
- struct path *path = &fp->filp->f_path;
+ const struct path *path = &fp->filp->f_path;
ssize_t xattr_list_len;
int nbytes = 0, streamlen, stream_name_len, next, idx = 0;
int buf_free_len;
@@ -4720,7 +4720,11 @@ static int find_file_posix_info(struct smb2_query_info_rsp *rsp,
{
struct smb311_posix_qinfo *file_info;
struct inode *inode = file_inode(fp->filp);
+ struct user_namespace *user_ns = file_mnt_user_ns(fp->filp);
+ vfsuid_t vfsuid = i_uid_into_vfsuid(user_ns, inode);
+ vfsgid_t vfsgid = i_gid_into_vfsgid(user_ns, inode);
u64 time;
+ int out_buf_len = sizeof(struct smb311_posix_qinfo) + 32;
file_info = (struct smb311_posix_qinfo *)rsp->Buffer;
file_info->CreationTime = cpu_to_le64(fp->create_time);
@@ -4735,12 +4739,22 @@ static int find_file_posix_info(struct smb2_query_info_rsp *rsp,
file_info->EndOfFile = cpu_to_le64(inode->i_size);
file_info->AllocationSize = cpu_to_le64(inode->i_blocks << 9);
file_info->HardLinks = cpu_to_le32(inode->i_nlink);
- file_info->Mode = cpu_to_le32(inode->i_mode);
+ file_info->Mode = cpu_to_le32(inode->i_mode & 0777);
file_info->DeviceId = cpu_to_le32(inode->i_rdev);
- rsp->OutputBufferLength =
- cpu_to_le32(sizeof(struct smb311_posix_qinfo));
- inc_rfc1001_len(rsp_org, sizeof(struct smb311_posix_qinfo));
- return 0;
+
+ /*
+ * Sids(32) contain two sids(Domain sid(16), UNIX group sid(16)).
+ * UNIX sid(16) = revision(1) + num_subauth(1) + authority(6) +
+ * sub_auth(4 * 1(num_subauth)) + RID(4).
+ */
+ id_to_sid(from_kuid_munged(&init_user_ns, vfsuid_into_kuid(vfsuid)),
+ SIDUNIX_USER, (struct smb_sid *)&file_info->Sids[0]);
+ id_to_sid(from_kgid_munged(&init_user_ns, vfsgid_into_kgid(vfsgid)),
+ SIDUNIX_GROUP, (struct smb_sid *)&file_info->Sids[16]);
+
+ rsp->OutputBufferLength = cpu_to_le32(out_buf_len);
+ inc_rfc1001_len(rsp_org, out_buf_len);
+ return out_buf_len;
}
static int smb2_get_info_file(struct ksmbd_work *work,
@@ -4860,8 +4874,8 @@ static int smb2_get_info_file(struct ksmbd_work *work,
pr_err("client doesn't negotiate with SMB3.1.1 POSIX Extensions\n");
rc = -EOPNOTSUPP;
} else {
- rc = find_file_posix_info(rsp, fp, work->response_buf);
- file_infoclass_size = sizeof(struct smb311_posix_qinfo);
+ file_infoclass_size = find_file_posix_info(rsp, fp,
+ work->response_buf);
}
break;
default:
@@ -5413,7 +5427,7 @@ static int smb2_rename(struct ksmbd_work *work,
if (!pathname)
return -ENOMEM;
- abs_oldname = d_path(&fp->filp->f_path, pathname, PATH_MAX);
+ abs_oldname = file_path(fp->filp, pathname, PATH_MAX);
if (IS_ERR(abs_oldname)) {
rc = -EINVAL;
goto out;
@@ -5548,7 +5562,7 @@ static int smb2_create_link(struct ksmbd_work *work,
}
ksmbd_debug(SMB, "link name is %s\n", link_name);
- target_name = d_path(&filp->f_path, pathname, PATH_MAX);
+ target_name = file_path(filp, pathname, PATH_MAX);
if (IS_ERR(target_name)) {
rc = -EINVAL;
goto out;
@@ -6266,8 +6280,8 @@ int smb2_read(struct ksmbd_work *work)
goto out;
}
- ksmbd_debug(SMB, "filename %pd, offset %lld, len %zu\n",
- fp->filp->f_path.dentry, offset, length);
+ ksmbd_debug(SMB, "filename %pD, offset %lld, len %zu\n",
+ fp->filp, offset, length);
work->aux_payload_buf = kvmalloc(length, GFP_KERNEL | __GFP_ZERO);
if (!work->aux_payload_buf) {
@@ -6531,8 +6545,8 @@ int smb2_write(struct ksmbd_work *work)
data_buf = (char *)(((char *)&req->hdr.ProtocolId) +
le16_to_cpu(req->DataOffset));
- ksmbd_debug(SMB, "filename %pd, offset %lld, len %zu\n",
- fp->filp->f_path.dentry, offset, length);
+ ksmbd_debug(SMB, "filename %pD, offset %lld, len %zu\n",
+ fp->filp, offset, length);
err = ksmbd_vfs_write(work, fp, data_buf, length, &offset,
writethrough, &nbytes);
if (err < 0)
@@ -7643,11 +7657,16 @@ int smb2_ioctl(struct ksmbd_work *work)
goto out;
}
- if (in_buf_len < sizeof(struct validate_negotiate_info_req))
- return -EINVAL;
+ if (in_buf_len < offsetof(struct validate_negotiate_info_req,
+ Dialects)) {
+ ret = -EINVAL;
+ goto out;
+ }
- if (out_buf_len < sizeof(struct validate_negotiate_info_rsp))
- return -EINVAL;
+ if (out_buf_len < sizeof(struct validate_negotiate_info_rsp)) {
+ ret = -EINVAL;
+ goto out;
+ }
ret = fsctl_validate_negotiate_info(conn,
(struct validate_negotiate_info_req *)&req->Buffer[0],
@@ -8573,7 +8592,7 @@ int smb3_encrypt_resp(struct ksmbd_work *work)
buf_size += iov[1].iov_len;
work->resp_hdr_sz = iov[1].iov_len;
- rc = ksmbd_crypt_message(work->conn, iov, rq_nvec, 1);
+ rc = ksmbd_crypt_message(work, iov, rq_nvec, 1);
if (rc)
return rc;
@@ -8592,7 +8611,6 @@ bool smb3_is_transform_hdr(void *buf)
int smb3_decrypt_req(struct ksmbd_work *work)
{
- struct ksmbd_conn *conn = work->conn;
struct ksmbd_session *sess;
char *buf = work->request_buf;
unsigned int pdu_length = get_rfc1002_len(buf);
@@ -8612,7 +8630,7 @@ int smb3_decrypt_req(struct ksmbd_work *work)
return -ECONNABORTED;
}
- sess = ksmbd_session_lookup_all(conn, le64_to_cpu(tr_hdr->SessionId));
+ sess = ksmbd_session_lookup_all(work->conn, le64_to_cpu(tr_hdr->SessionId));
if (!sess) {
pr_err("invalid session id(%llx) in transform header\n",
le64_to_cpu(tr_hdr->SessionId));
@@ -8623,7 +8641,7 @@ int smb3_decrypt_req(struct ksmbd_work *work)
iov[0].iov_len = sizeof(struct smb2_transform_hdr) + 4;
iov[1].iov_base = buf + sizeof(struct smb2_transform_hdr) + 4;
iov[1].iov_len = buf_data_size;
- rc = ksmbd_crypt_message(conn, iov, 2, 0);
+ rc = ksmbd_crypt_message(work, iov, 2, 0);
if (rc)
return rc;
diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h
index af455278d005..092fdd3f8750 100644
--- a/fs/ksmbd/smb2pdu.h
+++ b/fs/ksmbd/smb2pdu.h
@@ -158,7 +158,8 @@ struct create_posix_rsp {
__le32 nlink;
__le32 reparse_tag;
__le32 mode;
- u8 SidBuffer[40];
+ /* SidBuffer contain two sids(Domain sid(28), UNIX group sid(16)) */
+ u8 SidBuffer[44];
} __packed;
struct smb2_buffer_desc_v1 {
@@ -439,7 +440,8 @@ struct smb2_posix_info {
__le32 HardLinks;
__le32 ReparseTag;
__le32 Mode;
- u8 SidBuffer[40];
+ /* SidBuffer contain two sids (UNIX user sid(16), UNIX group sid(16)) */
+ u8 SidBuffer[32];
__le32 name_len;
u8 name[1];
/*
@@ -492,6 +494,7 @@ int smb3_decrypt_req(struct ksmbd_work *work);
int smb3_encrypt_resp(struct ksmbd_work *work);
bool smb3_11_final_sess_setup_resp(struct ksmbd_work *work);
int smb2_set_rsp_credits(struct ksmbd_work *work);
+bool smb3_encryption_negotiated(struct ksmbd_conn *conn);
/* smb2 misc functions */
int ksmbd_smb2_check_message(struct ksmbd_work *work);
diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c
index 7f8ab14fb8ec..d96da872d70a 100644
--- a/fs/ksmbd/smb_common.c
+++ b/fs/ksmbd/smb_common.c
@@ -4,6 +4,8 @@
* Copyright (C) 2018 Namjae Jeon <linkinjeon@kernel.org>
*/
+#include <linux/user_namespace.h>
+
#include "smb_common.h"
#include "server.h"
#include "misc.h"
@@ -625,8 +627,8 @@ int ksmbd_override_fsids(struct ksmbd_work *work)
if (!cred)
return -ENOMEM;
- cred->fsuid = make_kuid(current_user_ns(), uid);
- cred->fsgid = make_kgid(current_user_ns(), gid);
+ cred->fsuid = make_kuid(&init_user_ns, uid);
+ cred->fsgid = make_kgid(&init_user_ns, gid);
gi = groups_alloc(0);
if (!gi) {
diff --git a/fs/ksmbd/smbacl.c b/fs/ksmbd/smbacl.c
index 3781bca2c8fc..b05ff9b146b5 100644
--- a/fs/ksmbd/smbacl.c
+++ b/fs/ksmbd/smbacl.c
@@ -275,7 +275,8 @@ static int sid_to_id(struct user_namespace *user_ns,
uid_t id;
id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]);
- uid = mapped_kuid_user(user_ns, &init_user_ns, KUIDT_INIT(id));
+ uid = KUIDT_INIT(id);
+ uid = from_vfsuid(user_ns, &init_user_ns, VFSUIDT_INIT(uid));
if (uid_valid(uid)) {
fattr->cf_uid = uid;
rc = 0;
@@ -285,7 +286,8 @@ static int sid_to_id(struct user_namespace *user_ns,
gid_t id;
id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]);
- gid = mapped_kgid_user(user_ns, &init_user_ns, KGIDT_INIT(id));
+ gid = KGIDT_INIT(id);
+ gid = from_vfsgid(user_ns, &init_user_ns, VFSGIDT_INIT(gid));
if (gid_valid(gid)) {
fattr->cf_gid = gid;
rc = 0;
@@ -991,7 +993,7 @@ static void smb_set_ace(struct smb_ace *ace, const struct smb_sid *sid, u8 type,
}
int smb_inherit_dacl(struct ksmbd_conn *conn,
- struct path *path,
+ const struct path *path,
unsigned int uid, unsigned int gid)
{
const struct smb_sid *psid, *creator = NULL;
@@ -1185,7 +1187,7 @@ bool smb_inherit_flags(int flags, bool is_dir)
return false;
}
-int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
+int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path,
__le32 *pdaccess, int uid)
{
struct user_namespace *user_ns = mnt_user_ns(path->mnt);
@@ -1352,7 +1354,7 @@ err_out:
}
int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
- struct path *path, struct smb_ntsd *pntsd, int ntsd_len,
+ const struct path *path, struct smb_ntsd *pntsd, int ntsd_len,
bool type_check)
{
int rc;
diff --git a/fs/ksmbd/smbacl.h b/fs/ksmbd/smbacl.h
index fcb2c83f2992..618f2e0236b3 100644
--- a/fs/ksmbd/smbacl.h
+++ b/fs/ksmbd/smbacl.h
@@ -201,12 +201,12 @@ void posix_state_to_acl(struct posix_acl_state *state,
struct posix_acl_entry *pace);
int compare_sids(const struct smb_sid *ctsid, const struct smb_sid *cwsid);
bool smb_inherit_flags(int flags, bool is_dir);
-int smb_inherit_dacl(struct ksmbd_conn *conn, struct path *path,
+int smb_inherit_dacl(struct ksmbd_conn *conn, const struct path *path,
unsigned int uid, unsigned int gid);
-int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
+int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path,
__le32 *pdaccess, int uid);
int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
- struct path *path, struct smb_ntsd *pntsd, int ntsd_len,
+ const struct path *path, struct smb_ntsd *pntsd, int ntsd_len,
bool type_check);
void id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid);
void ksmbd_init_domain(u32 *sub_auth);
@@ -214,25 +214,25 @@ void ksmbd_init_domain(u32 *sub_auth);
static inline uid_t posix_acl_uid_translate(struct user_namespace *mnt_userns,
struct posix_acl_entry *pace)
{
- kuid_t kuid;
+ vfsuid_t vfsuid;
/* If this is an idmapped mount, apply the idmapping. */
- kuid = mapped_kuid_fs(mnt_userns, &init_user_ns, pace->e_uid);
+ vfsuid = make_vfsuid(mnt_userns, &init_user_ns, pace->e_uid);
/* Translate the kuid into a userspace id ksmbd would see. */
- return from_kuid(&init_user_ns, kuid);
+ return from_kuid(&init_user_ns, vfsuid_into_kuid(vfsuid));
}
static inline gid_t posix_acl_gid_translate(struct user_namespace *mnt_userns,
struct posix_acl_entry *pace)
{
- kgid_t kgid;
+ vfsgid_t vfsgid;
/* If this is an idmapped mount, apply the idmapping. */
- kgid = mapped_kgid_fs(mnt_userns, &init_user_ns, pace->e_gid);
+ vfsgid = make_vfsgid(mnt_userns, &init_user_ns, pace->e_gid);
/* Translate the kgid into a userspace id ksmbd would see. */
- return from_kgid(&init_user_ns, kgid);
+ return from_kgid(&init_user_ns, vfsgid_into_kgid(vfsgid));
}
#endif /* _SMBACL_H */
diff --git a/fs/ksmbd/transport_ipc.c b/fs/ksmbd/transport_ipc.c
index 7cb0eeb07c80..c9aca21637d5 100644
--- a/fs/ksmbd/transport_ipc.c
+++ b/fs/ksmbd/transport_ipc.c
@@ -197,6 +197,7 @@ static struct genl_family ksmbd_genl_family = {
.module = THIS_MODULE,
.ops = ksmbd_genl_ops,
.n_ops = ARRAY_SIZE(ksmbd_genl_ops),
+ .resv_start_op = KSMBD_EVENT_SPNEGO_AUTHEN_RESPONSE + 1,
};
static void ksmbd_nl_init_fixup(void)
diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c
index 35b55ee94fe5..096eda9ef873 100644
--- a/fs/ksmbd/transport_rdma.c
+++ b/fs/ksmbd/transport_rdma.c
@@ -32,7 +32,7 @@
/* SMB_DIRECT negotiation timeout in seconds */
#define SMB_DIRECT_NEGOTIATE_TIMEOUT 120
-#define SMB_DIRECT_MAX_SEND_SGES 8
+#define SMB_DIRECT_MAX_SEND_SGES 6
#define SMB_DIRECT_MAX_RECV_SGES 1
/*
@@ -62,13 +62,13 @@ static int smb_direct_receive_credit_max = 255;
static int smb_direct_send_credit_target = 255;
/* The maximum single message size can be sent to remote peer */
-static int smb_direct_max_send_size = 8192;
+static int smb_direct_max_send_size = 1364;
/* The maximum fragmented upper-layer payload receive size supported */
static int smb_direct_max_fragmented_recv_size = 1024 * 1024;
/* The maximum single-message size which can be received */
-static int smb_direct_max_receive_size = 8192;
+static int smb_direct_max_receive_size = 1364;
static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE;
@@ -1527,6 +1527,8 @@ static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
}
case RDMA_CM_EVENT_DEVICE_REMOVAL:
case RDMA_CM_EVENT_DISCONNECTED: {
+ ib_drain_qp(t->qp);
+
t->status = SMB_DIRECT_CS_DISCONNECTED;
wake_up_interruptible(&t->wait_status);
wake_up_interruptible(&t->wait_reassembly_queue);
diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c
index 143bba4e4db8..63d55f543bd2 100644
--- a/fs/ksmbd/transport_tcp.c
+++ b/fs/ksmbd/transport_tcp.c
@@ -399,7 +399,8 @@ static int create_socket(struct interface *iface)
ret = sock_create(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &ksmbd_socket);
if (ret) {
- pr_err("Can't create socket for ipv6, try ipv4: %d\n", ret);
+ if (ret != -EAFNOSUPPORT)
+ pr_err("Can't create socket for ipv6, fallback to ipv4: %d\n", ret);
ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP,
&ksmbd_socket);
if (ret) {
diff --git a/fs/ksmbd/unicode.h b/fs/ksmbd/unicode.h
index 5593024230ae..076f6034a789 100644
--- a/fs/ksmbd/unicode.h
+++ b/fs/ksmbd/unicode.h
@@ -24,6 +24,7 @@
#include <asm/byteorder.h>
#include <linux/types.h>
#include <linux/nls.h>
+#include <linux/unicode.h>
#define UNIUPR_NOLOWER /* Example to not expand lower case tables */
@@ -69,7 +70,7 @@ char *smb_strndup_from_utf16(const char *src, const int maxlen,
const struct nls_table *codepage);
int smbConvertToUTF16(__le16 *target, const char *source, int srclen,
const struct nls_table *cp, int mapchars);
-char *ksmbd_extract_sharename(char *treename);
+char *ksmbd_extract_sharename(struct unicode_map *um, const char *treename);
#endif
/*
diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
index 78d01033604c..8de970d6146f 100644
--- a/fs/ksmbd/vfs.c
+++ b/fs/ksmbd/vfs.c
@@ -377,8 +377,7 @@ int ksmbd_vfs_read(struct ksmbd_work *work, struct ksmbd_file *fp, size_t count,
if (work->conn->connection_type) {
if (!(fp->daccess & (FILE_READ_DATA_LE | FILE_EXECUTE_LE))) {
- pr_err("no right to read(%pd)\n",
- fp->filp->f_path.dentry);
+ pr_err("no right to read(%pD)\n", fp->filp);
return -EACCES;
}
}
@@ -487,8 +486,7 @@ int ksmbd_vfs_write(struct ksmbd_work *work, struct ksmbd_file *fp,
if (work->conn->connection_type) {
if (!(fp->daccess & FILE_WRITE_DATA_LE)) {
- pr_err("no right to write(%pd)\n",
- fp->filp->f_path.dentry);
+ pr_err("no right to write(%pD)\n", fp->filp);
err = -EACCES;
goto out;
}
@@ -527,8 +525,8 @@ int ksmbd_vfs_write(struct ksmbd_work *work, struct ksmbd_file *fp,
if (sync) {
err = vfs_fsync_range(filp, offset, offset + *written, 0);
if (err < 0)
- pr_err("fsync failed for filename = %pd, err = %d\n",
- fp->filp->f_path.dentry, err);
+ pr_err("fsync failed for filename = %pD, err = %d\n",
+ fp->filp, err);
}
out:
@@ -543,7 +541,7 @@ out:
*
* Return: 0 on success, otherwise error
*/
-int ksmbd_vfs_getattr(struct path *path, struct kstat *stat)
+int ksmbd_vfs_getattr(const struct path *path, struct kstat *stat)
{
int err;
@@ -1105,7 +1103,7 @@ int ksmbd_vfs_unlink(struct user_namespace *user_ns,
return err;
}
-static int __dir_empty(struct dir_context *ctx, const char *name, int namlen,
+static bool __dir_empty(struct dir_context *ctx, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type)
{
struct ksmbd_readdir_data *buf;
@@ -1113,9 +1111,7 @@ static int __dir_empty(struct dir_context *ctx, const char *name, int namlen,
buf = container_of(ctx, struct ksmbd_readdir_data, ctx);
buf->dirent_count++;
- if (buf->dirent_count > 2)
- return -ENOTEMPTY;
- return 0;
+ return buf->dirent_count <= 2;
}
/**
@@ -1142,22 +1138,33 @@ int ksmbd_vfs_empty_dir(struct ksmbd_file *fp)
return err;
}
-static int __caseless_lookup(struct dir_context *ctx, const char *name,
+static bool __caseless_lookup(struct dir_context *ctx, const char *name,
int namlen, loff_t offset, u64 ino,
unsigned int d_type)
{
struct ksmbd_readdir_data *buf;
+ int cmp = -EINVAL;
buf = container_of(ctx, struct ksmbd_readdir_data, ctx);
if (buf->used != namlen)
- return 0;
- if (!strncasecmp((char *)buf->private, name, namlen)) {
+ return true;
+ if (IS_ENABLED(CONFIG_UNICODE) && buf->um) {
+ const struct qstr q_buf = {.name = buf->private,
+ .len = buf->used};
+ const struct qstr q_name = {.name = name,
+ .len = namlen};
+
+ cmp = utf8_strncasecmp(buf->um, &q_buf, &q_name);
+ }
+ if (cmp < 0)
+ cmp = strncasecmp((char *)buf->private, name, namlen);
+ if (!cmp) {
memcpy((char *)buf->private, name, namlen);
buf->dirent_count = 1;
- return -EEXIST;
+ return false;
}
- return 0;
+ return true;
}
/**
@@ -1168,7 +1175,8 @@ static int __caseless_lookup(struct dir_context *ctx, const char *name,
*
* Return: 0 on success, otherwise error
*/
-static int ksmbd_vfs_lookup_in_dir(struct path *dir, char *name, size_t namelen)
+static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name,
+ size_t namelen, struct unicode_map *um)
{
int ret;
struct file *dfilp;
@@ -1178,6 +1186,7 @@ static int ksmbd_vfs_lookup_in_dir(struct path *dir, char *name, size_t namelen)
.private = name,
.used = namelen,
.dirent_count = 0,
+ .um = um,
};
dfilp = dentry_open(dir, flags, current_cred());
@@ -1240,7 +1249,8 @@ int ksmbd_vfs_kern_path(struct ksmbd_work *work, char *name,
break;
err = ksmbd_vfs_lookup_in_dir(&parent, filename,
- filename_len);
+ filename_len,
+ work->conn->um);
path_put(&parent);
if (err)
goto out;
@@ -1743,11 +1753,11 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work,
*total_size_written = 0;
if (!(src_fp->daccess & (FILE_READ_DATA_LE | FILE_EXECUTE_LE))) {
- pr_err("no right to read(%pd)\n", src_fp->filp->f_path.dentry);
+ pr_err("no right to read(%pD)\n", src_fp->filp);
return -EACCES;
}
if (!(dst_fp->daccess & (FILE_WRITE_DATA_LE | FILE_APPEND_DATA_LE))) {
- pr_err("no right to write(%pd)\n", dst_fp->filp->f_path.dentry);
+ pr_err("no right to write(%pD)\n", dst_fp->filp);
return -EACCES;
}
diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h
index 70da4c0ba7ad..593059ca8511 100644
--- a/fs/ksmbd/vfs.h
+++ b/fs/ksmbd/vfs.h
@@ -12,6 +12,7 @@
#include <linux/namei.h>
#include <uapi/linux/xattr.h>
#include <linux/posix_acl.h>
+#include <linux/unicode.h>
#include "smbacl.h"
#include "xattr.h"
@@ -60,6 +61,7 @@ struct ksmbd_readdir_data {
unsigned int used;
unsigned int dirent_count;
unsigned int file_attr;
+ struct unicode_map *um;
};
/* ksmbd kstat wrapper to get valid create time when reading dir entry */
@@ -85,7 +87,7 @@ int ksmbd_vfs_fsync(struct ksmbd_work *work, u64 fid, u64 p_id);
int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name);
int ksmbd_vfs_link(struct ksmbd_work *work,
const char *oldname, const char *newname);
-int ksmbd_vfs_getattr(struct path *path, struct kstat *stat);
+int ksmbd_vfs_getattr(const struct path *path, struct kstat *stat);
int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
char *newname);
int ksmbd_vfs_truncate(struct ksmbd_work *work,
diff --git a/fs/libfs.c b/fs/libfs.c
index 31b0ddf01c31..682d56345a1c 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -15,6 +15,7 @@
#include <linux/mutex.h>
#include <linux/namei.h>
#include <linux/exportfs.h>
+#include <linux/iversion.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h> /* sync_mapping_buffers */
#include <linux/fs_context.h>
@@ -1520,3 +1521,48 @@ void generic_set_encrypted_ci_d_ops(struct dentry *dentry)
#endif
}
EXPORT_SYMBOL(generic_set_encrypted_ci_d_ops);
+
+/**
+ * inode_maybe_inc_iversion - increments i_version
+ * @inode: inode with the i_version that should be updated
+ * @force: increment the counter even if it's not necessary?
+ *
+ * Every time the inode is modified, the i_version field must be seen to have
+ * changed by any observer.
+ *
+ * If "force" is set or the QUERIED flag is set, then ensure that we increment
+ * the value, and clear the queried flag.
+ *
+ * In the common case where neither is set, then we can return "false" without
+ * updating i_version.
+ *
+ * If this function returns false, and no other metadata has changed, then we
+ * can avoid logging the metadata.
+ */
+bool inode_maybe_inc_iversion(struct inode *inode, bool force)
+{
+ u64 cur, new;
+
+ /*
+ * The i_version field is not strictly ordered with any other inode
+ * information, but the legacy inode_inc_iversion code used a spinlock
+ * to serialize increments.
+ *
+ * Here, we add full memory barriers to ensure that any de-facto
+ * ordering with other info is preserved.
+ *
+ * This barrier pairs with the barrier in inode_query_iversion()
+ */
+ smp_mb();
+ cur = inode_peek_iversion_raw(inode);
+ do {
+ /* If flag is clear then we needn't do anything */
+ if (!force && !(cur & I_VERSION_QUERIED))
+ return false;
+
+ /* Since lowest bit is flag, add 2 to avoid it */
+ new = (cur & ~I_VERSION_QUERIED) + I_VERSION_INCREMENT;
+ } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new));
+ return true;
+}
+EXPORT_SYMBOL(inode_maybe_inc_iversion);
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index f802223e71ab..cdc8e12cdac4 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -164,7 +164,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
host->h_addrbuf = nsm->sm_addrbuf;
host->net = ni->net;
host->h_cred = get_cred(ni->cred);
- strlcpy(host->nodename, utsname()->nodename, sizeof(host->nodename));
+ strscpy(host->nodename, utsname()->nodename, sizeof(host->nodename));
out:
return host;
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index bf274f23969b..284b019cb652 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -521,6 +521,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_void,
.pc_encode = nlm4svc_encode_void,
.pc_argsize = sizeof(struct nlm_void),
+ .pc_argzero = sizeof(struct nlm_void),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "NULL",
@@ -530,6 +531,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_testargs,
.pc_encode = nlm4svc_encode_testres,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_res),
.pc_xdrressize = Ck+St+2+No+Rg,
.pc_name = "TEST",
@@ -539,6 +541,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_lockargs,
.pc_encode = nlm4svc_encode_res,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_res),
.pc_xdrressize = Ck+St,
.pc_name = "LOCK",
@@ -548,6 +551,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_cancargs,
.pc_encode = nlm4svc_encode_res,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_res),
.pc_xdrressize = Ck+St,
.pc_name = "CANCEL",
@@ -557,6 +561,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_unlockargs,
.pc_encode = nlm4svc_encode_res,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_res),
.pc_xdrressize = Ck+St,
.pc_name = "UNLOCK",
@@ -566,6 +571,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_testargs,
.pc_encode = nlm4svc_encode_res,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_res),
.pc_xdrressize = Ck+St,
.pc_name = "GRANTED",
@@ -575,6 +581,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_testargs,
.pc_encode = nlm4svc_encode_void,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "TEST_MSG",
@@ -584,6 +591,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_lockargs,
.pc_encode = nlm4svc_encode_void,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "LOCK_MSG",
@@ -593,6 +601,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_cancargs,
.pc_encode = nlm4svc_encode_void,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "CANCEL_MSG",
@@ -602,6 +611,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_unlockargs,
.pc_encode = nlm4svc_encode_void,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "UNLOCK_MSG",
@@ -611,6 +621,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_testargs,
.pc_encode = nlm4svc_encode_void,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "GRANTED_MSG",
@@ -620,6 +631,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_void,
.pc_encode = nlm4svc_encode_void,
.pc_argsize = sizeof(struct nlm_res),
+ .pc_argzero = sizeof(struct nlm_res),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "TEST_RES",
@@ -629,6 +641,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_void,
.pc_encode = nlm4svc_encode_void,
.pc_argsize = sizeof(struct nlm_res),
+ .pc_argzero = sizeof(struct nlm_res),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "LOCK_RES",
@@ -638,6 +651,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_void,
.pc_encode = nlm4svc_encode_void,
.pc_argsize = sizeof(struct nlm_res),
+ .pc_argzero = sizeof(struct nlm_res),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "CANCEL_RES",
@@ -647,6 +661,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_void,
.pc_encode = nlm4svc_encode_void,
.pc_argsize = sizeof(struct nlm_res),
+ .pc_argzero = sizeof(struct nlm_res),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "UNLOCK_RES",
@@ -656,6 +671,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_res,
.pc_encode = nlm4svc_encode_void,
.pc_argsize = sizeof(struct nlm_res),
+ .pc_argzero = sizeof(struct nlm_res),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "GRANTED_RES",
@@ -665,6 +681,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_reboot,
.pc_encode = nlm4svc_encode_void,
.pc_argsize = sizeof(struct nlm_reboot),
+ .pc_argzero = sizeof(struct nlm_reboot),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "SM_NOTIFY",
@@ -674,6 +691,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_void,
.pc_encode = nlm4svc_encode_void,
.pc_argsize = sizeof(struct nlm_void),
+ .pc_argzero = sizeof(struct nlm_void),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = 0,
.pc_name = "UNUSED",
@@ -683,6 +701,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_void,
.pc_encode = nlm4svc_encode_void,
.pc_argsize = sizeof(struct nlm_void),
+ .pc_argzero = sizeof(struct nlm_void),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = 0,
.pc_name = "UNUSED",
@@ -692,6 +711,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_void,
.pc_encode = nlm4svc_encode_void,
.pc_argsize = sizeof(struct nlm_void),
+ .pc_argzero = sizeof(struct nlm_void),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = 0,
.pc_name = "UNUSED",
@@ -701,6 +721,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_shareargs,
.pc_encode = nlm4svc_encode_shareres,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_res),
.pc_xdrressize = Ck+St+1,
.pc_name = "SHARE",
@@ -710,6 +731,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_shareargs,
.pc_encode = nlm4svc_encode_shareres,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_res),
.pc_xdrressize = Ck+St+1,
.pc_name = "UNSHARE",
@@ -719,6 +741,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_lockargs,
.pc_encode = nlm4svc_encode_res,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_res),
.pc_xdrressize = Ck+St,
.pc_name = "NM_LOCK",
@@ -728,6 +751,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
.pc_decode = nlm4svc_decode_notify,
.pc_encode = nlm4svc_encode_void,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "FREE_ALL",
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index b09ca35b527c..e35c05e27806 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -555,6 +555,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_void,
.pc_encode = nlmsvc_encode_void,
.pc_argsize = sizeof(struct nlm_void),
+ .pc_argzero = sizeof(struct nlm_void),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "NULL",
@@ -564,6 +565,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_testargs,
.pc_encode = nlmsvc_encode_testres,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_res),
.pc_xdrressize = Ck+St+2+No+Rg,
.pc_name = "TEST",
@@ -573,6 +575,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_lockargs,
.pc_encode = nlmsvc_encode_res,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_res),
.pc_xdrressize = Ck+St,
.pc_name = "LOCK",
@@ -582,6 +585,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_cancargs,
.pc_encode = nlmsvc_encode_res,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_res),
.pc_xdrressize = Ck+St,
.pc_name = "CANCEL",
@@ -591,6 +595,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_unlockargs,
.pc_encode = nlmsvc_encode_res,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_res),
.pc_xdrressize = Ck+St,
.pc_name = "UNLOCK",
@@ -600,6 +605,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_testargs,
.pc_encode = nlmsvc_encode_res,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_res),
.pc_xdrressize = Ck+St,
.pc_name = "GRANTED",
@@ -609,6 +615,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_testargs,
.pc_encode = nlmsvc_encode_void,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "TEST_MSG",
@@ -618,6 +625,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_lockargs,
.pc_encode = nlmsvc_encode_void,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "LOCK_MSG",
@@ -627,6 +635,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_cancargs,
.pc_encode = nlmsvc_encode_void,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "CANCEL_MSG",
@@ -636,6 +645,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_unlockargs,
.pc_encode = nlmsvc_encode_void,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "UNLOCK_MSG",
@@ -645,6 +655,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_testargs,
.pc_encode = nlmsvc_encode_void,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "GRANTED_MSG",
@@ -654,6 +665,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_void,
.pc_encode = nlmsvc_encode_void,
.pc_argsize = sizeof(struct nlm_res),
+ .pc_argzero = sizeof(struct nlm_res),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "TEST_RES",
@@ -663,6 +675,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_void,
.pc_encode = nlmsvc_encode_void,
.pc_argsize = sizeof(struct nlm_res),
+ .pc_argzero = sizeof(struct nlm_res),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "LOCK_RES",
@@ -672,6 +685,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_void,
.pc_encode = nlmsvc_encode_void,
.pc_argsize = sizeof(struct nlm_res),
+ .pc_argzero = sizeof(struct nlm_res),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "CANCEL_RES",
@@ -681,6 +695,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_void,
.pc_encode = nlmsvc_encode_void,
.pc_argsize = sizeof(struct nlm_res),
+ .pc_argzero = sizeof(struct nlm_res),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "UNLOCK_RES",
@@ -690,6 +705,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_res,
.pc_encode = nlmsvc_encode_void,
.pc_argsize = sizeof(struct nlm_res),
+ .pc_argzero = sizeof(struct nlm_res),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "GRANTED_RES",
@@ -699,6 +715,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_reboot,
.pc_encode = nlmsvc_encode_void,
.pc_argsize = sizeof(struct nlm_reboot),
+ .pc_argzero = sizeof(struct nlm_reboot),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "SM_NOTIFY",
@@ -708,6 +725,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_void,
.pc_encode = nlmsvc_encode_void,
.pc_argsize = sizeof(struct nlm_void),
+ .pc_argzero = sizeof(struct nlm_void),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "UNUSED",
@@ -717,6 +735,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_void,
.pc_encode = nlmsvc_encode_void,
.pc_argsize = sizeof(struct nlm_void),
+ .pc_argzero = sizeof(struct nlm_void),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "UNUSED",
@@ -726,6 +745,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_void,
.pc_encode = nlmsvc_encode_void,
.pc_argsize = sizeof(struct nlm_void),
+ .pc_argzero = sizeof(struct nlm_void),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = St,
.pc_name = "UNUSED",
@@ -735,6 +755,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_shareargs,
.pc_encode = nlmsvc_encode_shareres,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_res),
.pc_xdrressize = Ck+St+1,
.pc_name = "SHARE",
@@ -744,6 +765,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_shareargs,
.pc_encode = nlmsvc_encode_shareres,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_res),
.pc_xdrressize = Ck+St+1,
.pc_name = "UNSHARE",
@@ -753,6 +775,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_lockargs,
.pc_encode = nlmsvc_encode_res,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_res),
.pc_xdrressize = Ck+St,
.pc_name = "NM_LOCK",
@@ -762,6 +785,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_decode = nlmsvc_decode_notify,
.pc_encode = nlmsvc_encode_void,
.pc_argsize = sizeof(struct nlm_args),
+ .pc_argzero = sizeof(struct nlm_args),
.pc_ressize = sizeof(struct nlm_void),
.pc_xdrressize = 0,
.pc_name = "FREE_ALL",
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 47ccfcbe0a22..e272ad738faf 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -90,8 +90,14 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
return -ENOMEM;
INIT_LIST_HEAD(&entry->e_list);
- /* Initial hash reference */
- atomic_set(&entry->e_refcnt, 1);
+ /*
+ * We create entry with two references. One reference is kept by the
+ * hash table, the other reference is used to protect us from
+ * mb_cache_entry_delete_or_get() until the entry is fully setup. This
+ * avoids nesting of cache->c_list_lock into hash table bit locks which
+ * is problematic for RT.
+ */
+ atomic_set(&entry->e_refcnt, 2);
entry->e_key = key;
entry->e_value = value;
entry->e_reusable = reusable;
@@ -106,15 +112,12 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
}
}
hlist_bl_add_head(&entry->e_hash_list, head);
- /*
- * Add entry to LRU list before it can be found by
- * mb_cache_entry_delete() to avoid races
- */
+ hlist_bl_unlock(head);
spin_lock(&cache->c_list_lock);
list_add_tail(&entry->e_list, &cache->c_list);
cache->c_entry_count++;
spin_unlock(&cache->c_list_lock);
- hlist_bl_unlock(head);
+ mb_cache_entry_put(cache, entry);
return 0;
}
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 937fa5fae2b8..8afdc408ca4f 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -53,16 +53,16 @@ static int minix_mknod(struct user_namespace *mnt_userns, struct inode *dir,
}
static int minix_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
- struct dentry *dentry, umode_t mode)
+ struct file *file, umode_t mode)
{
int error;
struct inode *inode = minix_new_inode(dir, mode, &error);
if (inode) {
minix_set_inode(inode, 0);
mark_inode_dirty(inode);
- d_tmpfile(dentry, inode);
+ d_tmpfile(file, inode);
}
- return error;
+ return finish_open_simple(file, error);
}
static int minix_create(struct user_namespace *mnt_userns, struct inode *dir,
diff --git a/fs/namei.c b/fs/namei.c
index 53b4bc094db2..578c2110df02 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -986,7 +986,7 @@ static int nd_jump_root(struct nameidata *nd)
* Helper to directly jump to a known parsed path from ->get_link,
* caller must have taken a reference to path beforehand.
*/
-int nd_jump_link(struct path *path)
+int nd_jump_link(const struct path *path)
{
int error = -ELOOP;
struct nameidata *nd = current->nameidata;
@@ -1178,7 +1178,7 @@ static bool safe_hardlink_source(struct user_namespace *mnt_userns,
*
* Returns 0 if successful, -ve on error.
*/
-int may_linkat(struct user_namespace *mnt_userns, struct path *link)
+int may_linkat(struct user_namespace *mnt_userns, const struct path *link)
{
struct inode *inode = link->dentry->d_inode;
@@ -3583,72 +3583,94 @@ static int do_open(struct nameidata *nd,
* On non-idmapped mounts or if permission checking is to be performed on the
* raw inode simply passs init_user_ns.
*/
-struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns,
- struct dentry *dentry, umode_t mode, int open_flag)
+static int vfs_tmpfile(struct user_namespace *mnt_userns,
+ const struct path *parentpath,
+ struct file *file, umode_t mode)
{
- struct dentry *child = NULL;
- struct inode *dir = dentry->d_inode;
+ struct dentry *child;
+ struct inode *dir = d_inode(parentpath->dentry);
struct inode *inode;
int error;
/* we want directory to be writable */
error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
if (error)
- goto out_err;
- error = -EOPNOTSUPP;
+ return error;
if (!dir->i_op->tmpfile)
- goto out_err;
- error = -ENOMEM;
- child = d_alloc(dentry, &slash_name);
+ return -EOPNOTSUPP;
+ child = d_alloc(parentpath->dentry, &slash_name);
if (unlikely(!child))
- goto out_err;
+ return -ENOMEM;
+ file->f_path.mnt = parentpath->mnt;
+ file->f_path.dentry = child;
mode = vfs_prepare_mode(mnt_userns, dir, mode, mode, mode);
- error = dir->i_op->tmpfile(mnt_userns, dir, child, mode);
+ error = dir->i_op->tmpfile(mnt_userns, dir, file, mode);
+ dput(child);
if (error)
- goto out_err;
- error = -ENOENT;
- inode = child->d_inode;
- if (unlikely(!inode))
- goto out_err;
- if (!(open_flag & O_EXCL)) {
+ return error;
+ /* Don't check for other permissions, the inode was just created */
+ error = may_open(mnt_userns, &file->f_path, 0, file->f_flags);
+ if (error)
+ return error;
+ inode = file_inode(file);
+ if (!(file->f_flags & O_EXCL)) {
spin_lock(&inode->i_lock);
inode->i_state |= I_LINKABLE;
spin_unlock(&inode->i_lock);
}
ima_post_create_tmpfile(mnt_userns, inode);
- return child;
+ return 0;
+}
-out_err:
- dput(child);
- return ERR_PTR(error);
+/**
+ * vfs_tmpfile_open - open a tmpfile for kernel internal use
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @parentpath: path of the base directory
+ * @mode: mode of the new tmpfile
+ * @open_flag: flags
+ * @cred: credentials for open
+ *
+ * Create and open a temporary file. The file is not accounted in nr_files,
+ * hence this is only for kernel internal use, and must not be installed into
+ * file tables or such.
+ */
+struct file *vfs_tmpfile_open(struct user_namespace *mnt_userns,
+ const struct path *parentpath,
+ umode_t mode, int open_flag, const struct cred *cred)
+{
+ struct file *file;
+ int error;
+
+ file = alloc_empty_file_noaccount(open_flag, cred);
+ if (!IS_ERR(file)) {
+ error = vfs_tmpfile(mnt_userns, parentpath, file, mode);
+ if (error) {
+ fput(file);
+ file = ERR_PTR(error);
+ }
+ }
+ return file;
}
-EXPORT_SYMBOL(vfs_tmpfile);
+EXPORT_SYMBOL(vfs_tmpfile_open);
static int do_tmpfile(struct nameidata *nd, unsigned flags,
const struct open_flags *op,
struct file *file)
{
struct user_namespace *mnt_userns;
- struct dentry *child;
struct path path;
int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path);
+
if (unlikely(error))
return error;
error = mnt_want_write(path.mnt);
if (unlikely(error))
goto out;
mnt_userns = mnt_user_ns(path.mnt);
- child = vfs_tmpfile(mnt_userns, path.dentry, op->mode, op->open_flag);
- error = PTR_ERR(child);
- if (IS_ERR(child))
+ error = vfs_tmpfile(mnt_userns, &path, file, op->mode);
+ if (error)
goto out2;
- dput(path.dentry);
- path.dentry = child;
- audit_inode(nd->name, child, 0);
- /* Don't check for other permissions, the inode was just created */
- error = may_open(mnt_userns, &path, 0, op->open_flag);
- if (!error)
- error = vfs_open(&path, file);
+ audit_inode(nd->name, file->f_path.dentry, 0);
out2:
mnt_drop_write(path.mnt);
out:
@@ -5088,7 +5110,7 @@ int page_symlink(struct inode *inode, const char *symname, int len)
const struct address_space_operations *aops = mapping->a_ops;
bool nofs = !mapping_gfp_constraint(mapping, __GFP_FS);
struct page *page;
- void *fsdata;
+ void *fsdata = NULL;
int err;
unsigned int flags;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 8dcb08e1a885..d0cccddb7d08 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -1065,6 +1065,7 @@ static const struct svc_procedure nfs4_callback_procedures1[] = {
.pc_func = nfs4_callback_compound,
.pc_encode = nfs4_encode_void,
.pc_argsize = 256,
+ .pc_argzero = 256,
.pc_ressize = 256,
.pc_xdrressize = NFS4_CALLBACK_BUFSIZE,
.pc_name = "COMPOUND",
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index da8da5cdbbc1..f50e025ae406 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -280,7 +280,7 @@ EXPORT_SYMBOL_GPL(nfs_put_client);
static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *data)
{
struct nfs_client *clp;
- const struct sockaddr *sap = data->addr;
+ const struct sockaddr *sap = (struct sockaddr *)data->addr;
struct nfs_net *nn = net_generic(data->net, nfs_net_id);
int error;
@@ -666,7 +666,7 @@ static int nfs_init_server(struct nfs_server *server,
struct rpc_timeout timeparms;
struct nfs_client_initdata cl_init = {
.hostname = ctx->nfs_server.hostname,
- .addr = (const struct sockaddr *)&ctx->nfs_server.address,
+ .addr = &ctx->nfs_server._address,
.addrlen = ctx->nfs_server.addrlen,
.nfs_mod = ctx->nfs_mod,
.proto = ctx->nfs_server.protocol,
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 5c97cad741a7..ead8a0e06abf 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -228,8 +228,7 @@ again:
*
*/
void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
- fmode_t type,
- const nfs4_stateid *stateid,
+ fmode_t type, const nfs4_stateid *stateid,
unsigned long pagemod_limit)
{
struct nfs_delegation *delegation;
@@ -239,25 +238,24 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (delegation != NULL) {
spin_lock(&delegation->lock);
- if (nfs4_is_valid_delegation(delegation, 0)) {
- nfs4_stateid_copy(&delegation->stateid, stateid);
- delegation->type = type;
- delegation->pagemod_limit = pagemod_limit;
- oldcred = delegation->cred;
- delegation->cred = get_cred(cred);
- clear_bit(NFS_DELEGATION_NEED_RECLAIM,
- &delegation->flags);
- spin_unlock(&delegation->lock);
- rcu_read_unlock();
- put_cred(oldcred);
- trace_nfs4_reclaim_delegation(inode, type);
- return;
- }
- /* We appear to have raced with a delegation return. */
+ nfs4_stateid_copy(&delegation->stateid, stateid);
+ delegation->type = type;
+ delegation->pagemod_limit = pagemod_limit;
+ oldcred = delegation->cred;
+ delegation->cred = get_cred(cred);
+ clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
+ if (test_and_clear_bit(NFS_DELEGATION_REVOKED,
+ &delegation->flags))
+ atomic_long_inc(&nfs_active_delegations);
spin_unlock(&delegation->lock);
+ rcu_read_unlock();
+ put_cred(oldcred);
+ trace_nfs4_reclaim_delegation(inode, type);
+ } else {
+ rcu_read_unlock();
+ nfs_inode_set_delegation(inode, cred, type, stateid,
+ pagemod_limit);
}
- rcu_read_unlock();
- nfs_inode_set_delegation(inode, cred, type, stateid, pagemod_limit);
}
static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 5d6c2ddc7ea6..f594dac436a7 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2022,7 +2022,7 @@ static int nfs_finish_open(struct nfs_open_context *ctx,
err = finish_open(file, dentry, do_open);
if (err)
goto out;
- if (S_ISREG(file->f_path.dentry->d_inode->i_mode))
+ if (S_ISREG(file_inode(file)->i_mode))
nfs_file_set_open_context(file, ctx);
else
err = -EOPENSTALE;
@@ -2489,9 +2489,8 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
spin_unlock(&dentry->d_lock);
goto out;
}
- if (dentry->d_fsdata)
- /* old devname */
- kfree(dentry->d_fsdata);
+ /* old devname */
+ kfree(dentry->d_fsdata);
dentry->d_fsdata = NFS_FSDATA_BLOCKED;
spin_unlock(&dentry->d_lock);
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index e87d500ad95a..6603b5cee029 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -16,8 +16,9 @@
#include "dns_resolve.h"
ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
- struct sockaddr *sa, size_t salen)
+ struct sockaddr_storage *ss, size_t salen)
{
+ struct sockaddr *sa = (struct sockaddr *)ss;
ssize_t ret;
char *ip_addr = NULL;
int ip_len;
@@ -341,7 +342,7 @@ out:
}
ssize_t nfs_dns_resolve_name(struct net *net, char *name,
- size_t namelen, struct sockaddr *sa, size_t salen)
+ size_t namelen, struct sockaddr_storage *ss, size_t salen)
{
struct nfs_dns_ent key = {
.hostname = name,
@@ -354,7 +355,7 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name,
ret = do_cache_lookup_wait(nn->nfs_dns_resolve, &key, &item);
if (ret == 0) {
if (salen >= item->addrlen) {
- memcpy(sa, &item->addr, item->addrlen);
+ memcpy(ss, &item->addr, item->addrlen);
ret = item->addrlen;
} else
ret = -EOVERFLOW;
diff --git a/fs/nfs/dns_resolve.h b/fs/nfs/dns_resolve.h
index 576ff4b54c82..fe3b172c4de1 100644
--- a/fs/nfs/dns_resolve.h
+++ b/fs/nfs/dns_resolve.h
@@ -32,6 +32,6 @@ extern void nfs_dns_resolver_cache_destroy(struct net *net);
#endif
extern ssize_t nfs_dns_resolve_name(struct net *net, char *name,
- size_t namelen, struct sockaddr *sa, size_t salen);
+ size_t namelen, struct sockaddr_storage *sa, size_t salen);
#endif
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index e032fe201a36..d8ec889a4b3f 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -567,7 +567,8 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
}
wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING,
- nfs_wait_bit_killable, TASK_KILLABLE);
+ nfs_wait_bit_killable,
+ TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
lock_page(page);
mapping = page_file_mapping(page);
@@ -655,9 +656,9 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
goto out;
}
if (mntflags & NFS_MOUNT_WRITE_WAIT) {
- result = filemap_fdatawait_range(file->f_mapping,
- iocb->ki_pos - written,
- iocb->ki_pos - 1);
+ filemap_fdatawait_range(file->f_mapping,
+ iocb->ki_pos - written,
+ iocb->ki_pos - 1);
}
result = generic_write_sync(iocb, written);
if (result < 0)
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 7d285561e59f..1ec79ccf89ad 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -30,14 +30,20 @@
#define FF_LAYOUT_POLL_RETRY_MAX (15*HZ)
#define FF_LAYOUTRETURN_MAXERR 20
+enum nfs4_ff_op_type {
+ NFS4_FF_OP_LAYOUTSTATS,
+ NFS4_FF_OP_LAYOUTRETURN,
+};
+
static unsigned short io_maxretrans;
static const struct pnfs_commit_ops ff_layout_commit_ops;
static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
struct nfs_pgio_header *hdr);
-static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
+static int
+ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
struct nfs42_layoutstat_devinfo *devinfo,
- int dev_limit);
+ int dev_limit, enum nfs4_ff_op_type type);
static void ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr,
const struct nfs42_layoutstat_devinfo *devinfo,
struct nfs4_ff_layout_mirror *mirror);
@@ -1373,6 +1379,11 @@ static int ff_layout_read_prepare_common(struct rpc_task *task,
return -EIO;
}
+ if (!pnfs_is_valid_lseg(hdr->lseg)) {
+ rpc_exit(task, -EAGAIN);
+ return -EAGAIN;
+ }
+
ff_layout_read_record_layoutstats_start(task, hdr);
return 0;
}
@@ -1553,6 +1564,11 @@ static int ff_layout_write_prepare_common(struct rpc_task *task,
return -EIO;
}
+ if (!pnfs_is_valid_lseg(hdr->lseg)) {
+ rpc_exit(task, -EAGAIN);
+ return -EAGAIN;
+ }
+
ff_layout_write_record_layoutstats_start(task, hdr);
return 0;
}
@@ -1645,15 +1661,23 @@ static void ff_layout_commit_record_layoutstats_done(struct rpc_task *task,
set_bit(NFS_LSEG_LAYOUTRETURN, &cdata->lseg->pls_flags);
}
-static void ff_layout_commit_prepare_common(struct rpc_task *task,
- struct nfs_commit_data *cdata)
+static int ff_layout_commit_prepare_common(struct rpc_task *task,
+ struct nfs_commit_data *cdata)
{
+ if (!pnfs_is_valid_lseg(cdata->lseg)) {
+ rpc_exit(task, -EAGAIN);
+ return -EAGAIN;
+ }
+
ff_layout_commit_record_layoutstats_start(task, cdata);
+ return 0;
}
static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data)
{
- ff_layout_commit_prepare_common(task, data);
+ if (ff_layout_commit_prepare_common(task, data))
+ return;
+
rpc_call_start(task);
}
@@ -1949,6 +1973,65 @@ ff_layout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
ff_layout_initiate_commit);
}
+static bool ff_layout_match_rw(const struct rpc_task *task,
+ const struct nfs_pgio_header *hdr,
+ const struct pnfs_layout_segment *lseg)
+{
+ return hdr->lseg == lseg;
+}
+
+static bool ff_layout_match_commit(const struct rpc_task *task,
+ const struct nfs_commit_data *cdata,
+ const struct pnfs_layout_segment *lseg)
+{
+ return cdata->lseg == lseg;
+}
+
+static bool ff_layout_match_io(const struct rpc_task *task, const void *data)
+{
+ const struct rpc_call_ops *ops = task->tk_ops;
+
+ if (ops == &ff_layout_read_call_ops_v3 ||
+ ops == &ff_layout_read_call_ops_v4 ||
+ ops == &ff_layout_write_call_ops_v3 ||
+ ops == &ff_layout_write_call_ops_v4)
+ return ff_layout_match_rw(task, task->tk_calldata, data);
+ if (ops == &ff_layout_commit_call_ops_v3 ||
+ ops == &ff_layout_commit_call_ops_v4)
+ return ff_layout_match_commit(task, task->tk_calldata, data);
+ return false;
+}
+
+static void ff_layout_cancel_io(struct pnfs_layout_segment *lseg)
+{
+ struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg);
+ struct nfs4_ff_layout_mirror *mirror;
+ struct nfs4_ff_layout_ds *mirror_ds;
+ struct nfs4_pnfs_ds *ds;
+ struct nfs_client *ds_clp;
+ struct rpc_clnt *clnt;
+ u32 idx;
+
+ for (idx = 0; idx < flseg->mirror_array_cnt; idx++) {
+ mirror = flseg->mirror_array[idx];
+ mirror_ds = mirror->mirror_ds;
+ if (!mirror_ds)
+ continue;
+ ds = mirror->mirror_ds->ds;
+ if (!ds)
+ continue;
+ ds_clp = ds->ds_clp;
+ if (!ds_clp)
+ continue;
+ clnt = ds_clp->cl_rpcclient;
+ if (!clnt)
+ continue;
+ if (!rpc_cancel_tasks(clnt, -EAGAIN, ff_layout_match_io, lseg))
+ continue;
+ rpc_clnt_disconnect(clnt);
+ }
+}
+
static struct pnfs_ds_commit_info *
ff_layout_get_ds_info(struct inode *inode)
{
@@ -2161,8 +2244,9 @@ ff_layout_prepare_layoutreturn(struct nfs4_layoutreturn_args *args)
FF_LAYOUTRETURN_MAXERR);
spin_lock(&args->inode->i_lock);
- ff_args->num_dev = ff_layout_mirror_prepare_stats(&ff_layout->generic_hdr,
- &ff_args->devinfo[0], ARRAY_SIZE(ff_args->devinfo));
+ ff_args->num_dev = ff_layout_mirror_prepare_stats(
+ &ff_layout->generic_hdr, &ff_args->devinfo[0],
+ ARRAY_SIZE(ff_args->devinfo), NFS4_FF_OP_LAYOUTRETURN);
spin_unlock(&args->inode->i_lock);
args->ld_private->ops = &layoutreturn_ops;
@@ -2396,7 +2480,7 @@ static const struct nfs4_xdr_opaque_ops layoutstat_ops = {
static int
ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
struct nfs42_layoutstat_devinfo *devinfo,
- int dev_limit)
+ int dev_limit, enum nfs4_ff_op_type type)
{
struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(lo);
struct nfs4_ff_layout_mirror *mirror;
@@ -2408,7 +2492,9 @@ ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
break;
if (IS_ERR_OR_NULL(mirror->mirror_ds))
continue;
- if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags))
+ if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL,
+ &mirror->flags) &&
+ type != NFS4_FF_OP_LAYOUTRETURN)
continue;
/* mirror refcount put in cleanup_layoutstats */
if (!refcount_inc_not_zero(&mirror->ref))
@@ -2448,7 +2534,9 @@ ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)
spin_lock(&args->inode->i_lock);
ff_layout = FF_LAYOUT_FROM_HDR(NFS_I(args->inode)->layout);
args->num_dev = ff_layout_mirror_prepare_stats(&ff_layout->generic_hdr,
- &args->devinfo[0], dev_count);
+ &args->devinfo[0],
+ dev_count,
+ NFS4_FF_OP_LAYOUTSTATS);
spin_unlock(&args->inode->i_lock);
if (!args->num_dev) {
kfree(args->devinfo);
@@ -2501,6 +2589,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = {
.prepare_layoutreturn = ff_layout_prepare_layoutreturn,
.sync = pnfs_nfs_generic_sync,
.prepare_layoutstats = ff_layout_prepare_layoutstats,
+ .cancel_io = ff_layout_cancel_io,
};
static int __init nfs4flexfilelayout_init(void)
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 4da701fd1424..09833ec102fc 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -273,9 +273,9 @@ static const struct constant_table nfs_secflavor_tokens[] = {
* Address family must be initialized, and address must not be
* the ANY address for that family.
*/
-static int nfs_verify_server_address(struct sockaddr *addr)
+static int nfs_verify_server_address(struct sockaddr_storage *addr)
{
- switch (addr->sa_family) {
+ switch (addr->ss_family) {
case AF_INET: {
struct sockaddr_in *sa = (struct sockaddr_in *)addr;
return sa->sin_addr.s_addr != htonl(INADDR_ANY);
@@ -969,7 +969,7 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
{
struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct nfs_fh *mntfh = ctx->mntfh;
- struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+ struct sockaddr_storage *sap = &ctx->nfs_server._address;
int extra_flags = NFS_MOUNT_LEGACY_INTERFACE;
int ret;
@@ -1044,7 +1044,7 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
memcpy(sap, &data->addr, sizeof(data->addr));
ctx->nfs_server.addrlen = sizeof(data->addr);
ctx->nfs_server.port = ntohs(data->addr.sin_port);
- if (sap->sa_family != AF_INET ||
+ if (sap->ss_family != AF_INET ||
!nfs_verify_server_address(sap))
goto out_no_address;
@@ -1200,7 +1200,7 @@ static int nfs4_parse_monolithic(struct fs_context *fc,
struct nfs4_mount_data *data)
{
struct nfs_fs_context *ctx = nfs_fc2context(fc);
- struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+ struct sockaddr_storage *sap = &ctx->nfs_server._address;
int ret;
char *c;
@@ -1314,7 +1314,7 @@ static int nfs_fs_context_validate(struct fs_context *fc)
{
struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct nfs_subversion *nfs_mod;
- struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+ struct sockaddr_storage *sap = &ctx->nfs_server._address;
int max_namelen = PAGE_SIZE;
int max_pathlen = NFS_MAXPATHLEN;
int port = 0;
@@ -1540,7 +1540,7 @@ static int nfs_init_fs_context(struct fs_context *fc)
ctx->version = nfss->nfs_client->rpc_ops->version;
ctx->minorversion = nfss->nfs_client->cl_minorversion;
- memcpy(&ctx->nfs_server.address, &nfss->nfs_client->cl_addr,
+ memcpy(&ctx->nfs_server._address, &nfss->nfs_client->cl_addr,
ctx->nfs_server.addrlen);
if (fc->net_ns != net) {
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index bea7c005119c..6b2cfa59a1a2 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -72,18 +72,13 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
return nfs_fileid_to_ino_t(fattr->fileid);
}
-static int nfs_wait_killable(int mode)
+int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
{
- freezable_schedule_unsafe();
+ schedule();
if (signal_pending_state(mode, current))
return -ERESTARTSYS;
return 0;
}
-
-int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
-{
- return nfs_wait_killable(mode);
-}
EXPORT_SYMBOL_GPL(nfs_wait_bit_killable);
/**
@@ -318,7 +313,7 @@ struct nfs_find_desc {
static int
nfs_find_actor(struct inode *inode, void *opaque)
{
- struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque;
+ struct nfs_find_desc *desc = opaque;
struct nfs_fh *fh = desc->fh;
struct nfs_fattr *fattr = desc->fattr;
@@ -336,7 +331,7 @@ nfs_find_actor(struct inode *inode, void *opaque)
static int
nfs_init_locked(struct inode *inode, void *opaque)
{
- struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque;
+ struct nfs_find_desc *desc = opaque;
struct nfs_fattr *fattr = desc->fattr;
set_nfs_fileid(inode, fattr->fileid);
@@ -1332,7 +1327,8 @@ int nfs_clear_invalid_mapping(struct address_space *mapping)
*/
for (;;) {
ret = wait_on_bit_action(bitlock, NFS_INO_INVALIDATING,
- nfs_wait_bit_killable, TASK_KILLABLE);
+ nfs_wait_bit_killable,
+ TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
if (ret)
goto out;
spin_lock(&inode->i_lock);
@@ -2271,7 +2267,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
static void init_once(void *foo)
{
- struct nfs_inode *nfsi = (struct nfs_inode *) foo;
+ struct nfs_inode *nfsi = foo;
inode_init_once(&nfsi->vfs_inode);
INIT_LIST_HEAD(&nfsi->open_files);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 898dd95bc7a7..647fc3f547cb 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -69,7 +69,7 @@ static inline fmode_t flags_to_mode(int flags)
struct nfs_client_initdata {
unsigned long init_flags;
const char *hostname; /* Hostname of the server */
- const struct sockaddr *addr; /* Address of the server */
+ const struct sockaddr_storage *addr; /* Address of the server */
const char *nodename; /* Hostname of the client */
const char *ip_addr; /* IP address of the client */
size_t addrlen;
@@ -180,7 +180,7 @@ static inline struct nfs_fs_context *nfs_fc2context(const struct fs_context *fc)
/* mount_clnt.c */
struct nfs_mount_request {
- struct sockaddr *sap;
+ struct sockaddr_storage *sap;
size_t salen;
char *hostname;
char *dirpath;
@@ -223,7 +223,7 @@ extern void nfs4_server_set_init_caps(struct nfs_server *);
extern struct nfs_server *nfs4_create_server(struct fs_context *);
extern struct nfs_server *nfs4_create_referral_server(struct fs_context *);
extern int nfs4_update_server(struct nfs_server *server, const char *hostname,
- struct sockaddr *sap, size_t salen,
+ struct sockaddr_storage *sap, size_t salen,
struct net *net);
extern void nfs_free_server(struct nfs_server *server);
extern struct nfs_server *nfs_clone_server(struct nfs_server *,
@@ -235,7 +235,7 @@ extern int nfs_client_init_status(const struct nfs_client *clp);
extern int nfs_wait_client_init_complete(const struct nfs_client *clp);
extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
- const struct sockaddr *ds_addr,
+ const struct sockaddr_storage *ds_addr,
int ds_addrlen, int ds_proto,
unsigned int ds_timeo,
unsigned int ds_retrans,
@@ -243,7 +243,7 @@ extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *,
struct inode *);
extern struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
- const struct sockaddr *ds_addr, int ds_addrlen,
+ const struct sockaddr_storage *ds_addr, int ds_addrlen,
int ds_proto, unsigned int ds_timeo,
unsigned int ds_retrans);
#ifdef CONFIG_PROC_FS
@@ -435,7 +435,6 @@ extern void nfs_zap_acl_cache(struct inode *inode);
extern void nfs_set_cache_invalid(struct inode *inode, unsigned long flags);
extern bool nfs_check_cache_invalid(struct inode *, unsigned long);
extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode);
-extern int nfs_wait_atomic_killable(atomic_t *p, unsigned int mode);
/* super.c */
extern const struct super_operations nfs_sops;
@@ -503,7 +502,6 @@ extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
const struct nfs_pgio_completion_ops *compl_ops);
extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_commit_free(struct nfs_commit_data *p);
-extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
extern void nfs_commit_prepare(struct rpc_task *task, void *calldata);
extern int nfs_initiate_commit(struct rpc_clnt *clnt,
struct nfs_commit_data *data,
@@ -896,13 +894,13 @@ static inline bool nfs_error_is_fatal_on_server(int err)
* Select between a default port value and a user-specified port value.
* If a zero value is set, then autobind will be used.
*/
-static inline void nfs_set_port(struct sockaddr *sap, int *port,
+static inline void nfs_set_port(struct sockaddr_storage *sap, int *port,
const unsigned short default_port)
{
if (*port == NFS_UNSPEC_PORT)
*port = default_port;
- rpc_set_port(sap, *port);
+ rpc_set_port((struct sockaddr *)sap, *port);
}
struct nfs_direct_req {
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index c5e3b6b3366a..68e76b626371 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -158,7 +158,7 @@ int nfs_mount(struct nfs_mount_request *info, int timeo, int retrans)
struct rpc_create_args args = {
.net = info->net,
.protocol = info->protocol,
- .address = info->sap,
+ .address = (struct sockaddr *)info->sap,
.addrsize = info->salen,
.timeout = &mnt_timeout,
.servername = info->hostname,
@@ -245,7 +245,7 @@ void nfs_umount(const struct nfs_mount_request *info)
struct rpc_create_args args = {
.net = info->net,
.protocol = IPPROTO_UDP,
- .address = info->sap,
+ .address = (struct sockaddr *)info->sap,
.addrsize = info->salen,
.timeout = &nfs_umnt_timeout,
.servername = info->hostname,
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 3295af4110f1..2f336ace7555 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -175,7 +175,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
}
/* for submounts we want the same server; referrals will reassign */
- memcpy(&ctx->nfs_server.address, &client->cl_addr, client->cl_addrlen);
+ memcpy(&ctx->nfs_server._address, &client->cl_addr, client->cl_addrlen);
ctx->nfs_server.addrlen = client->cl_addrlen;
ctx->nfs_server.port = server->port;
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
index b49359afac88..669cda757a5c 100644
--- a/fs/nfs/nfs3client.c
+++ b/fs/nfs/nfs3client.c
@@ -78,7 +78,7 @@ struct nfs_server *nfs3_clone_server(struct nfs_server *source,
* the MDS.
*/
struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
- const struct sockaddr *ds_addr, int ds_addrlen,
+ const struct sockaddr_storage *ds_addr, int ds_addrlen,
int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans)
{
struct rpc_timeout ds_timeout;
@@ -98,7 +98,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
char buf[INET6_ADDRSTRLEN + 1];
/* fake a hostname because lockd wants it */
- if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0)
+ if (rpc_ntop((struct sockaddr *)ds_addr, buf, sizeof(buf)) <= 0)
return ERR_PTR(-EINVAL);
cl_init.hostname = buf;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 1597eef40d54..2e7579626cf0 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -36,7 +36,8 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
res = rpc_call_sync(clnt, msg, flags);
if (res != -EJUKEBOX)
break;
- freezable_schedule_timeout_killable_unsafe(NFS_JUKEBOX_RETRY_TIME);
+ __set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
+ schedule_timeout(NFS_JUKEBOX_RETRY_TIME);
res = -ERESTARTSYS;
} while (!fatal_signal_pending(current));
return res;
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 6dab9e408372..ecb428512fe1 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -341,7 +341,7 @@ static ssize_t _nfs42_proc_copy(struct file *src,
return status;
}
}
- status = nfs_filemap_write_and_wait_range(file_inode(src)->i_mapping,
+ status = nfs_filemap_write_and_wait_range(src->f_mapping,
pos_src, pos_src + (loff_t)count - 1);
if (status)
return status;
@@ -1093,6 +1093,9 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
&args.seq_args, &res.seq_res, 0);
trace_nfs4_clone(src_inode, dst_inode, &args, status);
if (status == 0) {
+ /* a zero-length count means clone to EOF in src */
+ if (count == 0 && res.dst_fattr->valid & NFS_ATTR_FATTR_SIZE)
+ count = nfs_size_to_loff_t(res.dst_fattr->size) - dst_offset;
nfs42_copy_dest_done(dst_inode, dst_offset, count);
status = nfs_post_op_update_inode(dst_inode, res.dst_fattr);
}
@@ -1175,6 +1178,7 @@ static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
ret = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
&res.seq_res, 1);
+ trace_nfs4_removexattr(inode, name, ret);
if (!ret)
nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
@@ -1214,6 +1218,7 @@ static int _nfs42_proc_setxattr(struct inode *inode, const char *name,
ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args,
&res.seq_res, 1);
+ trace_nfs4_setxattr(inode, name, ret);
for (; np > 0; np--)
put_page(pages[np - 1]);
@@ -1246,6 +1251,7 @@ static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name,
ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args,
&res.seq_res, 0);
+ trace_nfs4_getxattr(inode, name, ret);
if (ret < 0)
return ret;
@@ -1317,6 +1323,7 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf,
ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args,
&res.seq_res, 0);
+ trace_nfs4_listxattr(inode, ret);
if (ret >= 0) {
ret = res.copied;
diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c
index a9bf09fdf2c3..76ae11834206 100644
--- a/fs/nfs/nfs42xattr.c
+++ b/fs/nfs/nfs42xattr.c
@@ -981,7 +981,7 @@ nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc)
static void nfs4_xattr_cache_init_once(void *p)
{
- struct nfs4_xattr_cache *cache = (struct nfs4_xattr_cache *)p;
+ struct nfs4_xattr_cache *cache = p;
spin_lock_init(&cache->listxattr_lock);
atomic_long_set(&cache->nent, 0);
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index b56f05113d36..fe1aeb0f048f 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -569,6 +569,14 @@ static int decode_listxattrs(struct xdr_stream *xdr,
*/
if (status == -ETOOSMALL)
status = -ERANGE;
+ /*
+ * Special case: for LISTXATTRS, NFS4ERR_NOXATTR
+ * should be translated to success with zero-length reply.
+ */
+ if (status == -ENODATA) {
+ res->eof = true;
+ status = 0;
+ }
goto out;
}
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 79df6e83881b..cfef738d765e 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -281,7 +281,7 @@ struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *,
int nfs4_submount(struct fs_context *, struct nfs_server *);
int nfs4_replace_transport(struct nfs_server *server,
const struct nfs4_fs_locations *locations);
-size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa,
+size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr_storage *ss,
size_t salen, struct net *net, int port);
/* nfs4proc.c */
extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *);
@@ -459,7 +459,6 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *);
/* nfs4renewd.c */
extern void nfs4_schedule_state_renewal(struct nfs_client *);
-extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
extern void nfs4_kill_renewd(struct nfs_client *);
extern void nfs4_renew_state(struct work_struct *);
extern void nfs4_set_lease_period(struct nfs_client *clp, unsigned long lease);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 3c5678aec006..d3051b051a56 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -254,7 +254,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
goto error;
ip_addr = (const char *)buf;
}
- strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
+ strscpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
err = nfs_idmap_new(clp);
if (err < 0) {
@@ -346,6 +346,7 @@ int nfs40_init_client(struct nfs_client *clp)
ret = nfs4_setup_slot_table(tbl, NFS4_MAX_SLOT_TABLE,
"NFSv4.0 transport Slot table");
if (ret) {
+ nfs4_shutdown_slot_table(tbl);
kfree(tbl);
return ret;
}
@@ -889,7 +890,7 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
*/
static int nfs4_set_client(struct nfs_server *server,
const char *hostname,
- const struct sockaddr *addr,
+ const struct sockaddr_storage *addr,
const size_t addrlen,
const char *ip_addr,
int proto, const struct rpc_timeout *timeparms,
@@ -924,7 +925,7 @@ static int nfs4_set_client(struct nfs_server *server,
__set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status))
__set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
- server->port = rpc_get_port(addr);
+ server->port = rpc_get_port((struct sockaddr *)addr);
/* Allocate or find a client reference we can use */
clp = nfs_get_client(&cl_init);
@@ -960,7 +961,7 @@ static int nfs4_set_client(struct nfs_server *server,
* the MDS.
*/
struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
- const struct sockaddr *ds_addr, int ds_addrlen,
+ const struct sockaddr_storage *ds_addr, int ds_addrlen,
int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans,
u32 minor_version)
{
@@ -980,7 +981,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
};
char buf[INET6_ADDRSTRLEN + 1];
- if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0)
+ if (rpc_ntop((struct sockaddr *)ds_addr, buf, sizeof(buf)) <= 0)
return ERR_PTR(-EINVAL);
cl_init.hostname = buf;
@@ -1148,7 +1149,7 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
/* Get a client record */
error = nfs4_set_client(server,
ctx->nfs_server.hostname,
- &ctx->nfs_server.address,
+ &ctx->nfs_server._address,
ctx->nfs_server.addrlen,
ctx->client_address,
ctx->nfs_server.protocol,
@@ -1238,7 +1239,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
rpc_set_port(&ctx->nfs_server.address, NFS_RDMA_PORT);
error = nfs4_set_client(server,
ctx->nfs_server.hostname,
- &ctx->nfs_server.address,
+ &ctx->nfs_server._address,
ctx->nfs_server.addrlen,
parent_client->cl_ipaddr,
XPRT_TRANSPORT_RDMA,
@@ -1254,7 +1255,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
rpc_set_port(&ctx->nfs_server.address, NFS_PORT);
error = nfs4_set_client(server,
ctx->nfs_server.hostname,
- &ctx->nfs_server.address,
+ &ctx->nfs_server._address,
ctx->nfs_server.addrlen,
parent_client->cl_ipaddr,
XPRT_TRANSPORT_TCP,
@@ -1303,14 +1304,14 @@ error:
* Returns zero on success, or a negative errno value.
*/
int nfs4_update_server(struct nfs_server *server, const char *hostname,
- struct sockaddr *sap, size_t salen, struct net *net)
+ struct sockaddr_storage *sap, size_t salen, struct net *net)
{
struct nfs_client *clp = server->nfs_client;
struct rpc_clnt *clnt = server->client;
struct xprt_create xargs = {
.ident = clp->cl_proto,
.net = net,
- .dstaddr = sap,
+ .dstaddr = (struct sockaddr *)sap,
.addrlen = salen,
.servername = hostname,
};
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index ec6afd3c4bca..e3fdd2f45b01 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -583,7 +583,7 @@ static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux)
struct request_key_auth *rka = get_request_key_auth(authkey);
struct rpc_pipe_msg *msg;
struct idmap_msg *im;
- struct idmap *idmap = (struct idmap *)aux;
+ struct idmap *idmap = aux;
struct key *key = rka->target_key;
int ret = -ENOKEY;
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index f2dbf904c598..9a98595bb160 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -164,16 +164,17 @@ static int nfs4_validate_fspath(struct dentry *dentry,
return 0;
}
-size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa,
+size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr_storage *ss,
size_t salen, struct net *net, int port)
{
+ struct sockaddr *sa = (struct sockaddr *)ss;
ssize_t ret;
ret = rpc_pton(net, string, len, sa, salen);
if (ret == 0) {
ret = rpc_uaddr2sockaddr(net, string, len, sa, salen);
if (ret == 0) {
- ret = nfs_dns_resolve_name(net, string, len, sa, salen);
+ ret = nfs_dns_resolve_name(net, string, len, ss, salen);
if (ret < 0)
ret = 0;
}
@@ -331,7 +332,7 @@ static int try_location(struct fs_context *fc,
ctx->nfs_server.addrlen =
nfs_parse_server_name(buf->data, buf->len,
- &ctx->nfs_server.address,
+ &ctx->nfs_server._address,
sizeof(ctx->nfs_server._address),
fc->net_ns, 0);
if (ctx->nfs_server.addrlen == 0)
@@ -483,14 +484,13 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server,
char *page, char *page2,
const struct nfs4_fs_location *location)
{
- const size_t addr_bufsize = sizeof(struct sockaddr_storage);
struct net *net = rpc_net_ns(server->client);
- struct sockaddr *sap;
+ struct sockaddr_storage *sap;
unsigned int s;
size_t salen;
int error;
- sap = kmalloc(addr_bufsize, GFP_KERNEL);
+ sap = kmalloc(sizeof(*sap), GFP_KERNEL);
if (sap == NULL)
return -ENOMEM;
@@ -506,10 +506,10 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server,
continue;
salen = nfs_parse_server_name(buf->data, buf->len,
- sap, addr_bufsize, net, 0);
+ sap, sizeof(*sap), net, 0);
if (salen == 0)
continue;
- rpc_set_port(sap, NFS_PORT);
+ rpc_set_port((struct sockaddr *)sap, NFS_PORT);
error = -ENOMEM;
hostname = kmemdup_nul(buf->data, buf->len, GFP_KERNEL);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 3ed14a2a84a4..86ed5c0142c3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -416,8 +416,8 @@ static int nfs4_delay_killable(long *timeout)
{
might_sleep();
- freezable_schedule_timeout_killable_unsafe(
- nfs4_update_delay(timeout));
+ __set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
+ schedule_timeout(nfs4_update_delay(timeout));
if (!__fatal_signal_pending(current))
return 0;
return -EINTR;
@@ -427,7 +427,8 @@ static int nfs4_delay_interruptible(long *timeout)
{
might_sleep();
- freezable_schedule_timeout_interruptible_unsafe(nfs4_update_delay(timeout));
+ __set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE_UNSAFE);
+ schedule_timeout(nfs4_update_delay(timeout));
if (!signal_pending(current))
return 0;
return __fatal_signal_pending(current) ? -EINTR :-ERESTARTSYS;
@@ -3950,7 +3951,7 @@ static void test_fs_location_for_trunking(struct nfs4_fs_location *location,
for (i = 0; i < location->nservers; i++) {
struct nfs4_string *srv_loc = &location->servers[i];
- struct sockaddr addr;
+ struct sockaddr_storage addr;
size_t addrlen;
struct xprt_create xprt_args = {
.ident = 0,
@@ -3973,7 +3974,7 @@ static void test_fs_location_for_trunking(struct nfs4_fs_location *location,
clp->cl_net, server->port);
if (!addrlen)
return;
- xprt_args.dstaddr = &addr;
+ xprt_args.dstaddr = (struct sockaddr *)&addr;
xprt_args.addrlen = addrlen;
servername = kmalloc(srv_loc->len + 1, GFP_KERNEL);
if (!servername)
@@ -6607,7 +6608,7 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
struct nfs4_delegreturndata *d_data;
struct pnfs_layout_hdr *lo;
- d_data = (struct nfs4_delegreturndata *)data;
+ d_data = data;
if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task)) {
nfs4_sequence_done(task, &d_data->res.seq_res);
@@ -7137,6 +7138,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
{
struct nfs4_lockdata *data = calldata;
struct nfs4_lock_state *lsp = data->lsp;
+ struct nfs_server *server = NFS_SERVER(d_inode(data->ctx->dentry));
if (!nfs4_sequence_done(task, &data->res.seq_res))
return;
@@ -7144,8 +7146,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
data->rpc_status = task->tk_status;
switch (task->tk_status) {
case 0:
- renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
- data->timestamp);
+ renew_lease(server, data->timestamp);
if (data->arg.new_lock && !data->cancelled) {
data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
@@ -7166,6 +7167,8 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
if (!nfs4_stateid_match(&data->arg.open_stateid,
&lsp->ls_state->open_stateid))
goto out_restart;
+ else if (nfs4_async_handle_error(task, server, lsp->ls_state, NULL) == -EAGAIN)
+ goto out_restart;
} else if (!nfs4_stateid_match(&data->arg.lock_stateid,
&lsp->ls_stateid))
goto out_restart;
@@ -7406,7 +7409,8 @@ nfs4_retry_setlk_simple(struct nfs4_state *state, int cmd,
status = nfs4_proc_setlk(state, cmd, request);
if ((status != -EAGAIN) || IS_SETLK(cmd))
break;
- freezable_schedule_timeout_interruptible(timeout);
+ __set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
+ schedule_timeout(timeout);
timeout *= 2;
timeout = min_t(unsigned long, NFS4_LOCK_MAXTIMEOUT, timeout);
status = -ERESTARTSYS;
@@ -7474,10 +7478,8 @@ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
break;
status = -ERESTARTSYS;
- freezer_do_not_count();
- wait_woken(&waiter.wait, TASK_INTERRUPTIBLE,
+ wait_woken(&waiter.wait, TASK_INTERRUPTIBLE|TASK_FREEZABLE,
NFS4_LOCK_MAXTIMEOUT);
- freezer_count();
} while (!signalled());
remove_wait_queue(q, &waiter.wait);
@@ -8900,7 +8902,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cred)
void nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt,
void *data)
{
- struct nfs4_add_xprt_data *adata = (struct nfs4_add_xprt_data *)data;
+ struct nfs4_add_xprt_data *adata = data;
struct rpc_task *task;
int status;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 9bab3e9c702a..a2d2d5d1b088 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -497,8 +497,7 @@ nfs4_alloc_state_owner(struct nfs_server *server,
sp = kzalloc(sizeof(*sp), gfp_flags);
if (!sp)
return NULL;
- sp->so_seqid.owner_id = ida_simple_get(&server->openowner_id, 0, 0,
- gfp_flags);
+ sp->so_seqid.owner_id = ida_alloc(&server->openowner_id, gfp_flags);
if (sp->so_seqid.owner_id < 0) {
kfree(sp);
return NULL;
@@ -534,7 +533,7 @@ static void nfs4_free_state_owner(struct nfs4_state_owner *sp)
{
nfs4_destroy_seqid_counter(&sp->so_seqid);
put_cred(sp->so_cred);
- ida_simple_remove(&sp->so_server->openowner_id, sp->so_seqid.owner_id);
+ ida_free(&sp->so_server->openowner_id, sp->so_seqid.owner_id);
kfree(sp);
}
@@ -877,8 +876,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
refcount_set(&lsp->ls_count, 1);
lsp->ls_state = state;
lsp->ls_owner = fl_owner;
- lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id,
- 0, 0, GFP_KERNEL_ACCOUNT);
+ lsp->ls_seqid.owner_id = ida_alloc(&server->lockowner_id, GFP_KERNEL_ACCOUNT);
if (lsp->ls_seqid.owner_id < 0)
goto out_free;
INIT_LIST_HEAD(&lsp->ls_locks);
@@ -890,7 +888,7 @@ out_free:
void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
{
- ida_simple_remove(&server->lockowner_id, lsp->ls_seqid.owner_id);
+ ida_free(&server->lockowner_id, lsp->ls_seqid.owner_id);
nfs4_destroy_seqid_counter(&lsp->ls_seqid);
kfree(lsp);
}
@@ -1314,7 +1312,8 @@ int nfs4_wait_clnt_recover(struct nfs_client *clp)
refcount_inc(&clp->cl_count);
res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
- nfs_wait_bit_killable, TASK_KILLABLE);
+ nfs_wait_bit_killable,
+ TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
if (res)
goto out;
if (clp->cl_cons_state < 0)
@@ -1787,6 +1786,7 @@ static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp,
static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
{
+ set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
/* Mark all delegations for reclaim */
nfs_delegation_mark_reclaim(clp);
nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot);
@@ -2671,6 +2671,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
if (status < 0)
goto out_error;
nfs4_state_end_reclaim_reboot(clp);
+ continue;
}
/* Detect expired delegations... */
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 6ee6ad3674a2..2cff5901c689 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -2097,6 +2097,7 @@ TRACE_EVENT(ff_layout_commit_error,
)
);
+#ifdef CONFIG_NFS_V4_2
TRACE_DEFINE_ENUM(NFS4_CONTENT_DATA);
TRACE_DEFINE_ENUM(NFS4_CONTENT_HOLE);
@@ -2105,7 +2106,6 @@ TRACE_DEFINE_ENUM(NFS4_CONTENT_HOLE);
{ NFS4_CONTENT_DATA, "DATA" }, \
{ NFS4_CONTENT_HOLE, "HOLE" })
-#ifdef CONFIG_NFS_V4_2
TRACE_EVENT(nfs4_llseek,
TP_PROTO(
const struct inode *inode,
@@ -2496,6 +2496,54 @@ TRACE_EVENT(nfs4_offload_cancel,
__entry->stateid_seq, __entry->stateid_hash
)
);
+
+DECLARE_EVENT_CLASS(nfs4_xattr_event,
+ TP_PROTO(
+ const struct inode *inode,
+ const char *name,
+ int error
+ ),
+
+ TP_ARGS(inode, name, error),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, error)
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __string(name, name)
+ ),
+
+ TP_fast_assign(
+ __entry->error = error < 0 ? -error : 0;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __assign_str(name, name);
+ ),
+
+ TP_printk(
+ "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "name=%s",
+ -__entry->error, show_nfs4_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle, __get_str(name)
+ )
+);
+#define DEFINE_NFS4_XATTR_EVENT(name) \
+ DEFINE_EVENT(nfs4_xattr_event, name, \
+ TP_PROTO( \
+ const struct inode *inode, \
+ const char *name, \
+ int error \
+ ), \
+ TP_ARGS(inode, name, error))
+DEFINE_NFS4_XATTR_EVENT(nfs4_getxattr);
+DEFINE_NFS4_XATTR_EVENT(nfs4_setxattr);
+DEFINE_NFS4_XATTR_EVENT(nfs4_removexattr);
+
+DEFINE_NFS4_INODE_EVENT(nfs4_listxattr);
#endif /* CONFIG_NFS_V4_2 */
#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index fa148308822c..620329b7e6ae 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -139,7 +139,7 @@ static int __init nfs_root_setup(char *line)
ROOT_DEV = Root_NFS;
if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) {
- strlcpy(nfs_root_parms, line, sizeof(nfs_root_parms));
+ strscpy(nfs_root_parms, line, sizeof(nfs_root_parms));
} else {
size_t n = strlen(line) + sizeof(NFS_ROOT) - 1;
if (n >= sizeof(nfs_root_parms))
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 2613b7e36eb9..a5db5158c634 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -710,6 +710,7 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
u32 seq)
{
struct pnfs_layout_segment *lseg, *next;
+ struct nfs_server *server = NFS_SERVER(lo->plh_inode);
int remaining = 0;
dprintk("%s:Begin lo %p\n", __func__, lo);
@@ -722,8 +723,10 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
"offset %llu length %llu\n", __func__,
lseg, lseg->pls_range.iomode, lseg->pls_seq,
lseg->pls_range.offset, lseg->pls_range.length);
- if (!mark_lseg_invalid(lseg, tmp_list))
- remaining++;
+ if (mark_lseg_invalid(lseg, tmp_list))
+ continue;
+ remaining++;
+ pnfs_lseg_cancel_io(server, lseg);
}
dprintk("%s:Return %i\n", __func__, remaining);
return remaining;
@@ -1908,7 +1911,7 @@ static int pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
pnfs_layoutcommit_inode(lo->plh_inode, false);
return wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN,
nfs_wait_bit_killable,
- TASK_KILLABLE);
+ TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
}
static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo)
@@ -2485,6 +2488,7 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
u32 seq)
{
struct pnfs_layout_segment *lseg, *next;
+ struct nfs_server *server = NFS_SERVER(lo->plh_inode);
int remaining = 0;
dprintk("%s:Begin lo %p\n", __func__, lo);
@@ -2507,6 +2511,7 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
continue;
remaining++;
set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
+ pnfs_lseg_cancel_io(server, lseg);
}
if (remaining) {
@@ -3192,7 +3197,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
status = wait_on_bit_lock_action(&nfsi->flags,
NFS_INO_LAYOUTCOMMITTING,
nfs_wait_bit_killable,
- TASK_KILLABLE);
+ TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
if (status)
goto out;
}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index f331f067691b..e3e6a41f19de 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -169,6 +169,8 @@ struct pnfs_layoutdriver_type {
void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data);
int (*prepare_layoutcommit) (struct nfs4_layoutcommit_args *args);
int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args);
+
+ void (*cancel_io)(struct pnfs_layout_segment *lseg);
};
struct pnfs_commit_ops {
@@ -685,6 +687,13 @@ pnfs_lseg_request_intersecting(struct pnfs_layout_segment *lseg, struct nfs_page
req_offset(req), req_last);
}
+static inline void pnfs_lseg_cancel_io(struct nfs_server *server,
+ struct pnfs_layout_segment *lseg)
+{
+ if (server->pnfs_curr_ld->cancel_io)
+ server->pnfs_curr_ld->cancel_io(lseg);
+}
+
extern unsigned int layoutstats_timer;
#ifdef NFS_DEBUG
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 657c242a18ff..5d035dd2d7bf 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -374,12 +374,12 @@ pnfs_bucket_search_commit_reqs(struct pnfs_commit_bucket *buckets,
return NULL;
}
-/* pnfs_generic_search_commit_reqs - Search lists in @cinfo for the head reqest
+/* pnfs_generic_search_commit_reqs - Search lists in @cinfo for the head request
* for @page
* @cinfo - commit info for current inode
* @page - page to search for matching head request
*
- * Returns a the head request if one is found, otherwise returns NULL.
+ * Return: the head request if one is found, otherwise %NULL.
*/
struct nfs_page *
pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page)
@@ -821,7 +821,7 @@ static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
static struct nfs_client *(*get_v3_ds_connect)(
struct nfs_server *mds_srv,
- const struct sockaddr *ds_addr,
+ const struct sockaddr_storage *ds_addr,
int ds_addrlen,
int ds_proto,
unsigned int ds_timeo,
@@ -882,7 +882,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
continue;
}
clp = get_v3_ds_connect(mds_srv,
- (struct sockaddr *)&da->da_addr,
+ &da->da_addr,
da->da_addrlen, da->da_transport,
timeo, retrans);
if (IS_ERR(clp))
@@ -951,7 +951,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
put_cred(xprtdata.cred);
} else {
clp = nfs4_set_ds_client(mds_srv,
- (struct sockaddr *)&da->da_addr,
+ &da->da_addr,
da->da_addrlen,
da->da_transport, timeo,
retrans, minor_version);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ee66ffdb985e..05ae23657527 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -822,8 +822,7 @@ static int nfs_request_mount(struct fs_context *fc,
{
struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct nfs_mount_request request = {
- .sap = (struct sockaddr *)
- &ctx->mount_server.address,
+ .sap = &ctx->mount_server._address,
.dirpath = ctx->nfs_server.export_path,
.protocol = ctx->mount_server.protocol,
.fh = root_fh,
@@ -854,7 +853,7 @@ static int nfs_request_mount(struct fs_context *fc,
* Construct the mount server's address.
*/
if (ctx->mount_server.address.sa_family == AF_UNSPEC) {
- memcpy(request.sap, &ctx->nfs_server.address,
+ memcpy(request.sap, &ctx->nfs_server._address,
ctx->nfs_server.addrlen);
ctx->mount_server.addrlen = ctx->nfs_server.addrlen;
}
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index 65c331f75e9c..f21259ead64b 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -84,6 +84,6 @@ int nfsd_reply_cache_init(struct nfsd_net *);
void nfsd_reply_cache_shutdown(struct nfsd_net *);
int nfsd_cache_lookup(struct svc_rqst *);
void nfsd_cache_update(struct svc_rqst *, int, __be32 *);
-int nfsd_reply_cache_stats_open(struct inode *, struct file *);
+int nfsd_reply_cache_stats_show(struct seq_file *m, void *v);
#endif /* NFSCACHE_H */
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index eeed4ae5b4ad..adc4e87a71d2 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -405,22 +405,15 @@ nfsd_file_unhash(struct nfsd_file *nf)
return false;
}
-/*
- * Return true if the file was unhashed.
- */
-static bool
+static void
nfsd_file_unhash_and_dispose(struct nfsd_file *nf, struct list_head *dispose)
{
trace_nfsd_file_unhash_and_dispose(nf);
- if (!nfsd_file_unhash(nf))
- return false;
- /* keep final reference for nfsd_file_lru_dispose */
- if (refcount_dec_not_one(&nf->nf_ref))
- return true;
-
- nfsd_file_lru_remove(nf);
- list_add(&nf->nf_lru, dispose);
- return true;
+ if (nfsd_file_unhash(nf)) {
+ /* caller must call nfsd_file_dispose_list() later */
+ nfsd_file_lru_remove(nf);
+ list_add(&nf->nf_lru, dispose);
+ }
}
static void
@@ -562,8 +555,6 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose)
* @lock: LRU list lock (unused)
* @arg: dispose list
*
- * Note this can deadlock with nfsd_file_cache_purge.
- *
* Return values:
* %LRU_REMOVED: @item was removed from the LRU
* %LRU_ROTATE: @item is to be moved to the LRU tail
@@ -748,8 +739,6 @@ nfsd_file_close_inode(struct inode *inode)
*
* Walk the LRU list and close any entries that have not been used since
* the last scan.
- *
- * Note this can deadlock with nfsd_file_cache_purge.
*/
static void
nfsd_file_delayed_close(struct work_struct *work)
@@ -891,16 +880,12 @@ out_err:
goto out;
}
-/*
- * Note this can deadlock with nfsd_file_lru_cb.
- */
static void
__nfsd_file_cache_purge(struct net *net)
{
struct rhashtable_iter iter;
struct nfsd_file *nf;
LIST_HEAD(dispose);
- bool del;
rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter);
do {
@@ -908,16 +893,8 @@ __nfsd_file_cache_purge(struct net *net)
nf = rhashtable_walk_next(&iter);
while (!IS_ERR_OR_NULL(nf)) {
- if (net && nf->nf_net != net)
- continue;
- del = nfsd_file_unhash_and_dispose(nf, &dispose);
-
- /*
- * Deadlock detected! Something marked this entry as
- * unhased, but hasn't removed it from the hash list.
- */
- WARN_ON_ONCE(!del);
-
+ if (!net || nf->nf_net == net)
+ nfsd_file_unhash_and_dispose(nf, &dispose);
nf = rhashtable_walk_next(&iter);
}
@@ -1064,9 +1041,10 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
.need = may_flags & NFSD_FILE_MAY_MASK,
.net = SVC_NET(rqstp),
};
- struct nfsd_file *nf, *new;
- bool retry = true;
+ bool open_retry = true;
+ struct nfsd_file *nf;
__be32 status;
+ int ret;
status = fh_verify(rqstp, fhp, S_IFREG,
may_flags|NFSD_MAY_OWNER_OVERRIDE);
@@ -1076,35 +1054,33 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
key.cred = get_current_cred();
retry:
- /* Avoid allocation if the item is already in cache */
- nf = rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key,
- nfsd_file_rhash_params);
+ rcu_read_lock();
+ nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
+ nfsd_file_rhash_params);
if (nf)
nf = nfsd_file_get(nf);
+ rcu_read_unlock();
if (nf)
goto wait_for_construction;
- new = nfsd_file_alloc(&key, may_flags);
- if (!new) {
+ nf = nfsd_file_alloc(&key, may_flags);
+ if (!nf) {
status = nfserr_jukebox;
goto out_status;
}
- nf = rhashtable_lookup_get_insert_key(&nfsd_file_rhash_tbl,
- &key, &new->nf_rhash,
- nfsd_file_rhash_params);
- if (!nf) {
- nf = new;
+ ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl,
+ &key, &nf->nf_rhash,
+ nfsd_file_rhash_params);
+ if (likely(ret == 0))
goto open_file;
- }
- if (IS_ERR(nf))
- goto insert_err;
- nf = nfsd_file_get(nf);
- if (nf == NULL) {
- nf = new;
- goto open_file;
- }
- nfsd_file_slab_free(&new->nf_rcu);
+
+ nfsd_file_slab_free(&nf->nf_rcu);
+ if (ret == -EEXIST)
+ goto retry;
+ trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret);
+ status = nfserr_jukebox;
+ goto out_status;
wait_for_construction:
wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);
@@ -1112,11 +1088,11 @@ wait_for_construction:
/* Did construction of this file fail? */
if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf);
- if (!retry) {
+ if (!open_retry) {
status = nfserr_jukebox;
goto out;
}
- retry = false;
+ open_retry = false;
nfsd_file_put_noref(nf);
goto retry;
}
@@ -1164,13 +1140,6 @@ open_file:
smp_mb__after_atomic();
wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
goto out;
-
-insert_err:
- nfsd_file_slab_free(&new->nf_rcu);
- trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, PTR_ERR(nf));
- nf = NULL;
- status = nfserr_jukebox;
- goto out_status;
}
/**
@@ -1212,7 +1181,7 @@ nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
* scraping this file for info should test the labels to ensure they're
* getting the correct field.
*/
-static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
{
unsigned long releases = 0, pages_flushed = 0, evictions = 0;
unsigned long hits = 0, acquisitions = 0;
@@ -1259,8 +1228,3 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
seq_printf(m, "pages flushed: %lu\n", pages_flushed);
return 0;
}
-
-int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
-{
- return single_open(file, nfsd_file_cache_stats_show, NULL);
-}
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index 8e8c0c47d67d..357832bac736 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -60,5 +60,5 @@ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **nfp);
__be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **nfp);
-int nfsd_file_cache_stats_open(struct inode *, struct file *);
+int nfsd_file_cache_stats_show(struct seq_file *m, void *v);
#endif /* _FS_NFSD_FILECACHE_H */
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index ffe17743cc74..8c854ba3285b 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -192,6 +192,10 @@ struct nfsd_net {
atomic_t nfs4_client_count;
int nfs4_max_clients;
+
+ atomic_t nfsd_courtesy_clients;
+ struct shrinker nfsd_client_shrinker;
+ struct delayed_work nfsd_shrinker_work;
};
/* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 9edd3c1a30fb..13e6e6897f6c 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -331,6 +331,7 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = {
.pc_decode = nfssvc_decode_voidarg,
.pc_encode = nfssvc_encode_voidres,
.pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_argzero = sizeof(struct nfsd_voidargs),
.pc_ressize = sizeof(struct nfsd_voidres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST,
@@ -342,6 +343,7 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = {
.pc_encode = nfsaclsvc_encode_getaclres,
.pc_release = nfsaclsvc_release_getacl,
.pc_argsize = sizeof(struct nfsd3_getaclargs),
+ .pc_argzero = sizeof(struct nfsd3_getaclargs),
.pc_ressize = sizeof(struct nfsd3_getaclres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+1+2*(1+ACL),
@@ -353,6 +355,7 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = {
.pc_encode = nfssvc_encode_attrstatres,
.pc_release = nfssvc_release_attrstat,
.pc_argsize = sizeof(struct nfsd3_setaclargs),
+ .pc_argzero = sizeof(struct nfsd3_setaclargs),
.pc_ressize = sizeof(struct nfsd_attrstat),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+AT,
@@ -364,6 +367,7 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = {
.pc_encode = nfssvc_encode_attrstatres,
.pc_release = nfssvc_release_attrstat,
.pc_argsize = sizeof(struct nfsd_fhandle),
+ .pc_argzero = sizeof(struct nfsd_fhandle),
.pc_ressize = sizeof(struct nfsd_attrstat),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+AT,
@@ -375,6 +379,7 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = {
.pc_encode = nfsaclsvc_encode_accessres,
.pc_release = nfsaclsvc_release_access,
.pc_argsize = sizeof(struct nfsd3_accessargs),
+ .pc_argzero = sizeof(struct nfsd3_accessargs),
.pc_ressize = sizeof(struct nfsd3_accessres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+AT+1,
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 9446c6743664..2fb9ee356455 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -252,6 +252,7 @@ static const struct svc_procedure nfsd_acl_procedures3[3] = {
.pc_decode = nfssvc_decode_voidarg,
.pc_encode = nfssvc_encode_voidres,
.pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_argzero = sizeof(struct nfsd_voidargs),
.pc_ressize = sizeof(struct nfsd_voidres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST,
@@ -263,6 +264,7 @@ static const struct svc_procedure nfsd_acl_procedures3[3] = {
.pc_encode = nfs3svc_encode_getaclres,
.pc_release = nfs3svc_release_getacl,
.pc_argsize = sizeof(struct nfsd3_getaclargs),
+ .pc_argzero = sizeof(struct nfsd3_getaclargs),
.pc_ressize = sizeof(struct nfsd3_getaclres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+1+2*(1+ACL),
@@ -274,6 +276,7 @@ static const struct svc_procedure nfsd_acl_procedures3[3] = {
.pc_encode = nfs3svc_encode_setaclres,
.pc_release = nfs3svc_release_fhandle,
.pc_argsize = sizeof(struct nfsd3_setaclargs),
+ .pc_argzero = sizeof(struct nfsd3_setaclargs),
.pc_ressize = sizeof(struct nfsd3_attrstat),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+pAT,
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index a41cca619338..923d9a80df92 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -150,7 +150,6 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
{
struct nfsd3_readargs *argp = rqstp->rq_argp;
struct nfsd3_readres *resp = rqstp->rq_resp;
- u32 max_blocksize = svc_max_payload(rqstp);
unsigned int len;
int v;
@@ -159,7 +158,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
(unsigned long) argp->count,
(unsigned long long) argp->offset);
- argp->count = min_t(u32, argp->count, max_blocksize);
+ argp->count = min_t(u32, argp->count, svc_max_payload(rqstp));
+ argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen);
if (argp->offset > (u64)OFFSET_MAX)
argp->offset = (u64)OFFSET_MAX;
if (argp->offset + argp->count > (u64)OFFSET_MAX)
@@ -563,25 +563,18 @@ static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp,
{
struct xdr_buf *buf = &resp->dirlist;
struct xdr_stream *xdr = &resp->xdr;
-
- count = clamp(count, (u32)(XDR_UNIT * 2), svc_max_payload(rqstp));
+ unsigned int sendbuf = min_t(unsigned int, rqstp->rq_res.buflen,
+ svc_max_payload(rqstp));
memset(buf, 0, sizeof(*buf));
/* Reserve room for the NULL ptr & eof flag (-2 words) */
- buf->buflen = count - XDR_UNIT * 2;
+ buf->buflen = clamp(count, (u32)(XDR_UNIT * 2), sendbuf);
+ buf->buflen -= XDR_UNIT * 2;
buf->pages = rqstp->rq_next_page;
rqstp->rq_next_page += (buf->buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
- /* This is xdr_init_encode(), but it assumes that
- * the head kvec has already been consumed. */
- xdr_set_scratch_buffer(xdr, NULL, 0);
- xdr->buf = buf;
- xdr->page_ptr = buf->pages;
- xdr->iov = NULL;
- xdr->p = page_address(*buf->pages);
- xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE);
- xdr->rqst = NULL;
+ xdr_init_encode_pages(xdr, buf, buf->pages, NULL);
}
/*
@@ -808,6 +801,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_decode = nfssvc_decode_voidarg,
.pc_encode = nfssvc_encode_voidres,
.pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_argzero = sizeof(struct nfsd_voidargs),
.pc_ressize = sizeof(struct nfsd_voidres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST,
@@ -819,6 +813,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_getattrres,
.pc_release = nfs3svc_release_fhandle,
.pc_argsize = sizeof(struct nfsd_fhandle),
+ .pc_argzero = sizeof(struct nfsd_fhandle),
.pc_ressize = sizeof(struct nfsd3_attrstatres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+AT,
@@ -830,6 +825,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_wccstatres,
.pc_release = nfs3svc_release_fhandle,
.pc_argsize = sizeof(struct nfsd3_sattrargs),
+ .pc_argzero = sizeof(struct nfsd3_sattrargs),
.pc_ressize = sizeof(struct nfsd3_wccstatres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+WC,
@@ -841,6 +837,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_lookupres,
.pc_release = nfs3svc_release_fhandle2,
.pc_argsize = sizeof(struct nfsd3_diropargs),
+ .pc_argzero = sizeof(struct nfsd3_diropargs),
.pc_ressize = sizeof(struct nfsd3_diropres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+FH+pAT+pAT,
@@ -852,6 +849,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_accessres,
.pc_release = nfs3svc_release_fhandle,
.pc_argsize = sizeof(struct nfsd3_accessargs),
+ .pc_argzero = sizeof(struct nfsd3_accessargs),
.pc_ressize = sizeof(struct nfsd3_accessres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+pAT+1,
@@ -863,6 +861,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_readlinkres,
.pc_release = nfs3svc_release_fhandle,
.pc_argsize = sizeof(struct nfsd_fhandle),
+ .pc_argzero = sizeof(struct nfsd_fhandle),
.pc_ressize = sizeof(struct nfsd3_readlinkres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+pAT+1+NFS3_MAXPATHLEN/4,
@@ -874,6 +873,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_readres,
.pc_release = nfs3svc_release_fhandle,
.pc_argsize = sizeof(struct nfsd3_readargs),
+ .pc_argzero = sizeof(struct nfsd3_readargs),
.pc_ressize = sizeof(struct nfsd3_readres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+pAT+4+NFSSVC_MAXBLKSIZE/4,
@@ -885,6 +885,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_writeres,
.pc_release = nfs3svc_release_fhandle,
.pc_argsize = sizeof(struct nfsd3_writeargs),
+ .pc_argzero = sizeof(struct nfsd3_writeargs),
.pc_ressize = sizeof(struct nfsd3_writeres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+WC+4,
@@ -896,6 +897,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_createres,
.pc_release = nfs3svc_release_fhandle2,
.pc_argsize = sizeof(struct nfsd3_createargs),
+ .pc_argzero = sizeof(struct nfsd3_createargs),
.pc_ressize = sizeof(struct nfsd3_createres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+(1+FH+pAT)+WC,
@@ -907,6 +909,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_createres,
.pc_release = nfs3svc_release_fhandle2,
.pc_argsize = sizeof(struct nfsd3_mkdirargs),
+ .pc_argzero = sizeof(struct nfsd3_mkdirargs),
.pc_ressize = sizeof(struct nfsd3_createres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+(1+FH+pAT)+WC,
@@ -918,6 +921,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_createres,
.pc_release = nfs3svc_release_fhandle2,
.pc_argsize = sizeof(struct nfsd3_symlinkargs),
+ .pc_argzero = sizeof(struct nfsd3_symlinkargs),
.pc_ressize = sizeof(struct nfsd3_createres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+(1+FH+pAT)+WC,
@@ -929,6 +933,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_createres,
.pc_release = nfs3svc_release_fhandle2,
.pc_argsize = sizeof(struct nfsd3_mknodargs),
+ .pc_argzero = sizeof(struct nfsd3_mknodargs),
.pc_ressize = sizeof(struct nfsd3_createres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+(1+FH+pAT)+WC,
@@ -940,6 +945,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_wccstatres,
.pc_release = nfs3svc_release_fhandle,
.pc_argsize = sizeof(struct nfsd3_diropargs),
+ .pc_argzero = sizeof(struct nfsd3_diropargs),
.pc_ressize = sizeof(struct nfsd3_wccstatres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+WC,
@@ -951,6 +957,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_wccstatres,
.pc_release = nfs3svc_release_fhandle,
.pc_argsize = sizeof(struct nfsd3_diropargs),
+ .pc_argzero = sizeof(struct nfsd3_diropargs),
.pc_ressize = sizeof(struct nfsd3_wccstatres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+WC,
@@ -962,6 +969,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_renameres,
.pc_release = nfs3svc_release_fhandle2,
.pc_argsize = sizeof(struct nfsd3_renameargs),
+ .pc_argzero = sizeof(struct nfsd3_renameargs),
.pc_ressize = sizeof(struct nfsd3_renameres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+WC+WC,
@@ -973,6 +981,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_linkres,
.pc_release = nfs3svc_release_fhandle2,
.pc_argsize = sizeof(struct nfsd3_linkargs),
+ .pc_argzero = sizeof(struct nfsd3_linkargs),
.pc_ressize = sizeof(struct nfsd3_linkres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+pAT+WC,
@@ -984,6 +993,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_readdirres,
.pc_release = nfs3svc_release_fhandle,
.pc_argsize = sizeof(struct nfsd3_readdirargs),
+ .pc_argzero = sizeof(struct nfsd3_readdirargs),
.pc_ressize = sizeof(struct nfsd3_readdirres),
.pc_cachetype = RC_NOCACHE,
.pc_name = "READDIR",
@@ -994,6 +1004,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_readdirres,
.pc_release = nfs3svc_release_fhandle,
.pc_argsize = sizeof(struct nfsd3_readdirplusargs),
+ .pc_argzero = sizeof(struct nfsd3_readdirplusargs),
.pc_ressize = sizeof(struct nfsd3_readdirres),
.pc_cachetype = RC_NOCACHE,
.pc_name = "READDIRPLUS",
@@ -1003,6 +1014,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_decode = nfs3svc_decode_fhandleargs,
.pc_encode = nfs3svc_encode_fsstatres,
.pc_argsize = sizeof(struct nfsd3_fhandleargs),
+ .pc_argzero = sizeof(struct nfsd3_fhandleargs),
.pc_ressize = sizeof(struct nfsd3_fsstatres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+pAT+2*6+1,
@@ -1013,6 +1025,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_decode = nfs3svc_decode_fhandleargs,
.pc_encode = nfs3svc_encode_fsinfores,
.pc_argsize = sizeof(struct nfsd3_fhandleargs),
+ .pc_argzero = sizeof(struct nfsd3_fhandleargs),
.pc_ressize = sizeof(struct nfsd3_fsinfores),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+pAT+12,
@@ -1023,6 +1036,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_decode = nfs3svc_decode_fhandleargs,
.pc_encode = nfs3svc_encode_pathconfres,
.pc_argsize = sizeof(struct nfsd3_fhandleargs),
+ .pc_argzero = sizeof(struct nfsd3_fhandleargs),
.pc_ressize = sizeof(struct nfsd3_pathconfres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+pAT+6,
@@ -1034,6 +1048,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_commitres,
.pc_release = nfs3svc_release_fhandle,
.pc_argsize = sizeof(struct nfsd3_commitargs),
+ .pc_argzero = sizeof(struct nfsd3_commitargs),
.pc_ressize = sizeof(struct nfsd3_commitres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+WC+2,
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 0293b8d65f10..3308dd671ef0 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -571,10 +571,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
args->count = max_blocksize;
args->len = max_blocksize;
}
- if (!xdr_stream_subsegment(xdr, &args->payload, args->count))
- return false;
- return true;
+ return xdr_stream_subsegment(xdr, &args->payload, args->count);
}
bool
@@ -616,8 +614,6 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_symlinkargs *args = rqstp->rq_argp;
struct kvec *head = rqstp->rq_arg.head;
- struct kvec *tail = rqstp->rq_arg.tail;
- size_t remaining;
if (!svcxdr_decode_diropargs3(xdr, &args->ffh, &args->fname, &args->flen))
return false;
@@ -626,16 +622,10 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
if (xdr_stream_decode_u32(xdr, &args->tlen) < 0)
return false;
- /* request sanity */
- remaining = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len;
- remaining -= xdr_stream_pos(xdr);
- if (remaining < xdr_align_size(args->tlen))
- return false;
-
- args->first.iov_base = xdr->p;
+ /* symlink_data */
args->first.iov_len = head->iov_len - xdr_stream_pos(xdr);
-
- return true;
+ args->first.iov_base = xdr_inline_decode(xdr, args->tlen);
+ return args->first.iov_base != NULL;
}
bool
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 4ce328209f61..f0e69edf5f0f 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1371,11 +1371,21 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
cb->cb_holds_slot = false;
}
-void nfsd4_run_cb(struct nfsd4_callback *cb)
+/**
+ * nfsd4_run_cb - queue up a callback job to run
+ * @cb: callback to queue
+ *
+ * Kick off a callback to do its thing. Returns false if it was already
+ * on a queue, true otherwise.
+ */
+bool nfsd4_run_cb(struct nfsd4_callback *cb)
{
struct nfs4_client *clp = cb->cb_clp;
+ bool queued;
nfsd41_cb_inflight_begin(clp);
- if (!nfsd4_queue_cb(cb))
+ queued = nfsd4_queue_cb(cb);
+ if (!queued)
nfsd41_cb_inflight_end(clp);
+ return queued;
}
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index f92161ce1f97..e70a1a2999b7 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -82,8 +82,8 @@ ent_init(struct cache_head *cnew, struct cache_head *citm)
new->id = itm->id;
new->type = itm->type;
- strlcpy(new->name, itm->name, sizeof(new->name));
- strlcpy(new->authname, itm->authname, sizeof(new->authname));
+ strscpy(new->name, itm->name, sizeof(new->name));
+ strscpy(new->authname, itm->authname, sizeof(new->authname));
}
static void
@@ -548,7 +548,7 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen
return nfserr_badowner;
memcpy(key.name, name, namelen);
key.name[namelen] = '\0';
- strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
+ strscpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
ret = idmap_lookup(rqstp, nametoid_lookup, &key, nn->nametoid_cache, &item);
if (ret == -ENOENT)
return nfserr_badowner;
@@ -584,7 +584,7 @@ static __be32 idmap_id_to_name(struct xdr_stream *xdr,
int ret;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
- strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
+ strscpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item);
if (ret == -ENOENT)
return encode_ascii_id(xdr, id);
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 2c05692a9abf..3564d1c6f610 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -658,7 +658,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
ktime_t now, cutoff;
const struct nfsd4_layout_ops *ops;
-
+ trace_nfsd_cb_layout_done(&ls->ls_stid.sc_stateid, task);
switch (task->tk_status) {
case 0:
case -NFS4ERR_DELAY:
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a72ab97f77ef..8beb2bc4c328 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -141,7 +141,6 @@ fh_dup2(struct svc_fh *dst, struct svc_fh *src)
static __be32
do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open, int accmode)
{
- __be32 status;
if (open->op_truncate &&
!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
@@ -156,9 +155,7 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
if (open->op_share_deny & NFS4_SHARE_DENY_READ)
accmode |= NFSD_MAY_WRITE;
- status = fh_verify(rqstp, current_fh, S_IFREG, accmode);
-
- return status;
+ return fh_verify(rqstp, current_fh, S_IFREG, accmode);
}
static __be32 nfsd_check_obj_isreg(struct svc_fh *fh)
@@ -454,7 +451,6 @@ static __be32
do_open_fhandle(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open)
{
struct svc_fh *current_fh = &cstate->current_fh;
- __be32 status;
int accmode = 0;
/* We don't know the target directory, and therefore can not
@@ -479,9 +475,7 @@ do_open_fhandle(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, str
if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH)
accmode = NFSD_MAY_OWNER_OVERRIDE;
- status = do_open_permission(rqstp, current_fh, open, accmode);
-
- return status;
+ return do_open_permission(rqstp, current_fh, open, accmode);
}
static void
@@ -668,11 +662,9 @@ static __be32
nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
- __be32 status;
-
fh_put(&cstate->current_fh);
- status = exp_pseudoroot(rqstp, &cstate->current_fh);
- return status;
+
+ return exp_pseudoroot(rqstp, &cstate->current_fh);
}
static __be32
@@ -1343,7 +1335,7 @@ try_again:
return 0;
}
if (work) {
- strlcpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr) - 1);
+ strscpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr) - 1);
refcount_set(&work->nsui_refcnt, 2);
work->nsui_busy = true;
list_add_tail(&work->nsui_list, &nn->nfsd_ssc_mount_list);
@@ -1621,6 +1613,10 @@ static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
static int nfsd4_cb_offload_done(struct nfsd4_callback *cb,
struct rpc_task *task)
{
+ struct nfsd4_cb_offload *cbo =
+ container_of(cb, struct nfsd4_cb_offload, co_cb);
+
+ trace_nfsd_cb_offload_done(&cbo->co_res.cb_stateid, task);
return 1;
}
@@ -1768,7 +1764,13 @@ static int nfsd4_do_async_copy(void *data)
filp = nfs42_ssc_open(copy->ss_mnt, &copy->c_fh,
&copy->stateid);
if (IS_ERR(filp)) {
- nfserr = nfserr_offload_denied;
+ switch (PTR_ERR(filp)) {
+ case -EBADF:
+ nfserr = nfserr_wrong_type;
+ break;
+ default:
+ nfserr = nfserr_offload_denied;
+ }
nfsd4_interssc_disconnect(copy->ss_mnt);
goto do_callback;
}
@@ -1826,7 +1828,7 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (!nfs4_init_copy_state(nn, copy))
goto out_err;
refcount_set(&async_copy->refcount, 1);
- memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid.stid,
+ memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid.cs_stid,
sizeof(copy->cp_res.cb_stateid));
dup_copy_fields(copy, async_copy);
async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
@@ -1862,7 +1864,7 @@ find_async_copy(struct nfs4_client *clp, stateid_t *stateid)
spin_lock(&clp->async_lock);
list_for_each_entry(copy, &clp->async_copies, copies) {
- if (memcmp(&copy->cp_stateid.stid, stateid, NFS4_STATEID_SIZE))
+ if (memcmp(&copy->cp_stateid.cs_stid, stateid, NFS4_STATEID_SIZE))
continue;
refcount_inc(&copy->refcount);
spin_unlock(&clp->async_lock);
@@ -1916,7 +1918,7 @@ nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
cps = nfs4_alloc_init_cpntf_state(nn, stid);
if (!cps)
goto out;
- memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.stid, sizeof(stateid_t));
+ memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.cs_stid, sizeof(stateid_t));
memcpy(&cps->cp_p_stateid, &stid->sc_stateid, sizeof(stateid_t));
memcpy(&cps->cp_p_clid, &clp->cl_clientid, sizeof(clientid_t));
@@ -2633,9 +2635,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
status = nfserr_minor_vers_mismatch;
if (nfsd_minorversion(nn, args->minorversion, NFSD_TEST) <= 0)
goto out;
- status = nfserr_resource;
- if (args->opcnt > NFSD_MAX_OPS_PER_COMPOUND)
- goto out;
status = nfs41_check_op_ordering(args);
if (status) {
@@ -2648,10 +2647,20 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
rqstp->rq_lease_breaker = (void **)&cstate->clp;
- trace_nfsd_compound(rqstp, args->opcnt);
+ trace_nfsd_compound(rqstp, args->tag, args->taglen, args->client_opcnt);
while (!status && resp->opcnt < args->opcnt) {
op = &args->ops[resp->opcnt++];
+ if (unlikely(resp->opcnt == NFSD_MAX_OPS_PER_COMPOUND)) {
+ /* If there are still more operations to process,
+ * stop here and report NFS4ERR_RESOURCE. */
+ if (cstate->minorversion == 0 &&
+ args->client_opcnt > resp->opcnt) {
+ op->status = nfserr_resource;
+ goto encode_op;
+ }
+ }
+
/*
* The XDR decode routines may have pre-set op->status;
* for example, if there is a miscellaneous XDR error
@@ -2727,8 +2736,8 @@ encode_op:
status = op->status;
}
- trace_nfsd_compound_status(args->opcnt, resp->opcnt, status,
- nfsd4_op_name(op->opnum));
+ trace_nfsd_compound_status(args->client_opcnt, resp->opcnt,
+ status, nfsd4_op_name(op->opnum));
nfsd4_cstate_clear_replay(cstate);
nfsd4_increment_op_stats(op->opnum);
@@ -2762,28 +2771,49 @@ out:
#define op_encode_channel_attrs_maxsz (6 + 1 + 1)
-static inline u32 nfsd4_only_status_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+/*
+ * The _rsize() helpers are invoked by the NFSv4 COMPOUND decoder, which
+ * is called before sunrpc sets rq_res.buflen. Thus we have to compute
+ * the maximum payload size here, based on transport limits and the size
+ * of the remaining space in the rq_pages array.
+ */
+static u32 nfsd4_max_payload(const struct svc_rqst *rqstp)
+{
+ u32 buflen;
+
+ buflen = (rqstp->rq_page_end - rqstp->rq_next_page) * PAGE_SIZE;
+ buflen -= rqstp->rq_auth_slack;
+ buflen -= rqstp->rq_res.head[0].iov_len;
+ return min_t(u32, buflen, svc_max_payload(rqstp));
+}
+
+static u32 nfsd4_only_status_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size) * sizeof(__be32);
}
-static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_status_stateid_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32);
}
-static inline u32 nfsd4_access_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_access_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
/* ac_supported, ac_resp_access */
return (op_encode_hdr_size + 2)* sizeof(__be32);
}
-static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_commit_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32);
}
-static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_create_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_change_info_maxsz
+ nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
@@ -2794,17 +2824,17 @@ static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op
* the op prematurely if the estimate is too large. We may turn off splice
* reads unnecessarily.
*/
-static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp,
- struct nfsd4_op *op)
+static u32 nfsd4_getattr_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
- u32 *bmap = op->u.getattr.ga_bmval;
+ const u32 *bmap = op->u.getattr.ga_bmval;
u32 bmap0 = bmap[0], bmap1 = bmap[1], bmap2 = bmap[2];
u32 ret = 0;
if (bmap0 & FATTR4_WORD0_ACL)
- return svc_max_payload(rqstp);
+ return nfsd4_max_payload(rqstp);
if (bmap0 & FATTR4_WORD0_FS_LOCATIONS)
- return svc_max_payload(rqstp);
+ return nfsd4_max_payload(rqstp);
if (bmap1 & FATTR4_WORD1_OWNER) {
ret += IDMAP_NAMESZ + 4;
@@ -2832,24 +2862,28 @@ static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp,
return ret;
}
-static inline u32 nfsd4_getfh_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_getfh_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + 1) * sizeof(__be32) + NFS4_FHSIZE;
}
-static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_link_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_change_info_maxsz)
* sizeof(__be32);
}
-static inline u32 nfsd4_lock_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_lock_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_lock_denied_maxsz)
* sizeof(__be32);
}
-static inline u32 nfsd4_open_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_open_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_stateid_maxsz
+ op_encode_change_info_maxsz + 1
@@ -2857,20 +2891,18 @@ static inline u32 nfsd4_open_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+ op_encode_delegation_maxsz) * sizeof(__be32);
}
-static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_read_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
- u32 maxcount = 0, rlen = 0;
-
- maxcount = svc_max_payload(rqstp);
- rlen = min(op->u.read.rd_length, maxcount);
+ u32 rlen = min(op->u.read.rd_length, nfsd4_max_payload(rqstp));
return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32);
}
-static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_read_plus_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
- u32 maxcount = svc_max_payload(rqstp);
- u32 rlen = min(op->u.read.rd_length, maxcount);
+ u32 rlen = min(op->u.read.rd_length, nfsd4_max_payload(rqstp));
/*
* If we detect that the file changed during hole encoding, then we
* recover by encoding the remaining reply as data. This means we need
@@ -2881,70 +2913,77 @@ static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op
return (op_encode_hdr_size + 2 + seg_len + XDR_QUADLEN(rlen)) * sizeof(__be32);
}
-static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_readdir_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
- u32 maxcount = 0, rlen = 0;
-
- maxcount = svc_max_payload(rqstp);
- rlen = min(op->u.readdir.rd_maxcount, maxcount);
+ u32 rlen = min(op->u.readdir.rd_maxcount, nfsd4_max_payload(rqstp));
return (op_encode_hdr_size + op_encode_verifier_maxsz +
XDR_QUADLEN(rlen)) * sizeof(__be32);
}
-static inline u32 nfsd4_readlink_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_readlink_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + 1) * sizeof(__be32) + PAGE_SIZE;
}
-static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_remove_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_change_info_maxsz)
* sizeof(__be32);
}
-static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_rename_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_change_info_maxsz
+ op_encode_change_info_maxsz) * sizeof(__be32);
}
-static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp,
- struct nfsd4_op *op)
+static u32 nfsd4_sequence_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) * sizeof(__be32);
}
-static inline u32 nfsd4_test_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_test_stateid_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + 1 + op->u.test_stateid.ts_num_ids)
* sizeof(__be32);
}
-static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_setattr_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
}
-static inline u32 nfsd4_secinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_secinfo_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + RPC_AUTH_MAXFLAVOR *
(4 + XDR_QUADLEN(GSS_OID_MAX_LEN))) * sizeof(__be32);
}
-static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_setclientid_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) *
sizeof(__be32);
}
-static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_write_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + 2 + op_encode_verifier_maxsz) * sizeof(__be32);
}
-static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_exchange_id_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\
1 + 1 + /* eir_flags, spr_how */\
@@ -2958,14 +2997,16 @@ static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_o
0 /* ignored eir_server_impl_id contents */) * sizeof(__be32);
}
-static inline u32 nfsd4_bind_conn_to_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_bind_conn_to_session_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + \
XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* bctsr_sessid */\
2 /* bctsr_dir, use_conn_in_rdma_mode */) * sizeof(__be32);
}
-static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_create_session_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + \
XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* sessionid */\
@@ -2974,7 +3015,8 @@ static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd
op_encode_channel_attrs_maxsz) * sizeof(__be32);
}
-static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_copy_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size +
1 /* wr_callback */ +
@@ -2986,16 +3028,16 @@ static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1 /* cr_synchronous */) * sizeof(__be32);
}
-static inline u32 nfsd4_offload_status_rsize(struct svc_rqst *rqstp,
- struct nfsd4_op *op)
+static u32 nfsd4_offload_status_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size +
2 /* osr_count */ +
1 /* osr_complete<1> optional 0 for now */) * sizeof(__be32);
}
-static inline u32 nfsd4_copy_notify_rsize(struct svc_rqst *rqstp,
- struct nfsd4_op *op)
+static u32 nfsd4_copy_notify_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size +
3 /* cnr_lease_time */ +
@@ -3010,12 +3052,10 @@ static inline u32 nfsd4_copy_notify_rsize(struct svc_rqst *rqstp,
}
#ifdef CONFIG_NFSD_PNFS
-static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_getdeviceinfo_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
- u32 maxcount = 0, rlen = 0;
-
- maxcount = svc_max_payload(rqstp);
- rlen = min(op->u.getdeviceinfo.gd_maxcount, maxcount);
+ u32 rlen = min(op->u.getdeviceinfo.gd_maxcount, nfsd4_max_payload(rqstp));
return (op_encode_hdr_size +
1 /* gd_layout_type*/ +
@@ -3028,7 +3068,8 @@ static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4
* so we need to define an arbitrary upper bound here.
*/
#define MAX_LAYOUT_SIZE 128
-static inline u32 nfsd4_layoutget_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_layoutget_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size +
1 /* logr_return_on_close */ +
@@ -3037,14 +3078,16 @@ static inline u32 nfsd4_layoutget_rsize(struct svc_rqst *rqstp, struct nfsd4_op
MAX_LAYOUT_SIZE) * sizeof(__be32);
}
-static inline u32 nfsd4_layoutcommit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_layoutcommit_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size +
1 /* locr_newsize */ +
2 /* ns_size */) * sizeof(__be32);
}
-static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_layoutreturn_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size +
1 /* lrs_stateid */ +
@@ -3053,41 +3096,36 @@ static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_
#endif /* CONFIG_NFSD_PNFS */
-static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_seek_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + 3) * sizeof(__be32);
}
-static inline u32 nfsd4_getxattr_rsize(struct svc_rqst *rqstp,
- struct nfsd4_op *op)
+static u32 nfsd4_getxattr_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
- u32 maxcount, rlen;
-
- maxcount = svc_max_payload(rqstp);
- rlen = min_t(u32, XATTR_SIZE_MAX, maxcount);
+ u32 rlen = min_t(u32, XATTR_SIZE_MAX, nfsd4_max_payload(rqstp));
return (op_encode_hdr_size + 1 + XDR_QUADLEN(rlen)) * sizeof(__be32);
}
-static inline u32 nfsd4_setxattr_rsize(struct svc_rqst *rqstp,
- struct nfsd4_op *op)
+static u32 nfsd4_setxattr_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_change_info_maxsz)
* sizeof(__be32);
}
-static inline u32 nfsd4_listxattrs_rsize(struct svc_rqst *rqstp,
- struct nfsd4_op *op)
+static u32 nfsd4_listxattrs_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
- u32 maxcount, rlen;
-
- maxcount = svc_max_payload(rqstp);
- rlen = min(op->u.listxattrs.lsxa_maxcount, maxcount);
+ u32 rlen = min(op->u.listxattrs.lsxa_maxcount, nfsd4_max_payload(rqstp));
return (op_encode_hdr_size + 4 + XDR_QUADLEN(rlen)) * sizeof(__be32);
}
-static inline u32 nfsd4_removexattr_rsize(struct svc_rqst *rqstp,
- struct nfsd4_op *op)
+static u32 nfsd4_removexattr_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_change_info_maxsz)
* sizeof(__be32);
@@ -3576,6 +3614,7 @@ static const struct svc_procedure nfsd_procedures4[2] = {
.pc_decode = nfssvc_decode_voidarg,
.pc_encode = nfssvc_encode_voidres,
.pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_argzero = sizeof(struct nfsd_voidargs),
.pc_ressize = sizeof(struct nfsd_voidres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = 1,
@@ -3586,6 +3625,7 @@ static const struct svc_procedure nfsd_procedures4[2] = {
.pc_decode = nfs4svc_decode_compoundargs,
.pc_encode = nfs4svc_encode_compoundres,
.pc_argsize = sizeof(struct nfsd4_compoundargs),
+ .pc_argzero = offsetof(struct nfsd4_compoundargs, iops),
.pc_ressize = sizeof(struct nfsd4_compoundres),
.pc_release = nfsd4_release_compoundargs,
.pc_cachetype = RC_NOCACHE,
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index c634483d85d2..78b8cd9651d5 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -266,7 +266,7 @@ struct nfs4_dir_ctx {
struct list_head names;
};
-static int
+static bool
nfsd4_build_namelist(struct dir_context *__ctx, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type)
{
@@ -275,14 +275,14 @@ nfsd4_build_namelist(struct dir_context *__ctx, const char *name, int namlen,
struct name_list *entry;
if (namlen != HEXDIR_LEN - 1)
- return 0;
+ return true;
entry = kmalloc(sizeof(struct name_list), GFP_KERNEL);
if (entry == NULL)
- return -ENOMEM;
+ return false;
memcpy(entry->name, name, HEXDIR_LEN - 1);
entry->name[HEXDIR_LEN - 1] = '\0';
list_add(&entry->list, &ctx->names);
- return 0;
+ return true;
}
static int
@@ -807,16 +807,18 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
if (get_user(namelen, &ci->cc_name.cn_len))
return -EFAULT;
name.data = memdup_user(&ci->cc_name.cn_id, namelen);
- if (IS_ERR_OR_NULL(name.data))
- return -EFAULT;
+ if (IS_ERR(name.data))
+ return PTR_ERR(name.data);
name.len = namelen;
get_user(princhashlen, &ci->cc_princhash.cp_len);
if (princhashlen > 0) {
princhash.data = memdup_user(
&ci->cc_princhash.cp_data,
princhashlen);
- if (IS_ERR_OR_NULL(princhash.data))
- return -EFAULT;
+ if (IS_ERR(princhash.data)) {
+ kfree(name.data);
+ return PTR_ERR(princhash.data);
+ }
princhash.len = princhashlen;
} else
princhash.len = 0;
@@ -827,8 +829,8 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
if (get_user(namelen, &cnm->cn_len))
return -EFAULT;
name.data = memdup_user(&cnm->cn_id, namelen);
- if (IS_ERR_OR_NULL(name.data))
- return -EFAULT;
+ if (IS_ERR(name.data))
+ return PTR_ERR(name.data);
name.len = namelen;
}
if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c5d199d7e6b4..4e718500a00c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -160,6 +160,13 @@ static bool is_client_expired(struct nfs4_client *clp)
return clp->cl_time == 0;
}
+static void nfsd4_dec_courtesy_client_count(struct nfsd_net *nn,
+ struct nfs4_client *clp)
+{
+ if (clp->cl_state != NFSD4_ACTIVE)
+ atomic_add_unless(&nn->nfsd_courtesy_clients, -1, 0);
+}
+
static __be32 get_client_locked(struct nfs4_client *clp)
{
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
@@ -169,6 +176,7 @@ static __be32 get_client_locked(struct nfs4_client *clp)
if (is_client_expired(clp))
return nfserr_expired;
atomic_inc(&clp->cl_rpc_users);
+ nfsd4_dec_courtesy_client_count(nn, clp);
clp->cl_state = NFSD4_ACTIVE;
return nfs_ok;
}
@@ -190,6 +198,7 @@ renew_client_locked(struct nfs4_client *clp)
list_move_tail(&clp->cl_lru, &nn->client_lru);
clp->cl_time = ktime_get_boottime_seconds();
+ nfsd4_dec_courtesy_client_count(nn, clp);
clp->cl_state = NFSD4_ACTIVE;
}
@@ -357,6 +366,8 @@ nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb)
static int
nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
+ trace_nfsd_cb_notify_lock_done(&zero_stateid, task);
+
/*
* Since this is just an optimization, we don't try very hard if it
* turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and
@@ -963,19 +974,19 @@ out_free:
* Create a unique stateid_t to represent each COPY.
*/
static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid,
- unsigned char sc_type)
+ unsigned char cs_type)
{
int new_id;
- stid->stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time;
- stid->stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
- stid->sc_type = sc_type;
+ stid->cs_stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time;
+ stid->cs_stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
+ stid->cs_type = cs_type;
idr_preload(GFP_KERNEL);
spin_lock(&nn->s2s_cp_lock);
new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT);
- stid->stid.si_opaque.so_id = new_id;
- stid->stid.si_generation = 1;
+ stid->cs_stid.si_opaque.so_id = new_id;
+ stid->cs_stid.si_generation = 1;
spin_unlock(&nn->s2s_cp_lock);
idr_preload_end();
if (new_id < 0)
@@ -997,7 +1008,7 @@ struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
if (!cps)
return NULL;
cps->cpntf_time = ktime_get_boottime_seconds();
- refcount_set(&cps->cp_stateid.sc_count, 1);
+ refcount_set(&cps->cp_stateid.cs_count, 1);
if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID))
goto out_free;
spin_lock(&nn->s2s_cp_lock);
@@ -1013,11 +1024,11 @@ void nfs4_free_copy_state(struct nfsd4_copy *copy)
{
struct nfsd_net *nn;
- WARN_ON_ONCE(copy->cp_stateid.sc_type != NFS4_COPY_STID);
+ WARN_ON_ONCE(copy->cp_stateid.cs_type != NFS4_COPY_STID);
nn = net_generic(copy->cp_clp->net, nfsd_net_id);
spin_lock(&nn->s2s_cp_lock);
idr_remove(&nn->s2s_cp_stateids,
- copy->cp_stateid.stid.si_opaque.so_id);
+ copy->cp_stateid.cs_stid.si_opaque.so_id);
spin_unlock(&nn->s2s_cp_lock);
}
@@ -1049,6 +1060,12 @@ static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
static void nfs4_free_deleg(struct nfs4_stid *stid)
{
+ struct nfs4_delegation *dp = delegstateid(stid);
+
+ WARN_ON_ONCE(!list_empty(&stid->sc_cp_list));
+ WARN_ON_ONCE(!list_empty(&dp->dl_perfile));
+ WARN_ON_ONCE(!list_empty(&dp->dl_perclnt));
+ WARN_ON_ONCE(!list_empty(&dp->dl_recall_lru));
kmem_cache_free(deleg_slab, stid);
atomic_long_dec(&num_delegations);
}
@@ -1462,6 +1479,7 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid)
release_all_access(stp);
if (stp->st_stateowner)
nfs4_put_stateowner(stp->st_stateowner);
+ WARN_ON(!list_empty(&stid->sc_cp_list));
kmem_cache_free(stateid_slab, stid);
}
@@ -2233,6 +2251,7 @@ __destroy_client(struct nfs4_client *clp)
if (clp->cl_cb_conn.cb_xprt)
svc_xprt_put(clp->cl_cb_conn.cb_xprt);
atomic_add_unless(&nn->nfs4_client_count, -1, 0);
+ nfsd4_dec_courtesy_client_count(nn, clp);
free_client(clp);
wake_up_all(&expiry_wq);
}
@@ -2478,7 +2497,7 @@ static const char *cb_state2str(int state)
static int client_info_show(struct seq_file *m, void *v)
{
- struct inode *inode = m->private;
+ struct inode *inode = file_inode(m->file);
struct nfs4_client *clp;
u64 clid;
@@ -2518,17 +2537,7 @@ static int client_info_show(struct seq_file *m, void *v)
return 0;
}
-static int client_info_open(struct inode *inode, struct file *file)
-{
- return single_open(file, client_info_show, inode);
-}
-
-static const struct file_operations client_info_fops = {
- .open = client_info_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(client_info);
static void *states_start(struct seq_file *s, loff_t *pos)
__acquires(&clp->cl_lock)
@@ -4337,7 +4346,27 @@ out:
return -ENOMEM;
}
-void nfsd4_init_leases_net(struct nfsd_net *nn)
+static unsigned long
+nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ int cnt;
+ struct nfsd_net *nn = container_of(shrink,
+ struct nfsd_net, nfsd_client_shrinker);
+
+ cnt = atomic_read(&nn->nfsd_courtesy_clients);
+ if (cnt > 0)
+ mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0);
+ return (unsigned long)cnt;
+}
+
+static unsigned long
+nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ return SHRINK_STOP;
+}
+
+int
+nfsd4_init_leases_net(struct nfsd_net *nn)
{
struct sysinfo si;
u64 max_clients;
@@ -4346,8 +4375,8 @@ void nfsd4_init_leases_net(struct nfsd_net *nn)
nn->nfsd4_grace = 90;
nn->somebody_reclaimed = false;
nn->track_reclaim_completes = false;
- nn->clverifier_counter = prandom_u32();
- nn->clientid_base = prandom_u32();
+ nn->clverifier_counter = get_random_u32();
+ nn->clientid_base = get_random_u32();
nn->clientid_counter = nn->clientid_base + 1;
nn->s2s_cp_cl_id = nn->clientid_counter++;
@@ -4356,6 +4385,18 @@ void nfsd4_init_leases_net(struct nfsd_net *nn)
max_clients = (u64)si.totalram * si.mem_unit / (1024 * 1024 * 1024);
max_clients *= NFS4_CLIENTS_PER_GB;
nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB);
+
+ atomic_set(&nn->nfsd_courtesy_clients, 0);
+ nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
+ nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
+ nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
+ return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
+}
+
+void
+nfsd4_leases_net_shutdown(struct nfsd_net *nn)
+{
+ unregister_shrinker(&nn->nfsd_client_shrinker);
}
static void init_nfs4_replay(struct nfs4_replay *rp)
@@ -4715,6 +4756,35 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
return ret;
}
+static bool nfsd4_deleg_present(const struct inode *inode)
+{
+ struct file_lock_context *ctx = smp_load_acquire(&inode->i_flctx);
+
+ return ctx && !list_empty_careful(&ctx->flc_lease);
+}
+
+/**
+ * nfsd_wait_for_delegreturn - wait for delegations to be returned
+ * @rqstp: the RPC transaction being executed
+ * @inode: in-core inode of the file being waited for
+ *
+ * The timeout prevents deadlock if all nfsd threads happen to be
+ * tied up waiting for returning delegations.
+ *
+ * Return values:
+ * %true: delegation was returned
+ * %false: timed out waiting for delegreturn
+ */
+bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, struct inode *inode)
+{
+ long __maybe_unused timeo;
+
+ timeo = wait_var_event_timeout(inode, !nfsd4_deleg_present(inode),
+ NFSD_DELEGRETURN_TIMEOUT);
+ trace_nfsd_delegret_wakeup(rqstp, inode, timeo);
+ return timeo > 0;
+}
+
static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
{
struct nfs4_delegation *dp = cb_to_delegation(cb);
@@ -4743,6 +4813,8 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
{
struct nfs4_delegation *dp = cb_to_delegation(cb);
+ trace_nfsd_cb_recall_done(&dp->dl_stid.sc_stateid, task);
+
if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID ||
dp->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID)
return 1;
@@ -4788,18 +4860,17 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
* We're assuming the state code never drops its reference
* without first removing the lease. Since we're in this lease
* callback (and since the lease code is serialized by the
- * i_lock) we know the server hasn't removed the lease yet, and
+ * flc_lock) we know the server hasn't removed the lease yet, and
* we know it's safe to take a reference.
*/
refcount_inc(&dp->dl_stid.sc_count);
- nfsd4_run_cb(&dp->dl_recall);
+ WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall));
}
-/* Called from break_lease() with i_lock held. */
+/* Called from break_lease() with flc_lock held. */
static bool
nfsd_break_deleg_cb(struct file_lock *fl)
{
- bool ret = false;
struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
struct nfs4_file *fp = dp->dl_stid.sc_file;
struct nfs4_client *clp = dp->dl_stid.sc_client;
@@ -4825,7 +4896,7 @@ nfsd_break_deleg_cb(struct file_lock *fl)
fp->fi_had_conflict = true;
nfsd_break_one_deleg(dp);
spin_unlock(&fp->fi_lock);
- return ret;
+ return false;
}
/**
@@ -5878,8 +5949,11 @@ nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist,
goto exp_client;
if (!state_expired(lt, clp->cl_time))
break;
- if (!atomic_read(&clp->cl_rpc_users))
+ if (!atomic_read(&clp->cl_rpc_users)) {
+ if (clp->cl_state == NFSD4_ACTIVE)
+ atomic_inc(&nn->nfsd_courtesy_clients);
clp->cl_state = NFSD4_COURTESY;
+ }
if (!client_has_state(clp))
goto exp_client;
if (!nfs4_anylock_blockers(clp))
@@ -5894,10 +5968,49 @@ exp_client:
spin_unlock(&nn->client_lock);
}
+static void
+nfs4_get_courtesy_client_reaplist(struct nfsd_net *nn,
+ struct list_head *reaplist)
+{
+ unsigned int maxreap = 0, reapcnt = 0;
+ struct list_head *pos, *next;
+ struct nfs4_client *clp;
+
+ maxreap = NFSD_CLIENT_MAX_TRIM_PER_RUN;
+ INIT_LIST_HEAD(reaplist);
+
+ spin_lock(&nn->client_lock);
+ list_for_each_safe(pos, next, &nn->client_lru) {
+ clp = list_entry(pos, struct nfs4_client, cl_lru);
+ if (clp->cl_state == NFSD4_ACTIVE)
+ break;
+ if (reapcnt >= maxreap)
+ break;
+ if (!mark_client_expired_locked(clp)) {
+ list_add(&clp->cl_lru, reaplist);
+ reapcnt++;
+ }
+ }
+ spin_unlock(&nn->client_lock);
+}
+
+static void
+nfs4_process_client_reaplist(struct list_head *reaplist)
+{
+ struct list_head *pos, *next;
+ struct nfs4_client *clp;
+
+ list_for_each_safe(pos, next, reaplist) {
+ clp = list_entry(pos, struct nfs4_client, cl_lru);
+ trace_nfsd_clid_purged(&clp->cl_clientid);
+ list_del_init(&clp->cl_lru);
+ expire_client(clp);
+ }
+}
+
static time64_t
nfs4_laundromat(struct nfsd_net *nn)
{
- struct nfs4_client *clp;
struct nfs4_openowner *oo;
struct nfs4_delegation *dp;
struct nfs4_ol_stateid *stp;
@@ -5920,18 +6033,14 @@ nfs4_laundromat(struct nfsd_net *nn)
spin_lock(&nn->s2s_cp_lock);
idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid);
- if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID &&
+ if (cps->cp_stateid.cs_type == NFS4_COPYNOTIFY_STID &&
state_expired(&lt, cps->cpntf_time))
_free_cpntf_state_locked(nn, cps);
}
spin_unlock(&nn->s2s_cp_lock);
nfs4_get_client_reaplist(nn, &reaplist, &lt);
- list_for_each_safe(pos, next, &reaplist) {
- clp = list_entry(pos, struct nfs4_client, cl_lru);
- trace_nfsd_clid_purged(&clp->cl_clientid);
- list_del_init(&clp->cl_lru);
- expire_client(clp);
- }
+ nfs4_process_client_reaplist(&reaplist);
+
spin_lock(&state_lock);
list_for_each_safe(pos, next, &nn->del_recall_lru) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
@@ -6014,6 +6123,18 @@ laundromat_main(struct work_struct *laundry)
queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ);
}
+static void
+courtesy_client_reaper(struct work_struct *reaper)
+{
+ struct list_head reaplist;
+ struct delayed_work *dwork = to_delayed_work(reaper);
+ struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
+ nfsd_shrinker_work);
+
+ nfs4_get_courtesy_client_reaplist(nn, &reaplist);
+ nfs4_process_client_reaplist(&reaplist);
+}
+
static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
{
if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
@@ -6149,6 +6270,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
struct nfs4_stid **s, struct nfsd_net *nn)
{
__be32 status;
+ struct nfs4_stid *stid;
bool return_revoked = false;
/*
@@ -6171,15 +6293,16 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
}
if (status)
return status;
- *s = find_stateid_by_type(cstate->clp, stateid, typemask);
- if (!*s)
+ stid = find_stateid_by_type(cstate->clp, stateid, typemask);
+ if (!stid)
return nfserr_bad_stateid;
- if (((*s)->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) {
- nfs4_put_stid(*s);
+ if ((stid->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) {
+ nfs4_put_stid(stid);
if (cstate->minorversion)
return nfserr_deleg_revoked;
return nfserr_bad_stateid;
}
+ *s = stid;
return nfs_ok;
}
@@ -6244,12 +6367,12 @@ out:
static void
_free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps)
{
- WARN_ON_ONCE(cps->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID);
- if (!refcount_dec_and_test(&cps->cp_stateid.sc_count))
+ WARN_ON_ONCE(cps->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID);
+ if (!refcount_dec_and_test(&cps->cp_stateid.cs_count))
return;
list_del(&cps->cp_list);
idr_remove(&nn->s2s_cp_stateids,
- cps->cp_stateid.stid.si_opaque.so_id);
+ cps->cp_stateid.cs_stid.si_opaque.so_id);
kfree(cps);
}
/*
@@ -6271,12 +6394,12 @@ __be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st,
if (cps_t) {
state = container_of(cps_t, struct nfs4_cpntf_state,
cp_stateid);
- if (state->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID) {
+ if (state->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID) {
state = NULL;
goto unlock;
}
if (!clp)
- refcount_inc(&state->cp_stateid.sc_count);
+ refcount_inc(&state->cp_stateid.cs_count);
else
_free_cpntf_state_locked(nn, state);
}
@@ -6684,6 +6807,7 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
struct nfs4_client *clp = s->st_stid.sc_client;
bool unhashed;
LIST_HEAD(reaplist);
+ struct nfs4_ol_stateid *stp;
spin_lock(&clp->cl_lock);
unhashed = unhash_open_stateid(s, &reaplist);
@@ -6692,6 +6816,8 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
if (unhashed)
put_ol_stateid_locked(s, &reaplist);
spin_unlock(&clp->cl_lock);
+ list_for_each_entry(stp, &reaplist, st_locks)
+ nfs4_free_cpntf_statelist(clp->net, &stp->st_stid);
free_ol_stateid_reaplist(&reaplist);
} else {
spin_unlock(&clp->cl_lock);
@@ -6775,6 +6901,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto put_stateid;
+ wake_up_var(d_inode(cstate->current_fh.fh_dentry));
destroy_delegation(dp);
put_stateid:
nfs4_put_stid(&dp->dl_stid);
@@ -7830,6 +7957,7 @@ static int nfs4_state_create_net(struct net *net)
INIT_LIST_HEAD(&nn->blocked_locks_lru);
INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
+ INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
get_net(net);
return 0;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 1e9690a061ec..bcfeb1a922c0 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -42,6 +42,8 @@
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/sunrpc/addr.h>
#include <linux/xattr.h>
+#include <linux/vmalloc.h>
+
#include <uapi/linux/xattr.h>
#include "idmap.h"
@@ -791,6 +793,7 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit
return nfserr_bad_xdr;
if (xdr_stream_decode_u32(argp->xdr, &commit->co_count) < 0)
return nfserr_bad_xdr;
+ memset(&commit->co_verf, 0, sizeof(commit->co_verf));
return nfs_ok;
}
@@ -799,6 +802,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
{
__be32 *p, status;
+ memset(create, 0, sizeof(*create));
if (xdr_stream_decode_u32(argp->xdr, &create->cr_type) < 0)
return nfserr_bad_xdr;
switch (create->cr_type) {
@@ -848,6 +852,7 @@ nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegretu
static inline __be32
nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr)
{
+ memset(getattr, 0, sizeof(*getattr));
return nfsd4_decode_bitmap4(argp, getattr->ga_bmval,
ARRAY_SIZE(getattr->ga_bmval));
}
@@ -855,6 +860,7 @@ nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *geta
static __be32
nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link)
{
+ memset(link, 0, sizeof(*link));
return nfsd4_decode_component4(argp, &link->li_name, &link->li_namelen);
}
@@ -903,6 +909,7 @@ nfsd4_decode_locker4(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
static __be32
nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
{
+ memset(lock, 0, sizeof(*lock));
if (xdr_stream_decode_u32(argp->xdr, &lock->lk_type) < 0)
return nfserr_bad_xdr;
if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT))
@@ -919,6 +926,7 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
static __be32
nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt)
{
+ memset(lockt, 0, sizeof(*lockt));
if (xdr_stream_decode_u32(argp->xdr, &lockt->lt_type) < 0)
return nfserr_bad_xdr;
if ((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT))
@@ -1140,11 +1148,8 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
__be32 status;
u32 dummy;
- memset(open->op_bmval, 0, sizeof(open->op_bmval));
- open->op_iattr.ia_valid = 0;
- open->op_openowner = NULL;
+ memset(open, 0, sizeof(*open));
- open->op_xdr_error = 0;
if (xdr_stream_decode_u32(argp->xdr, &open->op_seqid) < 0)
return nfserr_bad_xdr;
/* deleg_want is ignored */
@@ -1179,6 +1184,8 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con
if (xdr_stream_decode_u32(argp->xdr, &open_conf->oc_seqid) < 0)
return nfserr_bad_xdr;
+ memset(&open_conf->oc_resp_stateid, 0,
+ sizeof(open_conf->oc_resp_stateid));
return nfs_ok;
}
@@ -1187,6 +1194,7 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d
{
__be32 status;
+ memset(open_down, 0, sizeof(*open_down));
status = nfsd4_decode_stateid4(argp, &open_down->od_stateid);
if (status)
return status;
@@ -1216,6 +1224,7 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh)
if (!putfh->pf_fhval)
return nfserr_jukebox;
+ putfh->no_verify = false;
return nfs_ok;
}
@@ -1232,6 +1241,7 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read)
{
__be32 status;
+ memset(read, 0, sizeof(*read));
status = nfsd4_decode_stateid4(argp, &read->rd_stateid);
if (status)
return status;
@@ -1248,6 +1258,7 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read
{
__be32 status;
+ memset(readdir, 0, sizeof(*readdir));
if (xdr_stream_decode_u64(argp->xdr, &readdir->rd_cookie) < 0)
return nfserr_bad_xdr;
status = nfsd4_decode_verifier4(argp, &readdir->rd_verf);
@@ -1267,6 +1278,7 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read
static __be32
nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove)
{
+ memset(&remove->rm_cinfo, 0, sizeof(remove->rm_cinfo));
return nfsd4_decode_component4(argp, &remove->rm_name, &remove->rm_namelen);
}
@@ -1275,6 +1287,7 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename
{
__be32 status;
+ memset(rename, 0, sizeof(*rename));
status = nfsd4_decode_component4(argp, &rename->rn_sname, &rename->rn_snamelen);
if (status)
return status;
@@ -1291,6 +1304,7 @@ static __be32
nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp,
struct nfsd4_secinfo *secinfo)
{
+ secinfo->si_exp = NULL;
return nfsd4_decode_component4(argp, &secinfo->si_name, &secinfo->si_namelen);
}
@@ -1299,6 +1313,7 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta
{
__be32 status;
+ memset(setattr, 0, sizeof(*setattr));
status = nfsd4_decode_stateid4(argp, &setattr->sa_stateid);
if (status)
return status;
@@ -1313,6 +1328,8 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient
{
__be32 *p, status;
+ memset(setclientid, 0, sizeof(*setclientid));
+
if (argp->minorversion >= 1)
return nfserr_notsupp;
@@ -1369,6 +1386,8 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify
{
__be32 *p, status;
+ memset(verify, 0, sizeof(*verify));
+
status = nfsd4_decode_bitmap4(argp, verify->ve_bmval,
ARRAY_SIZE(verify->ve_bmval));
if (status)
@@ -1408,6 +1427,9 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
if (!xdr_stream_subsegment(argp->xdr, &write->wr_payload, write->wr_buflen))
return nfserr_bad_xdr;
+ write->wr_bytes_written = 0;
+ write->wr_how_written = 0;
+ memset(&write->wr_verifier, 0, sizeof(write->wr_verifier));
return nfs_ok;
}
@@ -1432,6 +1454,7 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel
static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc)
{
+ memset(bc, 0, sizeof(*bc));
if (xdr_stream_decode_u32(argp->xdr, &bc->bc_cb_program) < 0)
return nfserr_bad_xdr;
return nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec);
@@ -1442,6 +1465,7 @@ static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp,
u32 use_conn_in_rdma_mode;
__be32 status;
+ memset(bcts, 0, sizeof(*bcts));
status = nfsd4_decode_sessionid4(argp, &bcts->sessionid);
if (status)
return status;
@@ -1583,6 +1607,7 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
{
__be32 status;
+ memset(exid, 0, sizeof(*exid));
status = nfsd4_decode_verifier4(argp, &exid->verifier);
if (status)
return status;
@@ -1635,6 +1660,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
{
__be32 status;
+ memset(sess, 0, sizeof(*sess));
status = nfsd4_decode_clientid4(argp, &sess->clientid);
if (status)
return status;
@@ -1650,11 +1676,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
return status;
if (xdr_stream_decode_u32(argp->xdr, &sess->callback_prog) < 0)
return nfserr_bad_xdr;
- status = nfsd4_decode_cb_sec(argp, &sess->cb_sec);
- if (status)
- return status;
-
- return nfs_ok;
+ return nfsd4_decode_cb_sec(argp, &sess->cb_sec);
}
static __be32
@@ -1678,6 +1700,7 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,
{
__be32 status;
+ memset(gdev, 0, sizeof(*gdev));
status = nfsd4_decode_deviceid4(argp, &gdev->gd_devid);
if (status)
return status;
@@ -1698,6 +1721,7 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
{
__be32 *p, status;
+ memset(lcp, 0, sizeof(*lcp));
if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_seg.offset) < 0)
return nfserr_bad_xdr;
if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_seg.length) < 0)
@@ -1733,6 +1757,7 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
{
__be32 status;
+ memset(lgp, 0, sizeof(*lgp));
if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_signal) < 0)
return nfserr_bad_xdr;
if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_layout_type) < 0)
@@ -1758,6 +1783,7 @@ static __be32
nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
struct nfsd4_layoutreturn *lrp)
{
+ memset(lrp, 0, sizeof(*lrp));
if (xdr_stream_decode_bool(argp->xdr, &lrp->lr_reclaim) < 0)
return nfserr_bad_xdr;
if (xdr_stream_decode_u32(argp->xdr, &lrp->lr_layout_type) < 0)
@@ -1773,6 +1799,8 @@ static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp,
{
if (xdr_stream_decode_u32(argp->xdr, &sin->sin_style) < 0)
return nfserr_bad_xdr;
+
+ sin->sin_exp = NULL;
return nfs_ok;
}
@@ -1793,6 +1821,7 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
seq->maxslots = be32_to_cpup(p++);
seq->cachethis = be32_to_cpup(p);
+ seq->status_flags = 0;
return nfs_ok;
}
@@ -1803,6 +1832,7 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta
__be32 status;
u32 i;
+ memset(test_stateid, 0, sizeof(*test_stateid));
if (xdr_stream_decode_u32(argp->xdr, &test_stateid->ts_num_ids) < 0)
return nfserr_bad_xdr;
@@ -1900,6 +1930,7 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
struct nl4_server *ns_dummy;
__be32 status;
+ memset(copy, 0, sizeof(*copy));
status = nfsd4_decode_stateid4(argp, &copy->cp_src_stateid);
if (status)
return status;
@@ -1955,6 +1986,7 @@ nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp,
{
__be32 status;
+ memset(cn, 0, sizeof(*cn));
cn->cpn_src = svcxdr_tmpalloc(argp, sizeof(*cn->cpn_src));
if (cn->cpn_src == NULL)
return nfserr_jukebox;
@@ -1972,6 +2004,8 @@ static __be32
nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp,
struct nfsd4_offload_status *os)
{
+ os->count = 0;
+ os->status = 0;
return nfsd4_decode_stateid4(argp, &os->stateid);
}
@@ -1988,6 +2022,8 @@ nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
if (xdr_stream_decode_u32(argp->xdr, &seek->seek_whence) < 0)
return nfserr_bad_xdr;
+ seek->seek_eof = 0;
+ seek->seek_pos = 0;
return nfs_ok;
}
@@ -2123,6 +2159,7 @@ nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp,
__be32 status;
u32 maxcount;
+ memset(getxattr, 0, sizeof(*getxattr));
status = nfsd4_decode_xattr_name(argp, &getxattr->getxa_name);
if (status)
return status;
@@ -2131,8 +2168,7 @@ nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp,
maxcount = min_t(u32, XATTR_SIZE_MAX, maxcount);
getxattr->getxa_len = maxcount;
-
- return status;
+ return nfs_ok;
}
static __be32
@@ -2142,6 +2178,8 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp,
u32 flags, maxcount, size;
__be32 status;
+ memset(setxattr, 0, sizeof(*setxattr));
+
if (xdr_stream_decode_u32(argp->xdr, &flags) < 0)
return nfserr_bad_xdr;
@@ -2180,6 +2218,8 @@ nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp,
{
u32 maxcount;
+ memset(listxattrs, 0, sizeof(*listxattrs));
+
if (xdr_stream_decode_u64(argp->xdr, &listxattrs->lsxa_cookie) < 0)
return nfserr_bad_xdr;
@@ -2207,6 +2247,7 @@ static __be32
nfsd4_decode_removexattr(struct nfsd4_compoundargs *argp,
struct nfsd4_removexattr *removexattr)
{
+ memset(removexattr, 0, sizeof(*removexattr));
return nfsd4_decode_xattr_name(argp, &removexattr->rmxa_name);
}
@@ -2357,22 +2398,15 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0)
return false;
- if (xdr_stream_decode_u32(argp->xdr, &argp->opcnt) < 0)
+ if (xdr_stream_decode_u32(argp->xdr, &argp->client_opcnt) < 0)
return false;
-
- /*
- * NFS4ERR_RESOURCE is a more helpful error than GARBAGE_ARGS
- * here, so we return success at the xdr level so that
- * nfsd4_proc can handle this is an NFS-level error.
- */
- if (argp->opcnt > NFSD_MAX_OPS_PER_COMPOUND)
- return true;
+ argp->opcnt = min_t(u32, argp->client_opcnt,
+ NFSD_MAX_OPS_PER_COMPOUND);
if (argp->opcnt > ARRAY_SIZE(argp->iops)) {
- argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL);
+ argp->ops = vcalloc(argp->opcnt, sizeof(*argp->ops));
if (!argp->ops) {
argp->ops = argp->iops;
- dprintk("nfsd: couldn't allocate room for COMPOUND\n");
return false;
}
}
@@ -2774,9 +2808,10 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *bmval2, u32
}
-static int get_parent_attributes(struct svc_export *exp, struct kstat *stat)
+static int nfsd4_get_mounted_on_ino(struct svc_export *exp, u64 *pino)
{
struct path path = exp->ex_path;
+ struct kstat stat;
int err;
path_get(&path);
@@ -2784,8 +2819,10 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat)
if (path.dentry != path.mnt->mnt_root)
break;
}
- err = vfs_getattr(&path, stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
+ err = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT);
path_put(&path);
+ if (!err)
+ *pino = stat.ino;
return err;
}
@@ -3282,22 +3319,21 @@ out_acl:
*p++ = cpu_to_be32(stat.btime.tv_nsec);
}
if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
- struct kstat parent_stat;
u64 ino = stat.ino;
p = xdr_reserve_space(xdr, 8);
if (!p)
goto out_resource;
/*
- * Get parent's attributes if not ignoring crossmount
- * and this is the root of a cross-mounted filesystem.
+ * Get ino of mountpoint in parent filesystem, if not ignoring
+ * crossmount and this is the root of a cross-mounted
+ * filesystem.
*/
if (ignore_crossmnt == 0 &&
dentry == exp->ex_path.mnt->mnt_root) {
- err = get_parent_attributes(exp, &parent_stat);
+ err = nfsd4_get_mounted_on_ino(exp, &ino);
if (err)
goto out_nfserr;
- ino = parent_stat.ino;
}
p = xdr_encode_hyper(p, ino);
}
@@ -3994,7 +4030,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
}
if (resp->xdr->buf->page_len && splice_ok) {
WARN_ON_ONCE(1);
- return nfserr_resource;
+ return nfserr_serverfault;
}
xdr_commit_encode(xdr);
@@ -5394,7 +5430,7 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
struct nfsd4_compoundargs *args = rqstp->rq_argp;
if (args->ops != args->iops) {
- kfree(args->ops);
+ vfree(args->ops);
args->ops = args->iops;
}
while (args->to_free) {
@@ -5423,12 +5459,8 @@ bool
nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd4_compoundres *resp = rqstp->rq_resp;
- struct xdr_buf *buf = xdr->buf;
__be32 *p;
- WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len +
- buf->tail[0].iov_len);
-
/*
* Send buffer space for the following items is reserved
* at the top of nfsd4_proc_compound().
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 9b31e1103e7b..3e64a3d50a1c 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -604,9 +604,10 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
* scraping this file for info should test the labels to ensure they're
* getting the correct field.
*/
-static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
+int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
{
- struct nfsd_net *nn = m->private;
+ struct nfsd_net *nn = net_generic(file_inode(m->file)->i_sb->s_fs_info,
+ nfsd_net_id);
seq_printf(m, "max entries: %u\n", nn->max_drc_entries);
seq_printf(m, "num entries: %u\n",
@@ -626,11 +627,3 @@ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize);
return 0;
}
-
-int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file)
-{
- struct nfsd_net *nn = net_generic(file_inode(file)->i_sb->s_fs_info,
- nfsd_net_id);
-
- return single_open(file, nfsd_reply_cache_stats_show, nn);
-}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 917fa1892fd2..dc74a947a440 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -185,17 +185,7 @@ static int export_features_show(struct seq_file *m, void *v)
return 0;
}
-static int export_features_open(struct inode *inode, struct file *file)
-{
- return single_open(file, export_features_show, NULL);
-}
-
-static const struct file_operations export_features_operations = {
- .open = export_features_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(export_features);
#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
static int supported_enctypes_show(struct seq_file *m, void *v)
@@ -204,17 +194,7 @@ static int supported_enctypes_show(struct seq_file *m, void *v)
return 0;
}
-static int supported_enctypes_open(struct inode *inode, struct file *file)
-{
- return single_open(file, supported_enctypes_show, NULL);
-}
-
-static const struct file_operations supported_enctypes_ops = {
- .open = supported_enctypes_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(supported_enctypes);
#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
static const struct file_operations pool_stats_operations = {
@@ -224,19 +204,9 @@ static const struct file_operations pool_stats_operations = {
.release = nfsd_pool_stats_release,
};
-static const struct file_operations reply_cache_stats_operations = {
- .open = nfsd_reply_cache_stats_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(nfsd_reply_cache_stats);
-static const struct file_operations filecache_ops = {
- .open = nfsd_file_cache_stats_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(nfsd_file_cache_stats);
/*----------------------------------------------------------------------------*/
/*
@@ -1365,7 +1335,7 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
/* Per-export io stats use same ops as exports file */
[NFSD_Export_Stats] = {"export_stats", &exports_nfsd_operations, S_IRUGO},
[NFSD_Export_features] = {"export_features",
- &export_features_operations, S_IRUGO},
+ &export_features_fops, S_IRUGO},
[NFSD_FO_UnlockIP] = {"unlock_ip",
&transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_FO_UnlockFS] = {"unlock_filesystem",
@@ -1374,14 +1344,16 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
[NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO},
- [NFSD_Reply_Cache_Stats] = {"reply_cache_stats", &reply_cache_stats_operations, S_IRUGO},
+ [NFSD_Reply_Cache_Stats] = {"reply_cache_stats",
+ &nfsd_reply_cache_stats_fops, S_IRUGO},
[NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
[NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
[NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO},
- [NFSD_Filecache] = {"filecache", &filecache_ops, S_IRUGO},
+ [NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO},
#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
- [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO},
+ [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes",
+ &supported_enctypes_fops, S_IRUGO},
#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
#ifdef CONFIG_NFSD_V4
[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
@@ -1481,16 +1453,19 @@ static __net_init int nfsd_init_net(struct net *net)
goto out_idmap_error;
nn->nfsd_versions = NULL;
nn->nfsd4_minorversions = NULL;
- retval = nfsd_reply_cache_init(nn);
+ retval = nfsd4_init_leases_net(nn);
if (retval)
goto out_drc_error;
- nfsd4_init_leases_net(nn);
-
+ retval = nfsd_reply_cache_init(nn);
+ if (retval)
+ goto out_cache_error;
get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
seqlock_init(&nn->writeverf_lock);
return 0;
+out_cache_error:
+ nfsd4_leases_net_shutdown(nn);
out_drc_error:
nfsd_idmap_shutdown(net);
out_idmap_error:
@@ -1507,6 +1482,7 @@ static __net_exit void nfsd_exit_net(struct net *net)
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
nfsd_netns_free_versions(net_generic(net, nfsd_net_id));
+ nfsd4_leases_net_shutdown(nn);
}
static struct pernet_operations nfsd_net_ops = {
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 57a468ed85c3..09726c5b9a31 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -164,6 +164,7 @@ char * nfs4_recoverydir(void);
bool nfsd4_spo_must_allow(struct svc_rqst *rqstp);
int nfsd4_create_laundry_wq(void);
void nfsd4_destroy_laundry_wq(void);
+bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, struct inode *inode);
#else
static inline int nfsd4_init_slabs(void) { return 0; }
static inline void nfsd4_free_slabs(void) { }
@@ -179,6 +180,11 @@ static inline bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
}
static inline int nfsd4_create_laundry_wq(void) { return 0; };
static inline void nfsd4_destroy_laundry_wq(void) {};
+static inline bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp,
+ struct inode *inode)
+{
+ return false;
+}
#endif
/*
@@ -343,6 +349,7 @@ void nfsd_lockd_shutdown(void);
#define NFSD_COURTESY_CLIENT_TIMEOUT (24 * 60 * 60) /* seconds */
#define NFSD_CLIENT_MAX_TRIM_PER_RUN 128
#define NFS4_CLIENTS_PER_GB 1024
+#define NFSD_DELEGRETURN_TIMEOUT (HZ / 34) /* 30ms */
/*
* The following attributes are currently not supported by the NFSv4 server:
@@ -498,7 +505,8 @@ extern void unregister_cld_notifier(void);
extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
#endif
-extern void nfsd4_init_leases_net(struct nfsd_net *nn);
+extern int nfsd4_init_leases_net(struct nfsd_net *nn);
+extern void nfsd4_leases_net_shutdown(struct nfsd_net *nn);
#else /* CONFIG_NFSD_V4 */
static inline int nfsd4_is_junction(struct dentry *dentry)
@@ -506,7 +514,8 @@ static inline int nfsd4_is_junction(struct dentry *dentry)
return 0;
}
-static inline void nfsd4_init_leases_net(struct nfsd_net *nn) {};
+static inline int nfsd4_init_leases_net(struct nfsd_net *nn) { return 0; };
+static inline void nfsd4_leases_net_shutdown(struct nfsd_net *nn) {};
#define register_cld_notifier() 0
#define unregister_cld_notifier() do { } while(0)
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index a5b71526cee0..8c52b6c9d31a 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -392,14 +392,8 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
skip_pseudoflavor_check:
/* Finally, check access permissions. */
error = nfsd_permission(rqstp, exp, dentry, access);
-
- if (error) {
- dprintk("fh_verify: %pd2 permission failure, "
- "acc=%x, error=%d\n",
- dentry,
- access, ntohl(error));
- }
out:
+ trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error);
if (error == nfserr_stale)
nfsd_stats_fh_stale_inc(exp);
return error;
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 7381972f1677..82b3ddeacc33 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -185,6 +185,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
argp->count, argp->offset);
argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2);
+ argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen);
v = 0;
len = argp->count;
@@ -390,9 +391,8 @@ nfsd_proc_create(struct svc_rqst *rqstp)
resp->status = nfs_ok;
if (!inode) {
/* File doesn't exist. Create it and set attrs */
- resp->status = nfsd_create_locked(rqstp, dirfhp, argp->name,
- argp->len, &attrs, type, rdev,
- newfhp);
+ resp->status = nfsd_create_locked(rqstp, dirfhp, &attrs, type,
+ rdev, newfhp);
} else if (type == S_IFREG) {
dprintk("nfsd: existing %s, valid=%x, size=%ld\n",
argp->name, attr->ia_valid, (long) attr->ia_size);
@@ -567,24 +567,15 @@ static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp,
struct xdr_buf *buf = &resp->dirlist;
struct xdr_stream *xdr = &resp->xdr;
- count = clamp(count, (u32)(XDR_UNIT * 2), svc_max_payload(rqstp));
-
memset(buf, 0, sizeof(*buf));
/* Reserve room for the NULL ptr & eof flag (-2 words) */
- buf->buflen = count - XDR_UNIT * 2;
+ buf->buflen = clamp(count, (u32)(XDR_UNIT * 2), (u32)PAGE_SIZE);
+ buf->buflen -= XDR_UNIT * 2;
buf->pages = rqstp->rq_next_page;
rqstp->rq_next_page++;
- /* This is xdr_init_encode(), but it assumes that
- * the head kvec has already been consumed. */
- xdr_set_scratch_buffer(xdr, NULL, 0);
- xdr->buf = buf;
- xdr->page_ptr = buf->pages;
- xdr->iov = NULL;
- xdr->p = page_address(*buf->pages);
- xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE);
- xdr->rqst = NULL;
+ xdr_init_encode_pages(xdr, buf, buf->pages, NULL);
}
/*
@@ -646,6 +637,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_decode = nfssvc_decode_voidarg,
.pc_encode = nfssvc_encode_voidres,
.pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_argzero = sizeof(struct nfsd_voidargs),
.pc_ressize = sizeof(struct nfsd_voidres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = 0,
@@ -657,6 +649,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_encode = nfssvc_encode_attrstatres,
.pc_release = nfssvc_release_attrstat,
.pc_argsize = sizeof(struct nfsd_fhandle),
+ .pc_argzero = sizeof(struct nfsd_fhandle),
.pc_ressize = sizeof(struct nfsd_attrstat),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+AT,
@@ -668,6 +661,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_encode = nfssvc_encode_attrstatres,
.pc_release = nfssvc_release_attrstat,
.pc_argsize = sizeof(struct nfsd_sattrargs),
+ .pc_argzero = sizeof(struct nfsd_sattrargs),
.pc_ressize = sizeof(struct nfsd_attrstat),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+AT,
@@ -678,6 +672,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_decode = nfssvc_decode_voidarg,
.pc_encode = nfssvc_encode_voidres,
.pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_argzero = sizeof(struct nfsd_voidargs),
.pc_ressize = sizeof(struct nfsd_voidres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = 0,
@@ -689,6 +684,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_encode = nfssvc_encode_diropres,
.pc_release = nfssvc_release_diropres,
.pc_argsize = sizeof(struct nfsd_diropargs),
+ .pc_argzero = sizeof(struct nfsd_diropargs),
.pc_ressize = sizeof(struct nfsd_diropres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+FH+AT,
@@ -699,6 +695,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_decode = nfssvc_decode_fhandleargs,
.pc_encode = nfssvc_encode_readlinkres,
.pc_argsize = sizeof(struct nfsd_fhandle),
+ .pc_argzero = sizeof(struct nfsd_fhandle),
.pc_ressize = sizeof(struct nfsd_readlinkres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+1+NFS_MAXPATHLEN/4,
@@ -710,6 +707,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_encode = nfssvc_encode_readres,
.pc_release = nfssvc_release_readres,
.pc_argsize = sizeof(struct nfsd_readargs),
+ .pc_argzero = sizeof(struct nfsd_readargs),
.pc_ressize = sizeof(struct nfsd_readres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4,
@@ -720,6 +718,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_decode = nfssvc_decode_voidarg,
.pc_encode = nfssvc_encode_voidres,
.pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_argzero = sizeof(struct nfsd_voidargs),
.pc_ressize = sizeof(struct nfsd_voidres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = 0,
@@ -731,6 +730,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_encode = nfssvc_encode_attrstatres,
.pc_release = nfssvc_release_attrstat,
.pc_argsize = sizeof(struct nfsd_writeargs),
+ .pc_argzero = sizeof(struct nfsd_writeargs),
.pc_ressize = sizeof(struct nfsd_attrstat),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+AT,
@@ -742,6 +742,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_encode = nfssvc_encode_diropres,
.pc_release = nfssvc_release_diropres,
.pc_argsize = sizeof(struct nfsd_createargs),
+ .pc_argzero = sizeof(struct nfsd_createargs),
.pc_ressize = sizeof(struct nfsd_diropres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+FH+AT,
@@ -752,6 +753,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_decode = nfssvc_decode_diropargs,
.pc_encode = nfssvc_encode_statres,
.pc_argsize = sizeof(struct nfsd_diropargs),
+ .pc_argzero = sizeof(struct nfsd_diropargs),
.pc_ressize = sizeof(struct nfsd_stat),
.pc_cachetype = RC_REPLSTAT,
.pc_xdrressize = ST,
@@ -762,6 +764,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_decode = nfssvc_decode_renameargs,
.pc_encode = nfssvc_encode_statres,
.pc_argsize = sizeof(struct nfsd_renameargs),
+ .pc_argzero = sizeof(struct nfsd_renameargs),
.pc_ressize = sizeof(struct nfsd_stat),
.pc_cachetype = RC_REPLSTAT,
.pc_xdrressize = ST,
@@ -772,6 +775,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_decode = nfssvc_decode_linkargs,
.pc_encode = nfssvc_encode_statres,
.pc_argsize = sizeof(struct nfsd_linkargs),
+ .pc_argzero = sizeof(struct nfsd_linkargs),
.pc_ressize = sizeof(struct nfsd_stat),
.pc_cachetype = RC_REPLSTAT,
.pc_xdrressize = ST,
@@ -782,6 +786,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_decode = nfssvc_decode_symlinkargs,
.pc_encode = nfssvc_encode_statres,
.pc_argsize = sizeof(struct nfsd_symlinkargs),
+ .pc_argzero = sizeof(struct nfsd_symlinkargs),
.pc_ressize = sizeof(struct nfsd_stat),
.pc_cachetype = RC_REPLSTAT,
.pc_xdrressize = ST,
@@ -793,6 +798,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_encode = nfssvc_encode_diropres,
.pc_release = nfssvc_release_diropres,
.pc_argsize = sizeof(struct nfsd_createargs),
+ .pc_argzero = sizeof(struct nfsd_createargs),
.pc_ressize = sizeof(struct nfsd_diropres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+FH+AT,
@@ -803,6 +809,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_decode = nfssvc_decode_diropargs,
.pc_encode = nfssvc_encode_statres,
.pc_argsize = sizeof(struct nfsd_diropargs),
+ .pc_argzero = sizeof(struct nfsd_diropargs),
.pc_ressize = sizeof(struct nfsd_stat),
.pc_cachetype = RC_REPLSTAT,
.pc_xdrressize = ST,
@@ -813,6 +820,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_decode = nfssvc_decode_readdirargs,
.pc_encode = nfssvc_encode_readdirres,
.pc_argsize = sizeof(struct nfsd_readdirargs),
+ .pc_argzero = sizeof(struct nfsd_readdirargs),
.pc_ressize = sizeof(struct nfsd_readdirres),
.pc_cachetype = RC_NOCACHE,
.pc_name = "READDIR",
@@ -822,6 +830,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_decode = nfssvc_decode_fhandleargs,
.pc_encode = nfssvc_encode_statfsres,
.pc_argsize = sizeof(struct nfsd_fhandle),
+ .pc_argzero = sizeof(struct nfsd_fhandle),
.pc_ressize = sizeof(struct nfsd_statfsres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+5,
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 4bb5baa17040..bfbd9f672f59 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -799,7 +799,7 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
if (nrservs == 0 && nn->nfsd_serv == NULL)
goto out;
- strlcpy(nn->nfsd_name, utsname()->nodename,
+ strscpy(nn->nfsd_name, utsname()->nodename,
sizeof(nn->nfsd_name));
error = nfsd_create_serv(net);
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index aba8520b4b8b..caf6355b18fa 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -338,10 +338,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
return false;
if (args->len > NFSSVC_MAXBLKSIZE_V2)
return false;
- if (!xdr_stream_subsegment(xdr, &args->payload, args->len))
- return false;
- return true;
+ return xdr_stream_subsegment(xdr, &args->payload, args->len);
}
bool
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index ae596dbf8667..e2daef3cc003 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -57,11 +57,11 @@ typedef struct {
} stateid_t;
typedef struct {
- stateid_t stid;
+ stateid_t cs_stid;
#define NFS4_COPY_STID 1
#define NFS4_COPYNOTIFY_STID 2
- unsigned char sc_type;
- refcount_t sc_count;
+ unsigned char cs_type;
+ refcount_t cs_count;
} copy_stateid_t;
struct nfsd4_callback {
@@ -175,7 +175,7 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
/* Maximum number of slots per session. 160 is useful for long haul TCP */
#define NFSD_MAX_SLOTS_PER_SESSION 160
/* Maximum number of operations per session compound */
-#define NFSD_MAX_OPS_PER_COMPOUND 16
+#define NFSD_MAX_OPS_PER_COMPOUND 50
/* Maximum session per slot cache size */
#define NFSD_SLOT_CACHE_SIZE 2048
/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
@@ -692,12 +692,11 @@ extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op);
-extern void nfsd4_run_cb(struct nfsd4_callback *cb);
+extern bool nfsd4_run_cb(struct nfsd4_callback *cb);
extern int nfsd4_create_callback_queue(void);
extern void nfsd4_destroy_callback_queue(void);
extern void nfsd4_shutdown_callback(struct nfs4_client *);
extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
-extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp);
extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name,
struct xdr_netobj princhash, struct nfsd_net *nn);
extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn);
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index a8c5a02a84f0..777e24e5da33 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -32,7 +32,7 @@ struct svc_stat nfsd_svcstats = {
.program = &nfsd_program,
};
-static int nfsd_proc_show(struct seq_file *seq, void *v)
+static int nfsd_show(struct seq_file *seq, void *v)
{
int i;
@@ -72,17 +72,7 @@ static int nfsd_proc_show(struct seq_file *seq, void *v)
return 0;
}
-static int nfsd_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, nfsd_proc_show, NULL);
-}
-
-static const struct proc_ops nfsd_proc_ops = {
- .proc_open = nfsd_proc_open,
- .proc_read = seq_read,
- .proc_lseek = seq_lseek,
- .proc_release = single_release,
-};
+DEFINE_PROC_SHOW_ATTRIBUTE(nfsd);
int nfsd_percpu_counters_init(struct percpu_counter counters[], int num)
{
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 9ebd67d461f9..06a96e955bd0 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -84,19 +84,26 @@ DEFINE_NFSD_XDR_ERR_EVENT(cant_encode);
{ NFSD_MAY_64BIT_COOKIE, "64BIT_COOKIE" })
TRACE_EVENT(nfsd_compound,
- TP_PROTO(const struct svc_rqst *rqst,
- u32 args_opcnt),
- TP_ARGS(rqst, args_opcnt),
+ TP_PROTO(
+ const struct svc_rqst *rqst,
+ const char *tag,
+ u32 taglen,
+ u32 opcnt
+ ),
+ TP_ARGS(rqst, tag, taglen, opcnt),
TP_STRUCT__entry(
__field(u32, xid)
- __field(u32, args_opcnt)
+ __field(u32, opcnt)
+ __string_len(tag, tag, taglen)
),
TP_fast_assign(
__entry->xid = be32_to_cpu(rqst->rq_xid);
- __entry->args_opcnt = args_opcnt;
+ __entry->opcnt = opcnt;
+ __assign_str_len(tag, tag, taglen);
),
- TP_printk("xid=0x%08x opcnt=%u",
- __entry->xid, __entry->args_opcnt)
+ TP_printk("xid=0x%08x opcnt=%u tag=%s",
+ __entry->xid, __entry->opcnt, __get_str(tag)
+ )
)
TRACE_EVENT(nfsd_compound_status,
@@ -195,7 +202,7 @@ TRACE_EVENT(nfsd_fh_verify,
__sockaddr(client, rqstp->rq_xprt->xpt_remotelen)
__field(u32, xid)
__field(u32, fh_hash)
- __field(void *, inode)
+ __field(const void *, inode)
__field(unsigned long, type)
__field(unsigned long, access)
),
@@ -211,13 +218,55 @@ TRACE_EVENT(nfsd_fh_verify,
__entry->type = type;
__entry->access = access;
),
- TP_printk("xid=0x%08x fh_hash=0x%08x inode=%p type=%s access=%s",
- __entry->xid, __entry->fh_hash, __entry->inode,
+ TP_printk("xid=0x%08x fh_hash=0x%08x type=%s access=%s",
+ __entry->xid, __entry->fh_hash,
show_fs_file_type(__entry->type),
show_nfsd_may_flags(__entry->access)
)
);
+TRACE_EVENT_CONDITION(nfsd_fh_verify_err,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp,
+ umode_t type,
+ int access,
+ __be32 error
+ ),
+ TP_ARGS(rqstp, fhp, type, access, error),
+ TP_CONDITION(error),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __sockaddr(server, rqstp->rq_xprt->xpt_remotelen)
+ __sockaddr(client, rqstp->rq_xprt->xpt_remotelen)
+ __field(u32, xid)
+ __field(u32, fh_hash)
+ __field(const void *, inode)
+ __field(unsigned long, type)
+ __field(unsigned long, access)
+ __field(int, error)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = SVC_NET(rqstp)->ns.inum;
+ __assign_sockaddr(server, &rqstp->rq_xprt->xpt_local,
+ rqstp->rq_xprt->xpt_locallen);
+ __assign_sockaddr(client, &rqstp->rq_xprt->xpt_remote,
+ rqstp->rq_xprt->xpt_remotelen);
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ __entry->inode = d_inode(fhp->fh_dentry);
+ __entry->type = type;
+ __entry->access = access;
+ __entry->error = be32_to_cpu(error);
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x type=%s access=%s error=%d",
+ __entry->xid, __entry->fh_hash,
+ show_fs_file_type(__entry->type),
+ show_nfsd_may_flags(__entry->access),
+ __entry->error
+ )
+);
+
DECLARE_EVENT_CLASS(nfsd_fh_err_class,
TP_PROTO(struct svc_rqst *rqstp,
struct svc_fh *fhp,
@@ -489,6 +538,29 @@ DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err);
#include "filecache.h"
#include "vfs.h"
+TRACE_EVENT(nfsd_delegret_wakeup,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct inode *inode,
+ long timeo
+ ),
+ TP_ARGS(rqstp, inode, timeo),
+ TP_STRUCT__entry(
+ __field(u32, xid)
+ __field(const void *, inode)
+ __field(long, timeo)
+ ),
+ TP_fast_assign(
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->inode = inode;
+ __entry->timeo = timeo;
+ ),
+ TP_printk("xid=0x%08x inode=%p%s",
+ __entry->xid, __entry->inode,
+ __entry->timeo == 0 ? " (timed out)" : ""
+ )
+);
+
DECLARE_EVENT_CLASS(nfsd_stateid_class,
TP_PROTO(stateid_t *stp),
TP_ARGS(stp),
@@ -1399,6 +1471,45 @@ TRACE_EVENT(nfsd_cb_offload,
__entry->fh_hash, __entry->count, __entry->status)
);
+DECLARE_EVENT_CLASS(nfsd_cb_done_class,
+ TP_PROTO(
+ const stateid_t *stp,
+ const struct rpc_task *task
+ ),
+ TP_ARGS(stp, task),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, si_id)
+ __field(u32, si_generation)
+ __field(int, status)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
+ __entry->cl_id = stp->si_opaque.so_clid.cl_id;
+ __entry->si_id = stp->si_opaque.so_id;
+ __entry->si_generation = stp->si_generation;
+ __entry->status = task->tk_status;
+ ),
+ TP_printk("client %08x:%08x stateid %08x:%08x status=%d",
+ __entry->cl_boot, __entry->cl_id, __entry->si_id,
+ __entry->si_generation, __entry->status
+ )
+);
+
+#define DEFINE_NFSD_CB_DONE_EVENT(name) \
+DEFINE_EVENT(nfsd_cb_done_class, name, \
+ TP_PROTO( \
+ const stateid_t *stp, \
+ const struct rpc_task *task \
+ ), \
+ TP_ARGS(stp, task))
+
+DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_done);
+DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
+DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
+DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
+
#endif /* _NFSD_TRACE_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index fc17b0ac8729..f650afedd67f 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -343,8 +343,61 @@ nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp,
return nfserrno(get_write_access(inode));
}
-/*
- * Set various file attributes. After this call fhp needs an fh_put.
+static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap)
+{
+ int host_err;
+
+ if (iap->ia_valid & ATTR_SIZE) {
+ /*
+ * RFC5661, Section 18.30.4:
+ * Changing the size of a file with SETATTR indirectly
+ * changes the time_modify and change attributes.
+ *
+ * (and similar for the older RFCs)
+ */
+ struct iattr size_attr = {
+ .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
+ .ia_size = iap->ia_size,
+ };
+
+ if (iap->ia_size < 0)
+ return -EFBIG;
+
+ host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL);
+ if (host_err)
+ return host_err;
+ iap->ia_valid &= ~ATTR_SIZE;
+
+ /*
+ * Avoid the additional setattr call below if the only other
+ * attribute that the client sends is the mtime, as we update
+ * it as part of the size change above.
+ */
+ if ((iap->ia_valid & ~ATTR_MTIME) == 0)
+ return 0;
+ }
+
+ if (!iap->ia_valid)
+ return 0;
+
+ iap->ia_valid |= ATTR_CTIME;
+ return notify_change(&init_user_ns, dentry, iap, NULL);
+}
+
+/**
+ * nfsd_setattr - Set various file attributes.
+ * @rqstp: controlling RPC transaction
+ * @fhp: filehandle of target
+ * @attr: attributes to set
+ * @check_guard: set to 1 if guardtime is a valid timestamp
+ * @guardtime: do not act if ctime.tv_sec does not match this timestamp
+ *
+ * This call may adjust the contents of @attr (in particular, this
+ * call may change the bits in the na_iattr.ia_valid field).
+ *
+ * Returns nfs_ok on success, otherwise an NFS status code is
+ * returned. Caller must release @fhp by calling fh_put in either
+ * case.
*/
__be32
nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
@@ -357,9 +410,10 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
int accmode = NFSD_MAY_SATTR;
umode_t ftype = 0;
__be32 err;
- int host_err = 0;
+ int host_err;
bool get_write_count;
bool size_change = (iap->ia_valid & ATTR_SIZE);
+ int retries;
if (iap->ia_valid & ATTR_SIZE) {
accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
@@ -414,43 +468,13 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
inode_lock(inode);
- if (size_change) {
- /*
- * RFC5661, Section 18.30.4:
- * Changing the size of a file with SETATTR indirectly
- * changes the time_modify and change attributes.
- *
- * (and similar for the older RFCs)
- */
- struct iattr size_attr = {
- .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
- .ia_size = iap->ia_size,
- };
-
- host_err = -EFBIG;
- if (iap->ia_size < 0)
- goto out_unlock;
-
- host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL);
- if (host_err)
- goto out_unlock;
- iap->ia_valid &= ~ATTR_SIZE;
-
- /*
- * Avoid the additional setattr call below if the only other
- * attribute that the client sends is the mtime, as we update
- * it as part of the size change above.
- */
- if ((iap->ia_valid & ~ATTR_MTIME) == 0)
- goto out_unlock;
- }
-
- if (iap->ia_valid) {
- iap->ia_valid |= ATTR_CTIME;
- host_err = notify_change(&init_user_ns, dentry, iap, NULL);
+ for (retries = 1;;) {
+ host_err = __nfsd_setattr(dentry, iap);
+ if (host_err != -EAGAIN || !retries--)
+ break;
+ if (!nfsd_wait_for_delegreturn(rqstp, inode))
+ break;
}
-
-out_unlock:
if (attr->na_seclabel && attr->na_seclabel->len)
attr->na_labelerr = security_inode_setsecctx(dentry,
attr->na_seclabel->data, attr->na_seclabel->len);
@@ -1255,7 +1279,7 @@ nfsd_check_ignore_resizing(struct iattr *iap)
/* The parent directory should already be locked: */
__be32
nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
- char *fname, int flen, struct nfsd_attrs *attrs,
+ struct nfsd_attrs *attrs,
int type, dev_t rdev, struct svc_fh *resfhp)
{
struct dentry *dentry, *dchild;
@@ -1382,8 +1406,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (err)
goto out_unlock;
fh_fill_pre_attrs(fhp);
- err = nfsd_create_locked(rqstp, fhp, fname, flen, attrs, type,
- rdev, resfhp);
+ err = nfsd_create_locked(rqstp, fhp, attrs, type, rdev, resfhp);
fh_fill_post_attrs(fhp);
out_unlock:
inode_unlock(dentry->d_inode);
@@ -1673,7 +1696,15 @@ retry:
.new_dir = tdir,
.new_dentry = ndentry,
};
- host_err = vfs_rename(&rd);
+ int retries;
+
+ for (retries = 1;;) {
+ host_err = vfs_rename(&rd);
+ if (host_err != -EAGAIN || !retries--)
+ break;
+ if (!nfsd_wait_for_delegreturn(rqstp, d_inode(odentry)))
+ break;
+ }
if (!host_err) {
host_err = commit_metadata(tfhp);
if (!host_err)
@@ -1757,9 +1788,18 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
fh_fill_pre_attrs(fhp);
if (type != S_IFDIR) {
+ int retries;
+
if (rdentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK)
nfsd_close_cached_files(rdentry);
- host_err = vfs_unlink(&init_user_ns, dirp, rdentry, NULL);
+
+ for (retries = 1;;) {
+ host_err = vfs_unlink(&init_user_ns, dirp, rdentry, NULL);
+ if (host_err != -EAGAIN || !retries--)
+ break;
+ if (!nfsd_wait_for_delegreturn(rqstp, rinode))
+ break;
+ }
} else {
host_err = vfs_rmdir(&init_user_ns, dirp, rdentry);
}
@@ -1814,7 +1854,7 @@ struct readdir_data {
int full;
};
-static int nfsd_buffered_filldir(struct dir_context *ctx, const char *name,
+static bool nfsd_buffered_filldir(struct dir_context *ctx, const char *name,
int namlen, loff_t offset, u64 ino,
unsigned int d_type)
{
@@ -1826,7 +1866,7 @@ static int nfsd_buffered_filldir(struct dir_context *ctx, const char *name,
reclen = ALIGN(sizeof(struct buffered_dirent) + namlen, sizeof(u64));
if (buf->used + reclen > PAGE_SIZE) {
buf->full = 1;
- return -EINVAL;
+ return false;
}
de->namlen = namlen;
@@ -1836,7 +1876,7 @@ static int nfsd_buffered_filldir(struct dir_context *ctx, const char *name,
memcpy(de->name, name, namlen);
buf->used += reclen;
- return 0;
+ return true;
}
static __be32 nfsd_buffered_readdir(struct file *file, struct svc_fh *fhp,
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index c95cd414b4bb..120521bc7b24 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -79,8 +79,8 @@ __be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
u64 count, bool sync);
#endif /* CONFIG_NFSD_V4 */
__be32 nfsd_create_locked(struct svc_rqst *, struct svc_fh *,
- char *name, int len, struct nfsd_attrs *attrs,
- int type, dev_t rdev, struct svc_fh *res);
+ struct nfsd_attrs *attrs, int type, dev_t rdev,
+ struct svc_fh *res);
__be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct nfsd_attrs *attrs,
int type, dev_t rdev, struct svc_fh *res);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 96267258e629..0eb00105d845 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -717,13 +717,13 @@ struct nfsd4_compoundargs {
struct svcxdr_tmpbuf *to_free;
struct svc_rqst *rqstp;
- u32 taglen;
char * tag;
+ u32 taglen;
u32 minorversion;
+ u32 client_opcnt;
u32 opcnt;
struct nfsd4_op *ops;
struct nfsd4_op iops[8];
- int cachetype;
};
struct nfsd4_compoundres {
@@ -732,8 +732,8 @@ struct nfsd4_compoundres {
struct svc_rqst * rqstp;
__be32 *statusp;
- u32 taglen;
char * tag;
+ u32 taglen;
u32 opcnt;
struct nfsd4_compound_state cstate;
@@ -888,7 +888,8 @@ struct nfsd4_operation {
u32 op_flags;
char *op_name;
/* Try to get response size before operation */
- u32 (*op_rsize_bop)(struct svc_rqst *, struct nfsd4_op *);
+ u32 (*op_rsize_bop)(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op);
void (*op_get_currentstateid)(struct nfsd4_compound_state *,
union nfsd4_op_u *);
void (*op_set_currentstateid)(struct nfsd4_compound_state *,
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 9f4d9432d38a..b9d15c3df3cc 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -1668,8 +1668,7 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
nextmaxkey = (nchildren > 1) ?
nilfs_btree_node_get_key(node, nchildren - 2) : 0;
- if (bh != NULL)
- brelse(bh);
+ brelse(bh);
return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
}
@@ -1717,8 +1716,7 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
ptrs[i] = le64_to_cpu(dptrs[i]);
}
- if (bh != NULL)
- brelse(bh);
+ brelse(bh);
return nitems;
}
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 67f63cfeade5..232dd7b6cca1 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -328,6 +328,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
struct inode *inode;
struct nilfs_inode_info *ii;
struct nilfs_root *root;
+ struct buffer_head *bh;
int err = -ENOMEM;
ino_t ino;
@@ -343,11 +344,25 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
ii->i_state = BIT(NILFS_I_NEW);
ii->i_root = root;
- err = nilfs_ifile_create_inode(root->ifile, &ino, &ii->i_bh);
+ err = nilfs_ifile_create_inode(root->ifile, &ino, &bh);
if (unlikely(err))
goto failed_ifile_create_inode;
/* reference count of i_bh inherits from nilfs_mdt_read_block() */
+ if (unlikely(ino < NILFS_USER_INO)) {
+ nilfs_warn(sb,
+ "inode bitmap is inconsistent for reserved inodes");
+ do {
+ brelse(bh);
+ err = nilfs_ifile_create_inode(root->ifile, &ino, &bh);
+ if (unlikely(err))
+ goto failed_ifile_create_inode;
+ } while (ino < NILFS_USER_INO);
+
+ nilfs_info(sb, "repaired inode bitmap for reserved inodes");
+ }
+ ii->i_bh = bh;
+
atomic64_inc(&root->inodes_count);
inode_init_owner(&init_user_ns, inode, dir, mode);
inode->i_ino = ino;
@@ -440,6 +455,8 @@ int nilfs_read_inode_common(struct inode *inode,
inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
+ if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode))
+ return -EIO; /* this inode is for metadata and corrupted */
if (inode->i_nlink == 0)
return -ESTALE; /* this inode is deleted */
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 3267e96c256c..39b7eea2642a 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -480,41 +480,36 @@ unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
sector_t start_blk,
sector_t *blkoff)
{
- unsigned int i;
+ unsigned int i, nr_folios;
pgoff_t index;
- unsigned int nblocks_in_page;
unsigned long length = 0;
- sector_t b;
- struct pagevec pvec;
- struct page *page;
+ struct folio_batch fbatch;
+ struct folio *folio;
if (inode->i_mapping->nrpages == 0)
return 0;
index = start_blk >> (PAGE_SHIFT - inode->i_blkbits);
- nblocks_in_page = 1U << (PAGE_SHIFT - inode->i_blkbits);
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
repeat:
- pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE,
- pvec.pages);
- if (pvec.nr == 0)
+ nr_folios = filemap_get_folios_contig(inode->i_mapping, &index, ULONG_MAX,
+ &fbatch);
+ if (nr_folios == 0)
return length;
- if (length > 0 && pvec.pages[0]->index > index)
- goto out;
-
- b = pvec.pages[0]->index << (PAGE_SHIFT - inode->i_blkbits);
i = 0;
do {
- page = pvec.pages[i];
+ folio = fbatch.folios[i];
- lock_page(page);
- if (page_has_buffers(page)) {
+ folio_lock(folio);
+ if (folio_buffers(folio)) {
struct buffer_head *bh, *head;
+ sector_t b;
- bh = head = page_buffers(page);
+ b = folio->index << (PAGE_SHIFT - inode->i_blkbits);
+ bh = head = folio_buffers(folio);
do {
if (b < start_blk)
continue;
@@ -529,21 +524,17 @@ repeat:
} else {
if (length > 0)
goto out_locked;
-
- b += nblocks_in_page;
}
- unlock_page(page);
+ folio_unlock(folio);
- } while (++i < pagevec_count(&pvec));
+ } while (++i < nr_folios);
- index = page->index + 1;
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
goto repeat;
out_locked:
- unlock_page(page);
-out:
- pagevec_release(&pvec);
+ folio_unlock(folio);
+ folio_batch_release(&fbatch);
return length;
}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 0afe0832c754..b4cebad21b48 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -875,9 +875,11 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
nilfs_cpfile_put_checkpoint(
nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
- } else
- WARN_ON(err == -EINVAL || err == -ENOENT);
-
+ } else if (err == -EINVAL || err == -ENOENT) {
+ nilfs_error(sci->sc_super,
+ "checkpoint creation failed due to metadata corruption.");
+ err = -EIO;
+ }
return err;
}
@@ -891,7 +893,11 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci)
err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0,
&raw_cp, &bh_cp);
if (unlikely(err)) {
- WARN_ON(err == -EINVAL || err == -ENOENT);
+ if (err == -EINVAL || err == -ENOENT) {
+ nilfs_error(sci->sc_super,
+ "checkpoint finalization failed due to metadata corruption.");
+ err = -EIO;
+ }
goto failed_ibh;
}
raw_cp->cp_snapshot_list.ssl_next = 0;
@@ -2235,7 +2241,6 @@ int nilfs_construct_segment(struct super_block *sb)
struct the_nilfs *nilfs = sb->s_fs_info;
struct nilfs_sc_info *sci = nilfs->ns_writer;
struct nilfs_transaction_info *ti;
- int err;
if (!sci)
return -EROFS;
@@ -2243,8 +2248,7 @@ int nilfs_construct_segment(struct super_block *sb)
/* A call inside transactions causes a deadlock. */
BUG_ON((ti = current->journal_info) && ti->ti_magic == NILFS_TI_MAGIC);
- err = nilfs_segctor_sync(sci);
- return err;
+ return nilfs_segctor_sync(sci);
}
/**
@@ -2786,10 +2790,9 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL);
err = nilfs_segctor_start_thread(nilfs->ns_writer);
- if (err) {
- kfree(nilfs->ns_writer);
- nilfs->ns_writer = NULL;
- }
+ if (unlikely(err))
+ nilfs_detach_log_writer(sb);
+
return err;
}
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index cd7d09a569ff..a2a15bc4df28 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -18,7 +18,7 @@
#include "fanotify.h"
-static bool fanotify_path_equal(struct path *p1, struct path *p2)
+static bool fanotify_path_equal(const struct path *p1, const struct path *p2)
{
return p1->mnt == p2->mnt && p1->dentry == p2->dentry;
}
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 1d9f11255c64..57f51a9a3015 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -452,13 +452,7 @@ static inline bool fanotify_is_error_event(u32 mask)
return mask & FAN_FS_ERROR;
}
-static inline bool fanotify_event_has_path(struct fanotify_event *event)
-{
- return event->type == FANOTIFY_EVENT_TYPE_PATH ||
- event->type == FANOTIFY_EVENT_TYPE_PATH_PERM;
-}
-
-static inline struct path *fanotify_event_path(struct fanotify_event *event)
+static inline const struct path *fanotify_event_path(struct fanotify_event *event)
{
if (event->type == FANOTIFY_EVENT_TYPE_PATH)
return &FANOTIFY_PE(event)->path;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index f0e49a406ffa..4546da4a54f9 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -249,7 +249,7 @@ out:
return event;
}
-static int create_fd(struct fsnotify_group *group, struct path *path,
+static int create_fd(struct fsnotify_group *group, const struct path *path,
struct file **file)
{
int client_fd;
@@ -619,7 +619,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
char __user *buf, size_t count)
{
struct fanotify_event_metadata metadata;
- struct path *path = fanotify_event_path(event);
+ const struct path *path = fanotify_event_path(event);
struct fanotify_info *info = fanotify_event_info(event);
unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES);
unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD;
@@ -1553,7 +1553,7 @@ static int fanotify_test_fid(struct dentry *dentry)
}
static int fanotify_events_supported(struct fsnotify_group *group,
- struct path *path, __u64 mask,
+ const struct path *path, __u64 mask,
unsigned int flags)
{
unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 87d8a50ee803..fde74eb333cc 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -76,10 +76,6 @@ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
*/
extern void __fsnotify_update_child_dentry_flags(struct inode *inode);
-/* allocate and destroy and event holder to attach events to notification/access queues */
-extern struct fsnotify_event_holder *fsnotify_alloc_event_holder(void);
-extern void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder);
-
extern struct kmem_cache *fsnotify_mark_connector_cachep;
#endif /* __FS_NOTIFY_FSNOTIFY_H_ */
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 800c1d0eb0d0..3506f6074288 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -28,7 +28,7 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
struct inode *inode = d_inode(dentry);
const struct proc_ns_operations *ns_ops = dentry->d_fsdata;
- return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
+ return dynamic_dname(buffer, buflen, "%s:[%lu]",
ns_ops->name, inode->i_ino);
}
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 52615e6090e1..a3865bc4a0c6 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -594,17 +594,37 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name,
for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) {
u8 *mrec_end = (u8 *)ctx->mrec +
le32_to_cpu(ctx->mrec->bytes_allocated);
- u8 *name_end = (u8 *)a + le16_to_cpu(a->name_offset) +
- a->name_length * sizeof(ntfschar);
- if ((u8*)a < (u8*)ctx->mrec || (u8*)a > mrec_end ||
- name_end > mrec_end)
+ u8 *name_end;
+
+ /* check whether ATTR_RECORD wrap */
+ if ((u8 *)a < (u8 *)ctx->mrec)
+ break;
+
+ /* check whether Attribute Record Header is within bounds */
+ if ((u8 *)a > mrec_end ||
+ (u8 *)a + sizeof(ATTR_RECORD) > mrec_end)
+ break;
+
+ /* check whether ATTR_RECORD's name is within bounds */
+ name_end = (u8 *)a + le16_to_cpu(a->name_offset) +
+ a->name_length * sizeof(ntfschar);
+ if (name_end > mrec_end)
break;
+
ctx->attr = a;
if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) ||
a->type == AT_END))
return -ENOENT;
if (unlikely(!a->length))
break;
+
+ /* check whether ATTR_RECORD's length wrap */
+ if ((u8 *)a + le32_to_cpu(a->length) < (u8 *)a)
+ break;
+ /* check whether ATTR_RECORD's length is within bounds */
+ if ((u8 *)a + le32_to_cpu(a->length) > mrec_end)
+ break;
+
if (a->type != type)
continue;
/*
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 58b660dbbee9..c481b14e4fd9 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -527,12 +527,12 @@ err_out:
goto out;
}
-static inline int ntfs_submit_bh_for_read(struct buffer_head *bh)
+static inline void ntfs_submit_bh_for_read(struct buffer_head *bh)
{
lock_buffer(bh);
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
- return submit_bh(REQ_OP_READ, bh);
+ submit_bh(REQ_OP_READ, bh);
}
/**
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index db0f1995aedd..08c659332e26 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -1829,6 +1829,13 @@ int ntfs_read_inode_mount(struct inode *vi)
goto err_out;
}
+ /* Sanity check offset to the first attribute */
+ if (le16_to_cpu(m->attrs_offset) >= le32_to_cpu(m->bytes_allocated)) {
+ ntfs_error(sb, "Incorrect mft offset to the first attribute %u in superblock.",
+ le16_to_cpu(m->attrs_offset));
+ goto err_out;
+ }
+
/* Need this to sanity check attribute list references to $MFT. */
vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number);
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 5ae8de09b271..001f4e053c85 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2092,7 +2092,8 @@ get_ctx_vol_failed:
// TODO: Initialize security.
/* Get the extended system files' directory inode. */
vol->extend_ino = ntfs_iget(sb, FILE_Extend);
- if (IS_ERR(vol->extend_ino) || is_bad_inode(vol->extend_ino)) {
+ if (IS_ERR(vol->extend_ino) || is_bad_inode(vol->extend_ino) ||
+ !S_ISDIR(vol->extend_ino->i_mode)) {
if (!IS_ERR(vol->extend_ino))
iput(vol->extend_ino);
ntfs_error(sb, "Failed to load $Extend.");
diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c
index 5d44ceac855b..e92bbd754365 100644
--- a/fs/ntfs3/bitmap.c
+++ b/fs/ntfs3/bitmap.c
@@ -560,7 +560,7 @@ static int wnd_rescan(struct wnd_bitmap *wnd)
buf = (ulong *)bh->b_data;
- used = __bitmap_weight(buf, wbits);
+ used = bitmap_weight(buf, wbits);
if (used < wbits) {
frb = wbits - used;
wnd->free_bits[iw] = frb;
@@ -1364,7 +1364,7 @@ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits)
buf = (ulong *)bh->b_data;
__bitmap_clear(buf, b0, blocksize * 8 - b0);
- frb = wbits - __bitmap_weight(buf, wbits);
+ frb = wbits - bitmap_weight(buf, wbits);
wnd->total_zeroes += frb - wnd->free_bits[iw];
wnd->free_bits[iw] = frb;
diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c
index e7c494005122..0d611a6c5511 100644
--- a/fs/ntfs3/fslog.c
+++ b/fs/ntfs3/fslog.c
@@ -3819,7 +3819,7 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
}
log_init_pg_hdr(log, page_size, page_size, 1, 1);
- log_create(log, l_size, 0, get_random_int(), false, false);
+ log_create(log, l_size, 0, get_random_u32(), false, false);
log->ra = ra;
@@ -3893,7 +3893,7 @@ check_restart_area:
/* Do some checks based on whether we have a valid log page. */
if (!rst_info.valid_page) {
- open_log_count = get_random_int();
+ open_log_count = get_random_u32();
goto init_log_instance;
}
open_log_count = le32_to_cpu(ra2->open_log_count);
@@ -4044,7 +4044,7 @@ find_oldest:
memcpy(ra->clients, Add2Ptr(ra2, t16),
le16_to_cpu(ra2->ra_len) - t16);
- log->current_openlog_count = get_random_int();
+ log->current_openlog_count = get_random_u32();
ra->open_log_count = cpu_to_le32(log->current_openlog_count);
log->ra_size = offsetof(struct RESTART_AREA, clients) +
sizeof(struct CLIENT_REC);
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index 51363d4e8636..d5a3afbbbfd8 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -630,12 +630,9 @@ static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo,
bh->b_size = block_size;
off = vbo & (PAGE_SIZE - 1);
set_bh_page(bh, page, off);
- ll_rw_block(REQ_OP_READ, 1, &bh);
- wait_on_buffer(bh);
- if (!buffer_uptodate(bh)) {
- err = -EIO;
+ err = bh_read(bh, 0);
+ if (err < 0)
goto out;
- }
zero_user_segment(page, off + voff, off + block_size);
}
}
@@ -1927,8 +1924,6 @@ const struct inode_operations ntfs_link_inode_operations = {
.setattr = ntfs3_setattr,
.listxattr = ntfs_listxattr,
.permission = ntfs_permission,
- .get_acl = ntfs_get_acl,
- .set_acl = ntfs_set_acl,
};
const struct address_space_operations ntfs_aops = {
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
index 6ae1f56b7358..7de8718c68a9 100644
--- a/fs/ntfs3/xattr.c
+++ b/fs/ntfs3/xattr.c
@@ -625,67 +625,6 @@ int ntfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
return ntfs_set_acl_ex(mnt_userns, inode, acl, type, false);
}
-static int ntfs_xattr_get_acl(struct user_namespace *mnt_userns,
- struct inode *inode, int type, void *buffer,
- size_t size)
-{
- struct posix_acl *acl;
- int err;
-
- if (!(inode->i_sb->s_flags & SB_POSIXACL)) {
- ntfs_inode_warn(inode, "add mount option \"acl\" to use acl");
- return -EOPNOTSUPP;
- }
-
- acl = ntfs_get_acl(inode, type, false);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
-
- if (!acl)
- return -ENODATA;
-
- err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
- posix_acl_release(acl);
-
- return err;
-}
-
-static int ntfs_xattr_set_acl(struct user_namespace *mnt_userns,
- struct inode *inode, int type, const void *value,
- size_t size)
-{
- struct posix_acl *acl;
- int err;
-
- if (!(inode->i_sb->s_flags & SB_POSIXACL)) {
- ntfs_inode_warn(inode, "add mount option \"acl\" to use acl");
- return -EOPNOTSUPP;
- }
-
- if (!inode_owner_or_capable(mnt_userns, inode))
- return -EPERM;
-
- if (!value) {
- acl = NULL;
- } else {
- acl = posix_acl_from_xattr(&init_user_ns, value, size);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
-
- if (acl) {
- err = posix_acl_valid(&init_user_ns, acl);
- if (err)
- goto release_and_out;
- }
- }
-
- err = ntfs_set_acl(mnt_userns, inode, acl, type);
-
-release_and_out:
- posix_acl_release(acl);
- return err;
-}
-
/*
* ntfs_init_acl - Initialize the ACLs of a new inode.
*
@@ -852,23 +791,6 @@ static int ntfs_getxattr(const struct xattr_handler *handler, struct dentry *de,
goto out;
}
-#ifdef CONFIG_NTFS3_FS_POSIX_ACL
- if ((name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 &&
- !memcmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
- sizeof(XATTR_NAME_POSIX_ACL_ACCESS))) ||
- (name_len == sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1 &&
- !memcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
- sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)))) {
- /* TODO: init_user_ns? */
- err = ntfs_xattr_get_acl(
- &init_user_ns, inode,
- name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1
- ? ACL_TYPE_ACCESS
- : ACL_TYPE_DEFAULT,
- buffer, size);
- goto out;
- }
-#endif
/* Deal with NTFS extended attribute. */
err = ntfs_get_ea(inode, name, name_len, buffer, size, NULL);
@@ -981,22 +903,6 @@ set_new_fa:
goto out;
}
-#ifdef CONFIG_NTFS3_FS_POSIX_ACL
- if ((name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 &&
- !memcmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
- sizeof(XATTR_NAME_POSIX_ACL_ACCESS))) ||
- (name_len == sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1 &&
- !memcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
- sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)))) {
- err = ntfs_xattr_set_acl(
- mnt_userns, inode,
- name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1
- ? ACL_TYPE_ACCESS
- : ACL_TYPE_DEFAULT,
- value, size);
- goto out;
- }
-#endif
/* Deal with NTFS extended attribute. */
err = ntfs_set_ea(inode, name, name_len, value, size, flags, 0);
@@ -1086,7 +992,7 @@ static bool ntfs_xattr_user_list(struct dentry *dentry)
}
// clang-format off
-static const struct xattr_handler ntfs_xattr_handler = {
+static const struct xattr_handler ntfs_other_xattr_handler = {
.prefix = "",
.get = ntfs_getxattr,
.set = ntfs_setxattr,
@@ -1094,7 +1000,11 @@ static const struct xattr_handler ntfs_xattr_handler = {
};
const struct xattr_handler *ntfs_xattr_handlers[] = {
- &ntfs_xattr_handler,
+#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+ &posix_acl_access_xattr_handler,
+ &posix_acl_default_xattr_handler,
+#endif
+ &ntfs_other_xattr_handler,
NULL,
};
// clang-format on
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index af4157f61927..1d65f6ef00ca 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -636,7 +636,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
!buffer_new(bh) &&
ocfs2_should_read_blk(inode, page, block_start) &&
(block_start < from || block_end > to)) {
- ll_rw_block(REQ_OP_READ, 1, &bh);
+ bh_read_nowait(bh, 0);
*wait_bh++=bh;
}
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 81c3d65d68fe..694471fc46b8 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2032,7 +2032,7 @@ struct ocfs2_empty_dir_priv {
unsigned seen_other;
unsigned dx_dir;
};
-static int ocfs2_empty_dir_filldir(struct dir_context *ctx, const char *name,
+static bool ocfs2_empty_dir_filldir(struct dir_context *ctx, const char *name,
int name_len, loff_t pos, u64 ino,
unsigned type)
{
@@ -2052,7 +2052,7 @@ static int ocfs2_empty_dir_filldir(struct dir_context *ctx, const char *name,
*/
if (name_len == 1 && !strncmp(".", name, 1) && pos == 0) {
p->seen_dot = 1;
- return 0;
+ return true;
}
if (name_len == 2 && !strncmp("..", name, 2) &&
@@ -2060,13 +2060,13 @@ static int ocfs2_empty_dir_filldir(struct dir_context *ctx, const char *name,
p->seen_dot_dot = 1;
if (p->dx_dir && p->seen_dot)
- return 1;
+ return false;
- return 0;
+ return true;
}
p->seen_other = 1;
- return 1;
+ return false;
}
static int ocfs2_empty_dir_dx(struct inode *inode,
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index fa87d89cf754..126671e6caed 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -2057,7 +2057,7 @@ struct ocfs2_orphan_filldir_priv {
enum ocfs2_orphan_reco_type orphan_reco_type;
};
-static int ocfs2_orphan_filldir(struct dir_context *ctx, const char *name,
+static bool ocfs2_orphan_filldir(struct dir_context *ctx, const char *name,
int name_len, loff_t pos, u64 ino,
unsigned type)
{
@@ -2066,21 +2066,21 @@ static int ocfs2_orphan_filldir(struct dir_context *ctx, const char *name,
struct inode *iter;
if (name_len == 1 && !strncmp(".", name, 1))
- return 0;
+ return true;
if (name_len == 2 && !strncmp("..", name, 2))
- return 0;
+ return true;
/* do not include dio entry in case of orphan scan */
if ((p->orphan_reco_type == ORPHAN_NO_NEED_TRUNCATE) &&
(!strncmp(name, OCFS2_DIO_ORPHAN_PREFIX,
OCFS2_DIO_ORPHAN_PREFIX_LEN)))
- return 0;
+ return true;
/* Skip bad inodes so that recovery can continue */
iter = ocfs2_iget(p->osb, ino,
OCFS2_FI_FLAG_ORPHAN_RECOVERY, 0);
if (IS_ERR(iter))
- return 0;
+ return true;
if (!strncmp(name, OCFS2_DIO_ORPHAN_PREFIX,
OCFS2_DIO_ORPHAN_PREFIX_LEN))
@@ -2090,7 +2090,7 @@ static int ocfs2_orphan_filldir(struct dir_context *ctx, const char *name,
* happen concurrently with unlink/rename */
if (OCFS2_I(iter)->ip_next_orphan) {
iput(iter);
- return 0;
+ return true;
}
trace_ocfs2_orphan_filldir((unsigned long long)OCFS2_I(iter)->ip_blkno);
@@ -2099,7 +2099,7 @@ static int ocfs2_orphan_filldir(struct dir_context *ctx, const char *name,
OCFS2_I(iter)->ip_next_orphan = p->head;
p->head = iter;
- return 0;
+ return true;
}
static int ocfs2_queue_orphans(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 961d1cf54388..05f32989bad6 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -232,6 +232,7 @@ static int ocfs2_mknod(struct user_namespace *mnt_userns,
handle_t *handle = NULL;
struct ocfs2_super *osb;
struct ocfs2_dinode *dirfe;
+ struct ocfs2_dinode *fe = NULL;
struct buffer_head *new_fe_bh = NULL;
struct inode *inode = NULL;
struct ocfs2_alloc_context *inode_ac = NULL;
@@ -382,6 +383,7 @@ static int ocfs2_mknod(struct user_namespace *mnt_userns,
goto leave;
}
+ fe = (struct ocfs2_dinode *) new_fe_bh->b_data;
if (S_ISDIR(mode)) {
status = ocfs2_fill_new_dir(osb, handle, dir, inode,
new_fe_bh, data_ac, meta_ac);
@@ -454,8 +456,11 @@ roll_back:
leave:
if (status < 0 && did_quota_inode)
dquot_free_inode(inode);
- if (handle)
+ if (handle) {
+ if (status < 0 && fe)
+ ocfs2_set_links_count(fe, 0);
ocfs2_commit_trans(osb, handle);
+ }
ocfs2_inode_unlock(dir, 1);
if (did_block_signals)
@@ -632,18 +637,9 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
return status;
}
- status = __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh,
+ return __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh,
parent_fe_bh, handle, inode_ac,
fe_blkno, suballoc_loc, suballoc_bit);
- if (status < 0) {
- u64 bg_blkno = ocfs2_which_suballoc_group(fe_blkno, suballoc_bit);
- int tmp = ocfs2_free_suballoc_bits(handle, inode_ac->ac_inode,
- inode_ac->ac_bh, suballoc_bit, bg_blkno, 1);
- if (tmp)
- mlog_errno(tmp);
- }
-
- return status;
}
static int ocfs2_mkdir(struct user_namespace *mnt_userns,
@@ -2028,8 +2024,11 @@ bail:
ocfs2_clusters_to_bytes(osb->sb, 1));
if (status < 0 && did_quota_inode)
dquot_free_inode(inode);
- if (handle)
+ if (handle) {
+ if (status < 0 && fe)
+ ocfs2_set_links_count(fe, 0);
ocfs2_commit_trans(osb, handle);
+ }
ocfs2_inode_unlock(dir, 1);
if (did_block_signals)
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 638d875eccc7..7aebdbf5cc0a 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -527,7 +527,7 @@ struct ocfs2_extent_block
* value -1 (0xFFFF) is OCFS2_INVALID_SLOT. This marks a slot empty.
*/
struct ocfs2_slot_map {
-/*00*/ __le16 sm_slots[0];
+/*00*/ DECLARE_FLEX_ARRAY(__le16, sm_slots);
/*
* Actual on-disk size is one block. OCFS2_MAX_SLOTS is 255,
* 255 * sizeof(__le16) == 512B, within the 512B block minimum blocksize.
@@ -548,7 +548,7 @@ struct ocfs2_extended_slot {
* i_size.
*/
struct ocfs2_slot_map_extended {
-/*00*/ struct ocfs2_extended_slot se_slots[0];
+/*00*/ DECLARE_FLEX_ARRAY(struct ocfs2_extended_slot, se_slots);
/*
* Actual size is i_size of the slot_map system file. It should
* match s_max_slots * sizeof(struct ocfs2_extended_slot)
@@ -727,7 +727,7 @@ struct ocfs2_dinode {
struct ocfs2_extent_list i_list;
struct ocfs2_truncate_log i_dealloc;
struct ocfs2_inline_data i_data;
- __u8 i_symlink[0];
+ DECLARE_FLEX_ARRAY(__u8, i_symlink);
} id2;
/* Actual on-disk size is one block */
};
@@ -892,7 +892,7 @@ struct ocfs2_group_desc
/*30*/ struct ocfs2_block_check bg_check; /* Error checking */
__le64 bg_reserved2;
/*40*/ union {
- __u8 bg_bitmap[0];
+ DECLARE_FLEX_ARRAY(__u8, bg_bitmap);
struct {
/*
* Block groups may be discontiguous when
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 1358981e80a3..623db358b1ef 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2614,7 +2614,7 @@ static inline unsigned int ocfs2_cow_align_length(struct super_block *sb,
}
/*
- * Calculate out the start and number of virtual clusters we need to to CoW.
+ * Calculate out the start and number of virtual clusters we need to CoW.
*
* cpos is vitual start cluster position we want to do CoW in a
* file and write_len is the cluster length.
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index a75e2b7d67f5..64e6ddcfe329 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -991,7 +991,7 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
lc->oc_type = NO_CONTROLD;
rc = dlm_new_lockspace(conn->cc_name, conn->cc_cluster_name,
- DLM_LSFL_FS | DLM_LSFL_NEWEXCL, DLM_LVB_LEN,
+ DLM_LSFL_NEWEXCL, DLM_LVB_LEN,
&ocfs2_ls_ops, conn, &ops_rv, &fsdlm);
if (rc) {
if (rc == -EEXIST || rc == -EPROTO)
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index dd77b7aaabf5..317126261523 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -334,10 +334,10 @@ int ocfs2_cluster_connect(const char *stack_name,
goto out;
}
- strlcpy(new_conn->cc_name, group, GROUP_NAME_MAX + 1);
+ strscpy(new_conn->cc_name, group, GROUP_NAME_MAX + 1);
new_conn->cc_namelen = grouplen;
if (cluster_name_len)
- strlcpy(new_conn->cc_cluster_name, cluster_name,
+ strscpy(new_conn->cc_cluster_name, cluster_name,
CLUSTER_NAME_MAX + 1);
new_conn->cc_cluster_name_len = cluster_name_len;
new_conn->cc_recovery_handler = recovery_handler;
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index 5805a03d100b..9c74eace3adc 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -106,7 +106,7 @@ int ocfs2_claim_clusters(handle_t *handle,
u32 *cluster_start,
u32 *num_clusters);
/*
- * Use this variant of ocfs2_claim_clusters to specify a maxiumum
+ * Use this variant of ocfs2_claim_clusters to specify a maximum
* number of clusters smaller than the allocation reserved.
*/
int __ocfs2_claim_clusters(handle_t *handle,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index e2cc9eec287c..42c993e53924 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1764,9 +1764,7 @@ static int ocfs2_get_sector(struct super_block *sb,
if (!buffer_dirty(*bh))
clear_buffer_uptodate(*bh);
unlock_buffer(*bh);
- ll_rw_block(REQ_OP_READ, 1, bh);
- wait_on_buffer(*bh);
- if (!buffer_uptodate(*bh)) {
+ if (bh_read(*bh, 0) < 0) {
mlog_errno(-EIO);
brelse(*bh);
*bh = NULL;
@@ -2221,7 +2219,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
goto out_journal;
}
- strlcpy(osb->vol_label, di->id2.i_super.s_label,
+ strscpy(osb->vol_label, di->id2.i_super.s_label,
OCFS2_MAX_VOL_LABEL_LEN);
osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno);
osb->system_dir_blkno = le64_to_cpu(di->id2.i_super.s_system_dir_blkno);
diff --git a/fs/open.c b/fs/open.c
index cf7e5c350a54..a81319b6177f 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -842,7 +842,9 @@ static int do_dentry_open(struct file *f,
return 0;
}
- if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
+ if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) {
+ i_readcount_inc(inode);
+ } else if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
error = get_write_access(inode);
if (unlikely(error))
goto cleanup_file;
@@ -882,8 +884,6 @@ static int do_dentry_open(struct file *f,
goto cleanup_all;
}
f->f_mode |= FMODE_OPENED;
- if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
- i_readcount_inc(inode);
if ((f->f_mode & FMODE_READ) &&
likely(f->f_op->read || f->f_op->read_iter))
f->f_mode |= FMODE_CAN_READ;
@@ -937,10 +937,7 @@ cleanup_all:
if (WARN_ON_ONCE(error > 0))
error = -EINVAL;
fops_put(f->f_op);
- if (f->f_mode & FMODE_WRITER) {
- put_write_access(inode);
- __mnt_drop_write(f->f_path.mnt);
- }
+ put_file_access(f);
cleanup_file:
path_put(&f->f_path);
f->f_path.mnt = NULL;
diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c
index e2c2699d8016..9cacce5d55c1 100644
--- a/fs/orangefs/dir.c
+++ b/fs/orangefs/dir.c
@@ -398,7 +398,7 @@ static int orangefs_dir_release(struct inode *inode, struct file *file)
const struct file_operations orangefs_dir_operations = {
.llseek = orangefs_dir_llseek,
.read = generic_read_dir,
- .iterate = orangefs_dir_iterate,
+ .iterate_shared = orangefs_dir_iterate,
.open = orangefs_dir_open,
.release = orangefs_dir_release
};
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index 86810e5d7914..732661aa2680 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -417,9 +417,7 @@ static int orangefs_file_release(struct inode *inode, struct file *file)
* readahead cache (if any); this forces an expensive refresh of
* data for the next caller of mmap (or 'get_block' accesses)
*/
- if (file_inode(file) &&
- file_inode(file)->i_mapping &&
- mapping_nrpages(&file_inode(file)->i_data)) {
+ if (mapping_nrpages(file->f_mapping)) {
if (orangefs_features & ORANGEFS_FEATURE_READAHEAD) {
gossip_debug(GOSSIP_INODE_DEBUG,
"calling flush_racache on %pU\n",
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index fdde6c56cc3d..f436d8847f08 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -44,7 +44,7 @@ static bool ovl_must_copy_xattr(const char *name)
!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN);
}
-int ovl_copy_xattr(struct super_block *sb, struct path *oldpath, struct dentry *new)
+int ovl_copy_xattr(struct super_block *sb, const struct path *oldpath, struct dentry *new)
{
struct dentry *old = oldpath->dentry;
ssize_t list_size, size, value_size = 0;
@@ -132,8 +132,8 @@ out:
return error;
}
-static int ovl_copy_fileattr(struct inode *inode, struct path *old,
- struct path *new)
+static int ovl_copy_fileattr(struct inode *inode, const struct path *old,
+ const struct path *new)
{
struct fileattr oldfa = { .flags_valid = true };
struct fileattr newfa = { .flags_valid = true };
@@ -193,11 +193,11 @@ static int ovl_copy_fileattr(struct inode *inode, struct path *old,
return ovl_real_fileattr_set(new, &newfa);
}
-static int ovl_copy_up_data(struct ovl_fs *ofs, struct path *old,
- struct path *new, loff_t len)
+static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
+ struct file *new_file, loff_t len)
{
+ struct path datapath;
struct file *old_file;
- struct file *new_file;
loff_t old_pos = 0;
loff_t new_pos = 0;
loff_t cloned;
@@ -206,23 +206,18 @@ static int ovl_copy_up_data(struct ovl_fs *ofs, struct path *old,
bool skip_hole = false;
int error = 0;
- if (len == 0)
- return 0;
+ ovl_path_lowerdata(dentry, &datapath);
+ if (WARN_ON(datapath.dentry == NULL))
+ return -EIO;
- old_file = ovl_path_open(old, O_LARGEFILE | O_RDONLY);
+ old_file = ovl_path_open(&datapath, O_LARGEFILE | O_RDONLY);
if (IS_ERR(old_file))
return PTR_ERR(old_file);
- new_file = ovl_path_open(new, O_LARGEFILE | O_WRONLY);
- if (IS_ERR(new_file)) {
- error = PTR_ERR(new_file);
- goto out_fput;
- }
-
/* Try to use clone_file_range to clone up within the same fs */
cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
if (cloned == len)
- goto out;
+ goto out_fput;
/* Couldn't clone, so now we try to copy the data */
/* Check if lower fs supports seek operation */
@@ -282,10 +277,8 @@ static int ovl_copy_up_data(struct ovl_fs *ofs, struct path *old,
len -= bytes;
}
-out:
if (!error && ovl_should_sync(ofs))
error = vfs_fsync(new_file, 0);
- fput(new_file);
out_fput:
fput(old_file);
return error;
@@ -556,30 +549,31 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
return err;
}
-static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
+static int ovl_copy_up_data(struct ovl_copy_up_ctx *c, const struct path *temp)
{
struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
- struct inode *inode = d_inode(c->dentry);
- struct path upperpath, datapath;
+ struct file *new_file;
int err;
- ovl_path_upper(c->dentry, &upperpath);
- if (WARN_ON(upperpath.dentry != NULL))
- return -EIO;
+ if (!S_ISREG(c->stat.mode) || c->metacopy || !c->stat.size)
+ return 0;
- upperpath.dentry = temp;
+ new_file = ovl_path_open(temp, O_LARGEFILE | O_WRONLY);
+ if (IS_ERR(new_file))
+ return PTR_ERR(new_file);
- /*
- * Copy up data first and then xattrs. Writing data after
- * xattrs will remove security.capability xattr automatically.
- */
- if (S_ISREG(c->stat.mode) && !c->metacopy) {
- ovl_path_lowerdata(c->dentry, &datapath);
- err = ovl_copy_up_data(ofs, &datapath, &upperpath,
- c->stat.size);
- if (err)
- return err;
- }
+ err = ovl_copy_up_file(ofs, c->dentry, new_file, c->stat.size);
+ fput(new_file);
+
+ return err;
+}
+
+static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp)
+{
+ struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
+ struct inode *inode = d_inode(c->dentry);
+ struct path upperpath = { .mnt = ovl_upper_mnt(ofs), .dentry = temp };
+ int err;
err = ovl_copy_xattr(c->dentry->d_sb, &c->lowerpath, temp);
if (err)
@@ -662,6 +656,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
struct inode *inode;
struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir);
+ struct path path = { .mnt = ovl_upper_mnt(ofs) };
struct dentry *temp, *upper;
struct ovl_cu_creds cc;
int err;
@@ -688,7 +683,16 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
if (IS_ERR(temp))
goto unlock;
- err = ovl_copy_up_inode(c, temp);
+ /*
+ * Copy up data first and then xattrs. Writing data after
+ * xattrs will remove security.capability xattr automatically.
+ */
+ path.dentry = temp;
+ err = ovl_copy_up_data(c, &path);
+ if (err)
+ goto cleanup;
+
+ err = ovl_copy_up_metadata(c, temp);
if (err)
goto cleanup;
@@ -732,6 +736,7 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
struct inode *udir = d_inode(c->destdir);
struct dentry *temp, *upper;
+ struct file *tmpfile;
struct ovl_cu_creds cc;
int err;
@@ -739,15 +744,22 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
if (err)
return err;
- temp = ovl_do_tmpfile(ofs, c->workdir, c->stat.mode);
+ tmpfile = ovl_do_tmpfile(ofs, c->workdir, c->stat.mode);
ovl_revert_cu_creds(&cc);
- if (IS_ERR(temp))
- return PTR_ERR(temp);
+ if (IS_ERR(tmpfile))
+ return PTR_ERR(tmpfile);
- err = ovl_copy_up_inode(c, temp);
+ temp = tmpfile->f_path.dentry;
+ if (!c->metacopy && c->stat.size) {
+ err = ovl_copy_up_file(ofs, c->dentry, tmpfile, c->stat.size);
+ if (err)
+ return err;
+ }
+
+ err = ovl_copy_up_metadata(c, temp);
if (err)
- goto out_dput;
+ goto out_fput;
inode_lock_nested(udir, I_MUTEX_PARENT);
@@ -761,16 +773,14 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
inode_unlock(udir);
if (err)
- goto out_dput;
+ goto out_fput;
if (!c->metacopy)
ovl_set_upperdata(d_inode(c->dentry));
- ovl_inode_update(d_inode(c->dentry), temp);
+ ovl_inode_update(d_inode(c->dentry), dget(temp));
- return 0;
-
-out_dput:
- dput(temp);
+out_fput:
+ fput(tmpfile);
return err;
}
@@ -872,7 +882,7 @@ static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode,
return true;
}
-static ssize_t ovl_getxattr_value(struct path *path, char *name, char **value)
+static ssize_t ovl_getxattr_value(const struct path *path, char *name, char **value)
{
ssize_t res;
char *buf;
@@ -899,7 +909,7 @@ static ssize_t ovl_getxattr_value(struct path *path, char *name, char **value)
static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
{
struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
- struct path upperpath, datapath;
+ struct path upperpath;
int err;
char *capability = NULL;
ssize_t cap_size;
@@ -908,10 +918,6 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
if (WARN_ON(upperpath.dentry == NULL))
return -EIO;
- ovl_path_lowerdata(c->dentry, &datapath);
- if (WARN_ON(datapath.dentry == NULL))
- return -EIO;
-
if (c->stat.size) {
err = cap_size = ovl_getxattr_value(&upperpath, XATTR_NAME_CAPS,
&capability);
@@ -919,7 +925,7 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
goto out;
}
- err = ovl_copy_up_data(ofs, &datapath, &upperpath, c->stat.size);
+ err = ovl_copy_up_data(c, &upperpath);
if (err)
goto out_free;
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index daff601b5c41..a1a22f58ba18 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -38,7 +38,7 @@ static char ovl_whatisit(struct inode *inode, struct inode *realinode)
#define OVL_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY)
static struct file *ovl_open_realfile(const struct file *file,
- struct path *realpath)
+ const struct path *realpath)
{
struct inode *realinode = d_inode(realpath->dentry);
struct inode *inode = file_inode(file);
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 0fbcb590af84..9e61511de7a7 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -588,7 +588,7 @@ static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
* Introducing security_inode_fileattr_get/set() hooks would solve this issue
* properly.
*/
-static int ovl_security_fileattr(struct path *realpath, struct fileattr *fa,
+static int ovl_security_fileattr(const struct path *realpath, struct fileattr *fa,
bool set)
{
struct file *file;
@@ -610,7 +610,7 @@ static int ovl_security_fileattr(struct path *realpath, struct fileattr *fa,
return err;
}
-int ovl_real_fileattr_set(struct path *realpath, struct fileattr *fa)
+int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa)
{
int err;
@@ -685,7 +685,7 @@ static void ovl_fileattr_prot_flags(struct inode *inode, struct fileattr *fa)
}
}
-int ovl_real_fileattr_get(struct path *realpath, struct fileattr *fa)
+int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa)
{
int err;
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 69dc577974f8..0fd1d5fdfc72 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -26,7 +26,7 @@ struct ovl_lookup_data {
bool metacopy;
};
-static int ovl_check_redirect(struct path *path, struct ovl_lookup_data *d,
+static int ovl_check_redirect(const struct path *path, struct ovl_lookup_data *d,
size_t prelen, const char *post)
{
int res;
@@ -194,7 +194,7 @@ struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
return real;
}
-static bool ovl_is_opaquedir(struct ovl_fs *ofs, struct path *path)
+static bool ovl_is_opaquedir(struct ovl_fs *ofs, const struct path *path)
{
return ovl_path_check_dir_xattr(ofs, path, OVL_XATTR_OPAQUE);
}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 87759165d32b..eee8f08d32b6 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -208,7 +208,7 @@ static inline int ovl_do_symlink(struct ovl_fs *ofs,
return err;
}
-static inline ssize_t ovl_do_getxattr(struct path *path, const char *name,
+static inline ssize_t ovl_do_getxattr(const struct path *path, const char *name,
void *value, size_t size)
{
int err, len;
@@ -238,7 +238,7 @@ static inline ssize_t ovl_getxattr_upper(struct ovl_fs *ofs,
}
static inline ssize_t ovl_path_getxattr(struct ovl_fs *ofs,
- struct path *path,
+ const struct path *path,
enum ovl_xattr ox, void *value,
size_t size)
{
@@ -250,7 +250,7 @@ static inline int ovl_do_setxattr(struct ovl_fs *ofs, struct dentry *dentry,
size_t size, int flags)
{
int err = vfs_setxattr(ovl_upper_mnt_userns(ofs), dentry, name,
- (void *)value, size, flags);
+ value, size, flags);
pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, %d) = %i\n",
dentry, name, min((int)size, 48), value, size, flags, err);
@@ -310,14 +310,16 @@ static inline int ovl_do_whiteout(struct ovl_fs *ofs,
return err;
}
-static inline struct dentry *ovl_do_tmpfile(struct ovl_fs *ofs,
- struct dentry *dentry, umode_t mode)
+static inline struct file *ovl_do_tmpfile(struct ovl_fs *ofs,
+ struct dentry *dentry, umode_t mode)
{
- struct dentry *ret = vfs_tmpfile(ovl_upper_mnt_userns(ofs), dentry, mode, 0);
- int err = PTR_ERR_OR_ZERO(ret);
+ struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = dentry };
+ struct file *file = vfs_tmpfile_open(ovl_upper_mnt_userns(ofs), &path, mode,
+ O_LARGEFILE | O_WRONLY, current_cred());
+ int err = PTR_ERR_OR_ZERO(file);
pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err);
- return ret;
+ return file;
}
static inline struct dentry *ovl_lookup_upper(struct ovl_fs *ofs,
@@ -401,13 +403,13 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry);
void ovl_dir_modified(struct dentry *dentry, bool impurity);
u64 ovl_dentry_version_get(struct dentry *dentry);
bool ovl_is_whiteout(struct dentry *dentry);
-struct file *ovl_path_open(struct path *path, int flags);
+struct file *ovl_path_open(const struct path *path, int flags);
int ovl_copy_up_start(struct dentry *dentry, int flags);
void ovl_copy_up_end(struct dentry *dentry);
bool ovl_already_copied_up(struct dentry *dentry, int flags);
-bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, struct path *path,
+bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path,
enum ovl_xattr ox);
-bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, struct path *path);
+bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path);
static inline bool ovl_check_origin_xattr(struct ovl_fs *ofs,
struct dentry *upperdentry)
@@ -430,9 +432,9 @@ bool ovl_need_index(struct dentry *dentry);
int ovl_nlink_start(struct dentry *dentry);
void ovl_nlink_end(struct dentry *dentry);
int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
-int ovl_check_metacopy_xattr(struct ovl_fs *ofs, struct path *path);
+int ovl_check_metacopy_xattr(struct ovl_fs *ofs, const struct path *path);
bool ovl_is_metacopy_dentry(struct dentry *dentry);
-char *ovl_get_redirect_xattr(struct ovl_fs *ofs, struct path *path, int padding);
+char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding);
int ovl_sync_status(struct ovl_fs *ofs);
static inline void ovl_set_flag(unsigned long flag, struct inode *inode)
@@ -556,7 +558,7 @@ void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper,
struct list_head *list);
void ovl_cache_free(struct list_head *list);
void ovl_dir_cache_free(struct inode *inode);
-int ovl_check_d_type_supported(struct path *realpath);
+int ovl_check_d_type_supported(const struct path *realpath);
int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir,
struct vfsmount *mnt, struct dentry *dentry, int level);
int ovl_indexdir_cleanup(struct ovl_fs *ofs);
@@ -673,8 +675,8 @@ struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir,
extern const struct file_operations ovl_file_operations;
int __init ovl_aio_request_cache_init(void);
void ovl_aio_request_cache_destroy(void);
-int ovl_real_fileattr_get(struct path *realpath, struct fileattr *fa);
-int ovl_real_fileattr_set(struct path *realpath, struct fileattr *fa);
+int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa);
+int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa);
int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa);
int ovl_fileattr_set(struct user_namespace *mnt_userns,
struct dentry *dentry, struct fileattr *fa);
@@ -683,7 +685,7 @@ int ovl_fileattr_set(struct user_namespace *mnt_userns,
int ovl_copy_up(struct dentry *dentry);
int ovl_copy_up_with_data(struct dentry *dentry);
int ovl_maybe_copy_up(struct dentry *dentry, int flags);
-int ovl_copy_xattr(struct super_block *sb, struct path *path, struct dentry *new);
+int ovl_copy_xattr(struct super_block *sb, const struct path *path, struct dentry *new);
int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upper, struct kstat *stat);
struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
bool is_upper);
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 78f62cc1797b..2b210640036c 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -170,7 +170,7 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
return p;
}
-static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
+static bool ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
const char *name, int len, u64 ino,
unsigned int d_type)
{
@@ -179,22 +179,22 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
struct ovl_cache_entry *p;
if (ovl_cache_entry_find_link(name, len, &newp, &parent))
- return 0;
+ return true;
p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
if (p == NULL) {
rdd->err = -ENOMEM;
- return -ENOMEM;
+ return false;
}
list_add_tail(&p->l_node, rdd->list);
rb_link_node(&p->node, parent, newp);
rb_insert_color(&p->node, rdd->root);
- return 0;
+ return true;
}
-static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
+static bool ovl_fill_lowest(struct ovl_readdir_data *rdd,
const char *name, int namelen,
loff_t offset, u64 ino, unsigned int d_type)
{
@@ -211,7 +211,7 @@ static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
list_add_tail(&p->l_node, &rdd->middle);
}
- return rdd->err;
+ return rdd->err == 0;
}
void ovl_cache_free(struct list_head *list)
@@ -250,7 +250,7 @@ static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
}
}
-static int ovl_fill_merge(struct dir_context *ctx, const char *name,
+static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
int namelen, loff_t offset, u64 ino,
unsigned int d_type)
{
@@ -264,7 +264,7 @@ static int ovl_fill_merge(struct dir_context *ctx, const char *name,
return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
}
-static int ovl_check_whiteouts(struct path *path, struct ovl_readdir_data *rdd)
+static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd)
{
int err;
struct ovl_cache_entry *p;
@@ -291,7 +291,7 @@ static int ovl_check_whiteouts(struct path *path, struct ovl_readdir_data *rdd)
return err;
}
-static inline int ovl_dir_read(struct path *realpath,
+static inline int ovl_dir_read(const struct path *realpath,
struct ovl_readdir_data *rdd)
{
struct file *realfile;
@@ -455,7 +455,7 @@ static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
* copy up origin, call vfs_getattr() on the overlay entry to make
* sure that d_ino will be consistent with st_ino from stat(2).
*/
-static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p)
+static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry *p)
{
struct dentry *dir = path->dentry;
@@ -528,7 +528,7 @@ fail:
goto out;
}
-static int ovl_fill_plain(struct dir_context *ctx, const char *name,
+static bool ovl_fill_plain(struct dir_context *ctx, const char *name,
int namelen, loff_t offset, u64 ino,
unsigned int d_type)
{
@@ -540,14 +540,14 @@ static int ovl_fill_plain(struct dir_context *ctx, const char *name,
p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
if (p == NULL) {
rdd->err = -ENOMEM;
- return -ENOMEM;
+ return false;
}
list_add_tail(&p->l_node, rdd->list);
- return 0;
+ return true;
}
-static int ovl_dir_read_impure(struct path *path, struct list_head *list,
+static int ovl_dir_read_impure(const struct path *path, struct list_head *list,
struct rb_root *root)
{
int err;
@@ -592,7 +592,7 @@ static int ovl_dir_read_impure(struct path *path, struct list_head *list,
return 0;
}
-static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path)
+static struct ovl_dir_cache *ovl_cache_get_impure(const struct path *path)
{
int res;
struct dentry *dentry = path->dentry;
@@ -648,7 +648,7 @@ struct ovl_readdir_translate {
bool xinowarn;
};
-static int ovl_fill_real(struct dir_context *ctx, const char *name,
+static bool ovl_fill_real(struct dir_context *ctx, const char *name,
int namelen, loff_t offset, u64 ino,
unsigned int d_type)
{
@@ -834,7 +834,7 @@ out_unlock:
}
static struct file *ovl_dir_open_realfile(const struct file *file,
- struct path *realpath)
+ const struct path *realpath)
{
struct file *res;
const struct cred *old_cred;
@@ -1027,7 +1027,7 @@ void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper,
inode_unlock(upper->d_inode);
}
-static int ovl_check_d_type(struct dir_context *ctx, const char *name,
+static bool ovl_check_d_type(struct dir_context *ctx, const char *name,
int namelen, loff_t offset, u64 ino,
unsigned int d_type)
{
@@ -1036,19 +1036,19 @@ static int ovl_check_d_type(struct dir_context *ctx, const char *name,
/* Even if d_type is not supported, DT_DIR is returned for . and .. */
if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen))
- return 0;
+ return true;
if (d_type != DT_UNKNOWN)
rdd->d_type_supported = true;
- return 0;
+ return true;
}
/*
* Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
* if error is encountered.
*/
-int ovl_check_d_type_supported(struct path *realpath)
+int ovl_check_d_type_supported(const struct path *realpath)
{
int err;
struct ovl_readdir_data rdd = {
@@ -1065,7 +1065,7 @@ int ovl_check_d_type_supported(struct path *realpath)
#define OVL_INCOMPATDIR_NAME "incompat"
-static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, struct path *path,
+static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *path,
int level)
{
int err;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index ec746d447f1b..a29a8afe9b26 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -15,6 +15,7 @@
#include <linux/seq_file.h>
#include <linux/posix_acl_xattr.h>
#include <linux/exportfs.h>
+#include <linux/file.h>
#include "overlayfs.h"
MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
@@ -908,7 +909,7 @@ static int ovl_mount_dir(const char *name, struct path *path)
return err;
}
-static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs,
+static int ovl_check_namelen(const struct path *path, struct ovl_fs *ofs,
const char *name)
{
struct kstatfs statfs;
@@ -1022,7 +1023,20 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
/* Check that everything is OK before copy-up */
if (value) {
- acl = posix_acl_from_xattr(&init_user_ns, value, size);
+ /* The above comment can be understood in two ways:
+ *
+ * 1. We just want to check whether the basic POSIX ACL format
+ * is ok. For example, if the header is correct and the size
+ * is sane.
+ * 2. We want to know whether the ACL_{GROUP,USER} entries can
+ * be mapped according to the underlying filesystem.
+ *
+ * Currently, we only check 1. If we wanted to check 2. we
+ * would need to pass the mnt_userns and the fs_userns of the
+ * underlying filesystem. But frankly, I think checking 1. is
+ * enough to start the copy-up.
+ */
+ acl = vfs_set_acl_prepare(&init_user_ns, &init_user_ns, value, size);
if (IS_ERR(acl))
return PTR_ERR(acl);
}
@@ -1353,10 +1367,11 @@ static int ovl_create_volatile_dirty(struct ovl_fs *ofs)
}
static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
- struct path *workpath)
+ const struct path *workpath)
{
struct vfsmount *mnt = ovl_upper_mnt(ofs);
- struct dentry *temp, *workdir;
+ struct dentry *workdir;
+ struct file *tmpfile;
bool rename_whiteout;
bool d_type;
int fh_type;
@@ -1392,10 +1407,10 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
pr_warn("upper fs needs to support d_type.\n");
/* Check if upper/work fs supports O_TMPFILE */
- temp = ovl_do_tmpfile(ofs, ofs->workdir, S_IFREG | 0);
- ofs->tmpfile = !IS_ERR(temp);
+ tmpfile = ovl_do_tmpfile(ofs, ofs->workdir, S_IFREG | 0);
+ ofs->tmpfile = !IS_ERR(tmpfile);
if (ofs->tmpfile)
- dput(temp);
+ fput(tmpfile);
else
pr_warn("upper fs does not support tmpfile.\n");
@@ -1482,7 +1497,7 @@ out:
}
static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,
- struct path *upperpath)
+ const struct path *upperpath)
{
int err;
struct path workpath = { };
@@ -1525,7 +1540,7 @@ out:
}
static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
- struct ovl_entry *oe, struct path *upperpath)
+ struct ovl_entry *oe, const struct path *upperpath)
{
struct vfsmount *mnt = ovl_upper_mnt(ofs);
struct dentry *indexdir;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 87f811c089e4..81a57a8d80d9 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -490,7 +490,7 @@ bool ovl_is_whiteout(struct dentry *dentry)
return inode && IS_WHITEOUT(inode);
}
-struct file *ovl_path_open(struct path *path, int flags)
+struct file *ovl_path_open(const struct path *path, int flags)
{
struct inode *inode = d_inode(path->dentry);
struct user_namespace *real_mnt_userns = mnt_user_ns(path->mnt);
@@ -578,7 +578,7 @@ void ovl_copy_up_end(struct dentry *dentry)
ovl_inode_unlock(d_inode(dentry));
}
-bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, struct path *path)
+bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path)
{
int res;
@@ -591,7 +591,7 @@ bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, struct path *path)
return false;
}
-bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, struct path *path,
+bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path,
enum ovl_xattr ox)
{
int res;
@@ -971,7 +971,7 @@ err:
}
/* err < 0, 0 if no metacopy xattr, 1 if metacopy xattr found */
-int ovl_check_metacopy_xattr(struct ovl_fs *ofs, struct path *path)
+int ovl_check_metacopy_xattr(struct ovl_fs *ofs, const struct path *path)
{
int res;
@@ -1015,7 +1015,7 @@ bool ovl_is_metacopy_dentry(struct dentry *dentry)
return (oe->numlower > 1);
}
-char *ovl_get_redirect_xattr(struct ovl_fs *ofs, struct path *path, int padding)
+char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding)
{
int res;
char *s, *next, *buf = NULL;
diff --git a/fs/pipe.c b/fs/pipe.c
index 74ae9fafd25a..42c7ff41c2db 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -860,7 +860,7 @@ static struct vfsmount *pipe_mnt __read_mostly;
*/
static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
{
- return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
+ return dynamic_dname(buffer, buflen, "pipe:[%lu]",
d_inode(dentry)->i_ino);
}
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 5af33800743e..74dc0f571dc9 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -24,6 +24,7 @@
#include <linux/user_namespace.h>
#include <linux/namei.h>
#include <linux/mnt_idmapping.h>
+#include <linux/iversion.h>
static struct posix_acl **acl_by_type(struct inode *inode, int type)
{
@@ -710,9 +711,9 @@ EXPORT_SYMBOL(posix_acl_update_mode);
/*
* Fix up the uids and gids in posix acl extended attributes in place.
*/
-static int posix_acl_fix_xattr_common(void *value, size_t size)
+static int posix_acl_fix_xattr_common(const void *value, size_t size)
{
- struct posix_acl_xattr_header *header = value;
+ const struct posix_acl_xattr_header *header = value;
int count;
if (!header)
@@ -720,13 +721,13 @@ static int posix_acl_fix_xattr_common(void *value, size_t size)
if (size < sizeof(struct posix_acl_xattr_header))
return -EINVAL;
if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
- return -EINVAL;
+ return -EOPNOTSUPP;
count = posix_acl_xattr_count(size);
if (count < 0)
return -EINVAL;
if (count == 0)
- return -EINVAL;
+ return 0;
return count;
}
@@ -748,7 +749,7 @@ void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns,
return;
count = posix_acl_fix_xattr_common(value, size);
- if (count < 0)
+ if (count <= 0)
return;
for (end = entry + count; entry != end; entry++) {
@@ -771,46 +772,6 @@ void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns,
}
}
-void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns,
- const struct inode *inode,
- void *value, size_t size)
-{
- struct posix_acl_xattr_header *header = value;
- struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
- struct user_namespace *fs_userns = i_user_ns(inode);
- int count;
- vfsuid_t vfsuid;
- vfsgid_t vfsgid;
- kuid_t uid;
- kgid_t gid;
-
- if (no_idmapping(mnt_userns, i_user_ns(inode)))
- return;
-
- count = posix_acl_fix_xattr_common(value, size);
- if (count < 0)
- return;
-
- for (end = entry + count; entry != end; entry++) {
- switch (le16_to_cpu(entry->e_tag)) {
- case ACL_USER:
- uid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
- vfsuid = VFSUIDT_INIT(uid);
- uid = from_vfsuid(mnt_userns, fs_userns, vfsuid);
- entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, uid));
- break;
- case ACL_GROUP:
- gid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id));
- vfsgid = VFSGIDT_INIT(gid);
- gid = from_vfsgid(mnt_userns, fs_userns, vfsgid);
- entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, gid));
- break;
- default:
- break;
- }
- }
-}
-
static void posix_acl_fix_xattr_userns(
struct user_namespace *to, struct user_namespace *from,
void *value, size_t size)
@@ -822,7 +783,7 @@ static void posix_acl_fix_xattr_userns(
kgid_t gid;
count = posix_acl_fix_xattr_common(value, size);
- if (count < 0)
+ if (count <= 0)
return;
for (end = entry + count; entry != end; entry++) {
@@ -857,12 +818,32 @@ void posix_acl_fix_xattr_to_user(void *value, size_t size)
posix_acl_fix_xattr_userns(user_ns, &init_user_ns, value, size);
}
-/*
- * Convert from extended attribute to in-memory representation.
+/**
+ * make_posix_acl - convert POSIX ACLs from uapi to VFS format using the
+ * provided callbacks to map ACL_{GROUP,USER} entries into the
+ * appropriate format
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @value: the uapi representation of POSIX ACLs
+ * @size: the size of @void
+ * @uid_cb: callback to use for mapping the uid stored in ACL_USER entries
+ * @gid_cb: callback to use for mapping the gid stored in ACL_GROUP entries
+ *
+ * The make_posix_acl() helper is an abstraction to translate from uapi format
+ * into the VFS format allowing the caller to specific callbacks to map
+ * ACL_{GROUP,USER} entries into the expected format. This is used in
+ * posix_acl_from_xattr() and vfs_set_acl_prepare() and avoids pointless code
+ * duplication.
+ *
+ * Return: Allocated struct posix_acl on success, NULL for a valid header but
+ * without actual POSIX ACL entries, or ERR_PTR() encoded error code.
*/
-struct posix_acl *
-posix_acl_from_xattr(struct user_namespace *user_ns,
- const void *value, size_t size)
+static struct posix_acl *make_posix_acl(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns, const void *value, size_t size,
+ kuid_t (*uid_cb)(struct user_namespace *, struct user_namespace *,
+ const struct posix_acl_xattr_entry *),
+ kgid_t (*gid_cb)(struct user_namespace *, struct user_namespace *,
+ const struct posix_acl_xattr_entry *))
{
const struct posix_acl_xattr_header *header = value;
const struct posix_acl_xattr_entry *entry = (const void *)(header + 1), *end;
@@ -870,16 +851,9 @@ posix_acl_from_xattr(struct user_namespace *user_ns,
struct posix_acl *acl;
struct posix_acl_entry *acl_e;
- if (!value)
- return NULL;
- if (size < sizeof(struct posix_acl_xattr_header))
- return ERR_PTR(-EINVAL);
- if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
- return ERR_PTR(-EOPNOTSUPP);
-
- count = posix_acl_xattr_count(size);
+ count = posix_acl_fix_xattr_common(value, size);
if (count < 0)
- return ERR_PTR(-EINVAL);
+ return ERR_PTR(count);
if (count == 0)
return NULL;
@@ -900,16 +874,12 @@ posix_acl_from_xattr(struct user_namespace *user_ns,
break;
case ACL_USER:
- acl_e->e_uid =
- make_kuid(user_ns,
- le32_to_cpu(entry->e_id));
+ acl_e->e_uid = uid_cb(mnt_userns, fs_userns, entry);
if (!uid_valid(acl_e->e_uid))
goto fail;
break;
case ACL_GROUP:
- acl_e->e_gid =
- make_kgid(user_ns,
- le32_to_cpu(entry->e_id));
+ acl_e->e_gid = gid_cb(mnt_userns, fs_userns, entry);
if (!gid_valid(acl_e->e_gid))
goto fail;
break;
@@ -924,6 +894,181 @@ fail:
posix_acl_release(acl);
return ERR_PTR(-EINVAL);
}
+
+/**
+ * vfs_set_acl_prepare_kuid - map ACL_USER uid according to mount- and
+ * filesystem idmapping
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @e: a ACL_USER entry in POSIX ACL uapi format
+ *
+ * The uid stored as ACL_USER entry in @e is a kuid_t stored as a raw {g,u}id
+ * value. The vfs_set_acl_prepare_kuid() will recover the kuid_t through
+ * KUIDT_INIT() and then map it according to the idmapped mount. The resulting
+ * kuid_t is the value which the filesystem can map up into a raw backing store
+ * id in the filesystem's idmapping.
+ *
+ * This is used in vfs_set_acl_prepare() to generate the proper VFS
+ * representation of POSIX ACLs with ACL_USER entries during setxattr().
+ *
+ * Return: A kuid in @fs_userns for the uid stored in @e.
+ */
+static inline kuid_t
+vfs_set_acl_prepare_kuid(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ const struct posix_acl_xattr_entry *e)
+{
+ kuid_t kuid = KUIDT_INIT(le32_to_cpu(e->e_id));
+ return from_vfsuid(mnt_userns, fs_userns, VFSUIDT_INIT(kuid));
+}
+
+/**
+ * vfs_set_acl_prepare_kgid - map ACL_GROUP gid according to mount- and
+ * filesystem idmapping
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @e: a ACL_GROUP entry in POSIX ACL uapi format
+ *
+ * The gid stored as ACL_GROUP entry in @e is a kgid_t stored as a raw {g,u}id
+ * value. The vfs_set_acl_prepare_kgid() will recover the kgid_t through
+ * KGIDT_INIT() and then map it according to the idmapped mount. The resulting
+ * kgid_t is the value which the filesystem can map up into a raw backing store
+ * id in the filesystem's idmapping.
+ *
+ * This is used in vfs_set_acl_prepare() to generate the proper VFS
+ * representation of POSIX ACLs with ACL_GROUP entries during setxattr().
+ *
+ * Return: A kgid in @fs_userns for the gid stored in @e.
+ */
+static inline kgid_t
+vfs_set_acl_prepare_kgid(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ const struct posix_acl_xattr_entry *e)
+{
+ kgid_t kgid = KGIDT_INIT(le32_to_cpu(e->e_id));
+ return from_vfsgid(mnt_userns, fs_userns, VFSGIDT_INIT(kgid));
+}
+
+/**
+ * vfs_set_acl_prepare - convert POSIX ACLs from uapi to VFS format taking
+ * mount and filesystem idmappings into account
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @value: the uapi representation of POSIX ACLs
+ * @size: the size of @void
+ *
+ * When setting POSIX ACLs with ACL_{GROUP,USER} entries they need to be
+ * mapped according to the relevant mount- and filesystem idmapping. It is
+ * important that the ACL_{GROUP,USER} entries in struct posix_acl will be
+ * mapped into k{g,u}id_t that are supposed to be mapped up in the filesystem
+ * idmapping. This is crucial since the resulting struct posix_acl might be
+ * cached filesystem wide. The vfs_set_acl_prepare() function will take care to
+ * perform all necessary idmappings.
+ *
+ * Note, that since basically forever the {g,u}id values encoded as
+ * ACL_{GROUP,USER} entries in the uapi POSIX ACLs passed via @value contain
+ * values that have been mapped according to the caller's idmapping. In other
+ * words, POSIX ACLs passed in uapi format as @value during setxattr() contain
+ * {g,u}id values in their ACL_{GROUP,USER} entries that should actually have
+ * been stored as k{g,u}id_t.
+ *
+ * This means, vfs_set_acl_prepare() needs to first recover the k{g,u}id_t by
+ * calling K{G,U}IDT_INIT(). Afterwards they can be interpreted as vfs{g,u}id_t
+ * through from_vfs{g,u}id() to account for any idmapped mounts. The
+ * vfs_set_acl_prepare_k{g,u}id() helpers will take care to generate the
+ * correct k{g,u}id_t.
+ *
+ * The filesystem will then receive the POSIX ACLs ready to be cached
+ * filesystem wide and ready to be written to the backing store taking the
+ * filesystem's idmapping into account.
+ *
+ * Return: Allocated struct posix_acl on success, NULL for a valid header but
+ * without actual POSIX ACL entries, or ERR_PTR() encoded error code.
+ */
+struct posix_acl *vfs_set_acl_prepare(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ const void *value, size_t size)
+{
+ return make_posix_acl(mnt_userns, fs_userns, value, size,
+ vfs_set_acl_prepare_kuid,
+ vfs_set_acl_prepare_kgid);
+}
+EXPORT_SYMBOL(vfs_set_acl_prepare);
+
+/**
+ * posix_acl_from_xattr_kuid - map ACL_USER uid into filesystem idmapping
+ * @mnt_userns: unused
+ * @fs_userns: the filesystem's idmapping
+ * @e: a ACL_USER entry in POSIX ACL uapi format
+ *
+ * Map the uid stored as ACL_USER entry in @e into the filesystem's idmapping.
+ * This is used in posix_acl_from_xattr() to generate the proper VFS
+ * representation of POSIX ACLs with ACL_USER entries.
+ *
+ * Return: A kuid in @fs_userns for the uid stored in @e.
+ */
+static inline kuid_t
+posix_acl_from_xattr_kuid(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ const struct posix_acl_xattr_entry *e)
+{
+ return make_kuid(fs_userns, le32_to_cpu(e->e_id));
+}
+
+/**
+ * posix_acl_from_xattr_kgid - map ACL_GROUP gid into filesystem idmapping
+ * @mnt_userns: unused
+ * @fs_userns: the filesystem's idmapping
+ * @e: a ACL_GROUP entry in POSIX ACL uapi format
+ *
+ * Map the gid stored as ACL_GROUP entry in @e into the filesystem's idmapping.
+ * This is used in posix_acl_from_xattr() to generate the proper VFS
+ * representation of POSIX ACLs with ACL_GROUP entries.
+ *
+ * Return: A kgid in @fs_userns for the gid stored in @e.
+ */
+static inline kgid_t
+posix_acl_from_xattr_kgid(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ const struct posix_acl_xattr_entry *e)
+{
+ return make_kgid(fs_userns, le32_to_cpu(e->e_id));
+}
+
+/**
+ * posix_acl_from_xattr - convert POSIX ACLs from backing store to VFS format
+ * @fs_userns: the filesystem's idmapping
+ * @value: the uapi representation of POSIX ACLs
+ * @size: the size of @void
+ *
+ * Filesystems that store POSIX ACLs in the unaltered uapi format should use
+ * posix_acl_from_xattr() when reading them from the backing store and
+ * converting them into the struct posix_acl VFS format. The helper is
+ * specifically intended to be called from the ->get_acl() inode operation.
+ *
+ * The posix_acl_from_xattr() function will map the raw {g,u}id values stored
+ * in ACL_{GROUP,USER} entries into the filesystem idmapping in @fs_userns. The
+ * posix_acl_from_xattr_k{g,u}id() helpers will take care to generate the
+ * correct k{g,u}id_t. The returned struct posix_acl can be cached.
+ *
+ * Note that posix_acl_from_xattr() does not take idmapped mounts into account.
+ * If it did it calling is from the ->get_acl() inode operation would return
+ * POSIX ACLs mapped according to an idmapped mount which would mean that the
+ * value couldn't be cached for the filesystem. Idmapped mounts are taken into
+ * account on the fly during permission checking or right at the VFS -
+ * userspace boundary before reporting them to the user.
+ *
+ * Return: Allocated struct posix_acl on success, NULL for a valid header but
+ * without actual POSIX ACL entries, or ERR_PTR() encoded error code.
+ */
+struct posix_acl *
+posix_acl_from_xattr(struct user_namespace *fs_userns,
+ const void *value, size_t size)
+{
+ return make_posix_acl(&init_user_ns, fs_userns, value, size,
+ posix_acl_from_xattr_kuid,
+ posix_acl_from_xattr_kgid);
+}
EXPORT_SYMBOL (posix_acl_from_xattr);
/*
@@ -1027,7 +1172,17 @@ posix_acl_xattr_set(const struct xattr_handler *handler,
int ret;
if (value) {
- acl = posix_acl_from_xattr(&init_user_ns, value, size);
+ /*
+ * By the time we end up here the {g,u}ids stored in
+ * ACL_{GROUP,USER} have already been mapped according to the
+ * caller's idmapping. The vfs_set_acl_prepare() helper will
+ * recover them and take idmapped mounts into account. The
+ * filesystem will receive the POSIX ACLs in the correct
+ * format ready to be cached or written to the backing store
+ * taking the filesystem idmapping into account.
+ */
+ acl = vfs_set_acl_prepare(mnt_userns, i_user_ns(inode),
+ value, size);
if (IS_ERR(acl))
return PTR_ERR(acl);
}
@@ -1073,6 +1228,8 @@ int simple_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
}
inode->i_ctime = current_time(inode);
+ if (IS_I_VERSION(inode))
+ inode_inc_iversion(inode);
set_cached_acl(inode, type, acl);
return 0;
}
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index c930001056f9..32b1116ae137 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -92,6 +92,7 @@ config PROC_PAGE_MONITOR
config PROC_CHILDREN
bool "Include /proc/<pid>/task/<tid>/children file"
+ depends on PROC_FS
default n
help
Provides a fast way to retrieve first level children pids of a task. See
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 99fcbfda8e25..49283b8103c7 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -279,7 +279,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
collect_sigign_sigcatch(p, &ignored, &caught);
num_threads = get_nr_threads(p);
rcu_read_lock(); /* FIXME: is this correct? */
- qsize = get_ucounts_value(task_ucounts(p), UCOUNT_RLIMIT_SIGPENDING);
+ qsize = get_rlimit_value(task_ucounts(p), UCOUNT_RLIMIT_SIGPENDING);
rcu_read_unlock();
qlim = task_rlimit(p, RLIMIT_SIGPENDING);
unlock_task_sighand(p, &flags);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 93f7e3d971e4..9e479d7d202b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1761,7 +1761,7 @@ out:
return ERR_PTR(error);
}
-static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
+static int do_proc_readlink(const struct path *path, char __user *buffer, int buflen)
{
char *tmp = kmalloc(PATH_MAX, GFP_KERNEL);
char *pathname;
@@ -2350,6 +2350,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
GENRADIX(struct map_files_info) fa;
struct map_files_info *p;
int ret;
+ struct vma_iterator vmi;
genradix_init(&fa);
@@ -2388,7 +2389,9 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
* routine might require mmap_lock taken in might_fault().
*/
- for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
+ pos = 2;
+ vma_iter_init(&vmi, mm, 0);
+ for_each_vma(vmi, vma) {
if (!vma->vm_file)
continue;
if (++pos <= ctx->pos)
@@ -2728,7 +2731,7 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
return -ESRCH;
length = security_getprocattr(task, PROC_I(inode)->op.lsm,
- (char*)file->f_path.dentry->d_name.name,
+ file->f_path.dentry->d_name.name,
&p);
put_task_struct(task);
if (length > 0)
@@ -3196,6 +3199,19 @@ static int proc_pid_ksm_merging_pages(struct seq_file *m, struct pid_namespace *
return 0;
}
+static int proc_pid_ksm_stat(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
+{
+ struct mm_struct *mm;
+
+ mm = get_task_mm(task);
+ if (mm) {
+ seq_printf(m, "ksm_rmap_items %lu\n", mm->ksm_rmap_items);
+ mmput(mm);
+ }
+
+ return 0;
+}
#endif /* CONFIG_KSM */
#ifdef CONFIG_STACKLEAK_METRICS
@@ -3331,6 +3347,7 @@ static const struct pid_entry tgid_base_stuff[] = {
#endif
#ifdef CONFIG_KSM
ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages),
+ ONE("ksm_stat", S_IRUSR, proc_pid_ksm_stat),
#endif
};
@@ -3668,6 +3685,7 @@ static const struct pid_entry tid_base_stuff[] = {
#endif
#ifdef CONFIG_KSM
ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages),
+ ONE("ksm_stat", S_IRUSR, proc_pid_ksm_stat),
#endif
};
diff --git a/fs/proc/devices.c b/fs/proc/devices.c
index 837971e74109..fe7bfcb7d049 100644
--- a/fs/proc/devices.c
+++ b/fs/proc/devices.c
@@ -4,6 +4,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/blkdev.h>
+#include "internal.h"
static int devinfo_show(struct seq_file *f, void *v)
{
@@ -54,7 +55,10 @@ static const struct seq_operations devinfo_ops = {
static int __init proc_devices_init(void)
{
- proc_create_seq("devices", 0, NULL, &devinfo_ops);
+ struct proc_dir_entry *pde;
+
+ pde = proc_create_seq("devices", 0, NULL, &devinfo_ops);
+ pde_make_permanent(pde);
return 0;
}
fs_initcall(proc_devices_init);
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 06a80f78433d..b701d0207edf 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -79,6 +79,11 @@ static inline bool pde_is_permanent(const struct proc_dir_entry *pde)
return pde->flags & PROC_ENTRY_PERMANENT;
}
+static inline void pde_make_permanent(struct proc_dir_entry *pde)
+{
+ pde->flags |= PROC_ENTRY_PERMANENT;
+}
+
extern struct kmem_cache *proc_dir_entry_cache;
void pde_free(struct proc_dir_entry *pde);
@@ -285,7 +290,7 @@ struct proc_maps_private {
struct task_struct *task;
struct mm_struct *mm;
#ifdef CONFIG_MMU
- struct vm_area_struct *tail_vma;
+ struct vma_iterator iter;
#endif
#ifdef CONFIG_NUMA
struct mempolicy *task_mempolicy;
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index 592e6dc7c110..2fc92a13f9f8 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -17,8 +17,6 @@
#include <asm/io.h>
-extern wait_queue_head_t log_wait;
-
static int kmsg_open(struct inode * inode, struct file * file)
{
return do_syslog(SYSLOG_ACTION_OPEN, NULL, 0, SYSLOG_FROM_PROC);
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index f32878d9a39f..817981e57223 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -9,6 +9,7 @@
#include <linux/seq_file.h>
#include <linux/seqlock.h>
#include <linux/time.h>
+#include "internal.h"
static int loadavg_proc_show(struct seq_file *m, void *v)
{
@@ -27,7 +28,10 @@ static int loadavg_proc_show(struct seq_file *m, void *v)
static int __init proc_loadavg_init(void)
{
- proc_create_single("loadavg", 0, NULL, loadavg_proc_show);
+ struct proc_dir_entry *pde;
+
+ pde = proc_create_single("loadavg", 0, NULL, loadavg_proc_show);
+ pde_make_permanent(pde);
return 0;
}
fs_initcall(proc_loadavg_init);
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 6e89f0e2fd20..5101131e6047 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -115,6 +115,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
#endif
show_val_kb(m, "PageTables: ",
global_node_page_state(NR_PAGETABLE));
+ show_val_kb(m, "SecPageTables: ",
+ global_node_page_state(NR_SECONDARY_PAGETABLE));
show_val_kb(m, "NFS_Unstable: ", 0);
show_val_kb(m, "Bounce: ",
@@ -162,7 +164,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
static int __init proc_meminfo_init(void)
{
- proc_create_single("meminfo", 0, NULL, meminfo_proc_show);
+ struct proc_dir_entry *pde;
+
+ pde = proc_create_single("meminfo", 0, NULL, meminfo_proc_show);
+ pde_make_permanent(pde);
return 0;
}
fs_initcall(proc_meminfo_init);
diff --git a/fs/proc/page.c b/fs/proc/page.c
index a2873a617ae8..f2273b164535 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -91,6 +91,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
}
static const struct proc_ops kpagecount_proc_ops = {
+ .proc_flags = PROC_ENTRY_PERMANENT,
.proc_lseek = mem_lseek,
.proc_read = kpagecount_read,
};
@@ -268,6 +269,7 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
}
static const struct proc_ops kpageflags_proc_ops = {
+ .proc_flags = PROC_ENTRY_PERMANENT,
.proc_lseek = mem_lseek,
.proc_read = kpageflags_read,
};
@@ -322,6 +324,7 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
}
static const struct proc_ops kpagecgroup_proc_ops = {
+ .proc_flags = PROC_ENTRY_PERMANENT,
.proc_lseek = mem_lseek,
.proc_read = kpagecgroup_read,
};
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 021e83fe831f..48f2d60bd78a 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -28,13 +28,6 @@ static const struct inode_operations proc_sys_inode_operations;
static const struct file_operations proc_sys_dir_file_operations;
static const struct inode_operations proc_sys_dir_operations;
-/* shared constants to be used in various sysctls */
-const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
-EXPORT_SYMBOL(sysctl_vals);
-
-const unsigned long sysctl_long_vals[] = { 0, 1, LONG_MAX };
-EXPORT_SYMBOL_GPL(sysctl_long_vals);
-
/* Support for permanently empty directories */
struct ctl_table sysctl_mount_point[] = {
@@ -1246,7 +1239,7 @@ static bool get_links(struct ctl_dir *dir,
static int insert_links(struct ctl_table_header *head)
{
struct ctl_table_set *root_set = &sysctl_table_root.default_set;
- struct ctl_dir *core_parent = NULL;
+ struct ctl_dir *core_parent;
struct ctl_table_header *links;
int err;
diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c
index 12901dcf57e2..f4616083faef 100644
--- a/fs/proc/softirqs.c
+++ b/fs/proc/softirqs.c
@@ -3,6 +3,7 @@
#include <linux/kernel_stat.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include "internal.h"
/*
* /proc/softirqs ... display the number of softirqs
@@ -27,7 +28,10 @@ static int show_softirqs(struct seq_file *p, void *v)
static int __init proc_softirqs_init(void)
{
- proc_create_single("softirqs", 0, NULL, show_softirqs);
+ struct proc_dir_entry *pde;
+
+ pde = proc_create_single("softirqs", 0, NULL, show_softirqs);
+ pde_make_permanent(pde);
return 0;
}
fs_initcall(proc_softirqs_init);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 4e0023643f8b..8a74cdcc9af0 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/pagewalk.h>
-#include <linux/vmacache.h>
#include <linux/mm_inline.h>
#include <linux/hugetlb.h>
#include <linux/huge_mm.h>
@@ -124,12 +123,26 @@ static void release_task_mempolicy(struct proc_maps_private *priv)
}
#endif
+static struct vm_area_struct *proc_get_vma(struct proc_maps_private *priv,
+ loff_t *ppos)
+{
+ struct vm_area_struct *vma = vma_next(&priv->iter);
+
+ if (vma) {
+ *ppos = vma->vm_start;
+ } else {
+ *ppos = -2UL;
+ vma = get_gate_vma(priv->mm);
+ }
+
+ return vma;
+}
+
static void *m_start(struct seq_file *m, loff_t *ppos)
{
struct proc_maps_private *priv = m->private;
unsigned long last_addr = *ppos;
struct mm_struct *mm;
- struct vm_area_struct *vma;
/* See m_next(). Zero at the start or after lseek. */
if (last_addr == -1UL)
@@ -153,31 +166,21 @@ static void *m_start(struct seq_file *m, loff_t *ppos)
return ERR_PTR(-EINTR);
}
+ vma_iter_init(&priv->iter, mm, last_addr);
hold_task_mempolicy(priv);
- priv->tail_vma = get_gate_vma(mm);
-
- vma = find_vma(mm, last_addr);
- if (vma)
- return vma;
+ if (last_addr == -2UL)
+ return get_gate_vma(mm);
- return priv->tail_vma;
+ return proc_get_vma(priv, ppos);
}
static void *m_next(struct seq_file *m, void *v, loff_t *ppos)
{
- struct proc_maps_private *priv = m->private;
- struct vm_area_struct *next, *vma = v;
-
- if (vma == priv->tail_vma)
- next = NULL;
- else if (vma->vm_next)
- next = vma->vm_next;
- else
- next = priv->tail_vma;
-
- *ppos = next ? next->vm_start : -1UL;
-
- return next;
+ if (*ppos == -2UL) {
+ *ppos = -1UL;
+ return NULL;
+ }
+ return proc_get_vma(m->private, ppos);
}
static void m_stop(struct seq_file *m, void *v)
@@ -864,7 +867,7 @@ static int show_smap(struct seq_file *m, void *v)
__show_smap(m, &mss, false);
seq_printf(m, "THPeligible: %d\n",
- hugepage_vma_check(vma, vma->vm_flags, true, false));
+ hugepage_vma_check(vma, vma->vm_flags, true, false, true));
if (arch_pkeys_enabled())
seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
@@ -877,16 +880,16 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
{
struct proc_maps_private *priv = m->private;
struct mem_size_stats mss;
- struct mm_struct *mm;
+ struct mm_struct *mm = priv->mm;
struct vm_area_struct *vma;
- unsigned long last_vma_end = 0;
+ unsigned long vma_start = 0, last_vma_end = 0;
int ret = 0;
+ MA_STATE(mas, &mm->mm_mt, 0, 0);
priv->task = get_proc_task(priv->inode);
if (!priv->task)
return -ESRCH;
- mm = priv->mm;
if (!mm || !mmget_not_zero(mm)) {
ret = -ESRCH;
goto out_put_task;
@@ -899,8 +902,13 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
goto out_put_mm;
hold_task_mempolicy(priv);
+ vma = mas_find(&mas, ULONG_MAX);
+
+ if (unlikely(!vma))
+ goto empty_set;
- for (vma = priv->mm->mmap; vma;) {
+ vma_start = vma->vm_start;
+ do {
smap_gather_stats(vma, &mss, 0);
last_vma_end = vma->vm_end;
@@ -909,6 +917,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
* access it for write request.
*/
if (mmap_lock_is_contended(mm)) {
+ mas_pause(&mas);
mmap_read_unlock(mm);
ret = mmap_read_lock_killable(mm);
if (ret) {
@@ -952,7 +961,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
* contains last_vma_end.
* Iterate VMA' from last_vma_end.
*/
- vma = find_vma(mm, last_vma_end - 1);
+ vma = mas_find(&mas, ULONG_MAX);
/* Case 3 above */
if (!vma)
break;
@@ -966,11 +975,10 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
smap_gather_stats(vma, &mss, last_vma_end);
}
/* Case 2 above */
- vma = vma->vm_next;
- }
+ } while ((vma = mas_find(&mas, ULONG_MAX)) != NULL);
- show_vma_header_prefix(m, priv->mm->mmap->vm_start,
- last_vma_end, 0, 0, 0, 0);
+empty_set:
+ show_vma_header_prefix(m, vma_start, last_vma_end, 0, 0, 0, 0);
seq_pad(m, ' ');
seq_puts(m, "[rollup]\n");
@@ -1263,6 +1271,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
return -ESRCH;
mm = get_task_mm(task);
if (mm) {
+ MA_STATE(mas, &mm->mm_mt, 0, 0);
struct mmu_notifier_range range;
struct clear_refs_private cp = {
.type = type,
@@ -1282,7 +1291,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
}
if (type == CLEAR_REFS_SOFT_DIRTY) {
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ mas_for_each(&mas, vma, ULONG_MAX) {
if (!(vma->vm_flags & VM_SOFTDIRTY))
continue;
vma->vm_flags &= ~VM_SOFTDIRTY;
@@ -1294,8 +1303,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
0, NULL, mm, 0, -1UL);
mmu_notifier_invalidate_range_start(&range);
}
- walk_page_range(mm, 0, mm->highest_vm_end, &clear_refs_walk_ops,
- &cp);
+ walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp);
if (type == CLEAR_REFS_SOFT_DIRTY) {
mmu_notifier_invalidate_range_end(&range);
flush_tlb_mm(mm);
@@ -1418,9 +1426,19 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
if (pte_swp_uffd_wp(pte))
flags |= PM_UFFD_WP;
entry = pte_to_swp_entry(pte);
- if (pm->show_pfn)
+ if (pm->show_pfn) {
+ pgoff_t offset;
+ /*
+ * For PFN swap offsets, keeping the offset field
+ * to be PFN only to be compatible with old smaps.
+ */
+ if (is_pfn_swap_entry(entry))
+ offset = swp_offset_pfn(entry);
+ else
+ offset = swp_offset(entry);
frame = swp_type(entry) |
- (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
+ (offset << MAX_SWAPFILES_SHIFT);
+ }
flags |= PM_SWAP;
migration = is_migration_entry(entry);
if (is_pfn_swap_entry(entry))
@@ -1477,7 +1495,11 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
unsigned long offset;
if (pm->show_pfn) {
- offset = swp_offset(entry) +
+ if (is_pfn_swap_entry(entry))
+ offset = swp_offset_pfn(entry);
+ else
+ offset = swp_offset(entry);
+ offset = offset +
((addr & ~PMD_MASK) >> PAGE_SHIFT);
frame = swp_type(entry) |
(offset << MAX_SWAPFILES_SHIFT);
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index a6d21fc0033c..2fd06f52b6a4 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -20,15 +20,13 @@
*/
void task_mem(struct seq_file *m, struct mm_struct *mm)
{
+ VMA_ITERATOR(vmi, mm, 0);
struct vm_area_struct *vma;
struct vm_region *region;
- struct rb_node *p;
unsigned long bytes = 0, sbytes = 0, slack = 0, size;
-
- mmap_read_lock(mm);
- for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
- vma = rb_entry(p, struct vm_area_struct, vm_rb);
+ mmap_read_lock(mm);
+ for_each_vma(vmi, vma) {
bytes += kobjsize(vma);
region = vma->vm_region;
@@ -82,15 +80,13 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
unsigned long task_vsize(struct mm_struct *mm)
{
+ VMA_ITERATOR(vmi, mm, 0);
struct vm_area_struct *vma;
- struct rb_node *p;
unsigned long vsize = 0;
mmap_read_lock(mm);
- for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
- vma = rb_entry(p, struct vm_area_struct, vm_rb);
+ for_each_vma(vmi, vma)
vsize += vma->vm_end - vma->vm_start;
- }
mmap_read_unlock(mm);
return vsize;
}
@@ -99,14 +95,13 @@ unsigned long task_statm(struct mm_struct *mm,
unsigned long *shared, unsigned long *text,
unsigned long *data, unsigned long *resident)
{
+ VMA_ITERATOR(vmi, mm, 0);
struct vm_area_struct *vma;
struct vm_region *region;
- struct rb_node *p;
unsigned long size = kobjsize(mm);
mmap_read_lock(mm);
- for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
- vma = rb_entry(p, struct vm_area_struct, vm_rb);
+ for_each_vma(vmi, vma) {
size += kobjsize(vma);
region = vma->vm_region;
if (region) {
@@ -190,17 +185,19 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
*/
static int show_map(struct seq_file *m, void *_p)
{
- struct rb_node *p = _p;
-
- return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb));
+ return nommu_vma_show(m, _p);
}
static void *m_start(struct seq_file *m, loff_t *pos)
{
struct proc_maps_private *priv = m->private;
struct mm_struct *mm;
- struct rb_node *p;
- loff_t n = *pos;
+ struct vm_area_struct *vma;
+ unsigned long addr = *pos;
+
+ /* See m_next(). Zero at the start or after lseek. */
+ if (addr == -1UL)
+ return NULL;
/* pin the task and mm whilst we play with them */
priv->task = get_proc_task(priv->inode);
@@ -216,10 +213,10 @@ static void *m_start(struct seq_file *m, loff_t *pos)
return ERR_PTR(-EINTR);
}
- /* start from the Nth VMA */
- for (p = rb_first(&mm->mm_rb); p; p = rb_next(p))
- if (n-- == 0)
- return p;
+ /* start the next element from addr */
+ vma = find_vma(mm, addr);
+ if (vma)
+ return vma;
mmap_read_unlock(mm);
mmput(mm);
@@ -242,10 +239,10 @@ static void m_stop(struct seq_file *m, void *_vml)
static void *m_next(struct seq_file *m, void *_p, loff_t *pos)
{
- struct rb_node *p = _p;
+ struct vm_area_struct *vma = _p;
- (*pos)++;
- return p ? rb_next(p) : NULL;
+ *pos = vma->vm_end;
+ return find_vma(vma->vm_mm, vma->vm_end);
}
static const struct seq_operations proc_pid_maps_ops = {
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index deb99bc9b7e6..b5343d209381 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -7,6 +7,7 @@
#include <linux/time.h>
#include <linux/time_namespace.h>
#include <linux/kernel_stat.h>
+#include "internal.h"
static int uptime_proc_show(struct seq_file *m, void *v)
{
@@ -39,7 +40,10 @@ static int uptime_proc_show(struct seq_file *m, void *v)
static int __init proc_uptime_init(void)
{
- proc_create_single("uptime", 0, NULL, uptime_proc_show);
+ struct proc_dir_entry *pde;
+
+ pde = proc_create_single("uptime", 0, NULL, uptime_proc_show);
+ pde_make_permanent(pde);
return 0;
}
fs_initcall(proc_uptime_init);
diff --git a/fs/proc/version.c b/fs/proc/version.c
index b449f186577f..02e3c3cd4a9a 100644
--- a/fs/proc/version.c
+++ b/fs/proc/version.c
@@ -5,6 +5,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/utsname.h>
+#include "internal.h"
static int version_proc_show(struct seq_file *m, void *v)
{
@@ -17,7 +18,10 @@ static int version_proc_show(struct seq_file *m, void *v)
static int __init proc_version_init(void)
{
- proc_create_single("version", 0, NULL, version_proc_show);
+ struct proc_dir_entry *pde;
+
+ pde = proc_create_single("version", 0, NULL, version_proc_show);
+ pde_make_permanent(pde);
return 0;
}
fs_initcall(proc_version_init);
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index b2fd3c20e7c2..0c034ea39954 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -28,14 +28,11 @@
#include <linux/crypto.h>
#include <linux/string.h>
#include <linux/timer.h>
-#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/jiffies.h>
#include <linux/workqueue.h>
-#include <crypto/acompress.h>
-
#include "internal.h"
/*
@@ -93,8 +90,7 @@ module_param(compress, charp, 0444);
MODULE_PARM_DESC(compress, "compression to use");
/* Compression parameters */
-static struct crypto_acomp *tfm;
-static struct acomp_req *creq;
+static struct crypto_comp *tfm;
struct pstore_zbackend {
int (*zbufsize)(size_t size);
@@ -272,21 +268,12 @@ static const struct pstore_zbackend zbackends[] = {
static int pstore_compress(const void *in, void *out,
unsigned int inlen, unsigned int outlen)
{
- struct scatterlist src, dst;
int ret;
if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS))
return -EINVAL;
- sg_init_table(&src, 1);
- sg_set_buf(&src, in, inlen);
-
- sg_init_table(&dst, 1);
- sg_set_buf(&dst, out, outlen);
-
- acomp_request_set_params(creq, &src, &dst, inlen, outlen);
-
- ret = crypto_acomp_compress(creq);
+ ret = crypto_comp_compress(tfm, in, inlen, out, &outlen);
if (ret) {
pr_err("crypto_comp_compress failed, ret = %d!\n", ret);
return ret;
@@ -297,7 +284,7 @@ static int pstore_compress(const void *in, void *out,
static void allocate_buf_for_compression(void)
{
- struct crypto_acomp *acomp;
+ struct crypto_comp *ctx;
int size;
char *buf;
@@ -309,7 +296,7 @@ static void allocate_buf_for_compression(void)
if (!psinfo || tfm)
return;
- if (!crypto_has_acomp(zbackend->name, 0, CRYPTO_ALG_ASYNC)) {
+ if (!crypto_has_comp(zbackend->name, 0, 0)) {
pr_err("Unknown compression: %s\n", zbackend->name);
return;
}
@@ -328,24 +315,16 @@ static void allocate_buf_for_compression(void)
return;
}
- acomp = crypto_alloc_acomp(zbackend->name, 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR_OR_NULL(acomp)) {
+ ctx = crypto_alloc_comp(zbackend->name, 0, 0);
+ if (IS_ERR_OR_NULL(ctx)) {
kfree(buf);
pr_err("crypto_alloc_comp('%s') failed: %ld\n", zbackend->name,
- PTR_ERR(acomp));
- return;
- }
-
- creq = acomp_request_alloc(acomp);
- if (!creq) {
- crypto_free_acomp(acomp);
- kfree(buf);
- pr_err("acomp_request_alloc('%s') failed\n", zbackend->name);
+ PTR_ERR(ctx));
return;
}
/* A non-NULL big_oops_buf indicates compression is available. */
- tfm = acomp;
+ tfm = ctx;
big_oops_buf_sz = size;
big_oops_buf = buf;
@@ -355,8 +334,7 @@ static void allocate_buf_for_compression(void)
static void free_buf_for_compression(void)
{
if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && tfm) {
- acomp_request_free(creq);
- crypto_free_acomp(tfm);
+ crypto_free_comp(tfm);
tfm = NULL;
}
kfree(big_oops_buf);
@@ -693,8 +671,6 @@ static void decompress_record(struct pstore_record *record)
int ret;
int unzipped_len;
char *unzipped, *workspace;
- struct acomp_req *dreq;
- struct scatterlist src, dst;
if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS) || !record->compressed)
return;
@@ -718,30 +694,16 @@ static void decompress_record(struct pstore_record *record)
if (!workspace)
return;
- dreq = acomp_request_alloc(tfm);
- if (!dreq) {
- kfree(workspace);
- return;
- }
-
- sg_init_table(&src, 1);
- sg_set_buf(&src, record->buf, record->size);
-
- sg_init_table(&dst, 1);
- sg_set_buf(&dst, workspace, unzipped_len);
-
- acomp_request_set_params(dreq, &src, &dst, record->size, unzipped_len);
-
/* After decompression "unzipped_len" is almost certainly smaller. */
- ret = crypto_acomp_decompress(dreq);
+ ret = crypto_comp_decompress(tfm, record->buf, record->size,
+ workspace, &unzipped_len);
if (ret) {
- pr_err("crypto_acomp_decompress failed, ret = %d!\n", ret);
+ pr_err("crypto_comp_decompress failed, ret = %d!\n", ret);
kfree(workspace);
return;
}
/* Append ECC notice to decompressed buffer. */
- unzipped_len = dreq->dlen;
memcpy(workspace + unzipped_len, record->buf + record->size,
record->ecc_notice_size);
@@ -749,7 +711,6 @@ static void decompress_record(struct pstore_record *record)
unzipped = kmemdup(workspace, unzipped_len + record->ecc_notice_size,
GFP_KERNEL);
kfree(workspace);
- acomp_request_free(dreq);
if (!unzipped)
return;
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index b9895afca9d1..85b2fa3b211c 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -470,10 +470,8 @@ out2:
out1:
iput(sbi->inodes);
out:
- if (bh1)
- brelse(bh1);
- if (bh2)
- brelse(bh2);
+ brelse(bh1);
+ brelse(bh2);
outnobh:
kfree(qs);
s->s_fs_info = NULL;
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index 5f2405994280..0f1493e0f6d0 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -71,6 +71,40 @@ static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
return ret;
}
+static inline int do_check_range(struct super_block *sb, const char *val_name,
+ uint val, uint min_val, uint max_val)
+{
+ if (val < min_val || val > max_val) {
+ quota_error(sb, "Getting %s %u out of range %u-%u",
+ val_name, val, min_val, max_val);
+ return -EUCLEAN;
+ }
+
+ return 0;
+}
+
+static int check_dquot_block_header(struct qtree_mem_dqinfo *info,
+ struct qt_disk_dqdbheader *dh)
+{
+ int err = 0;
+
+ err = do_check_range(info->dqi_sb, "dqdh_next_free",
+ le32_to_cpu(dh->dqdh_next_free), 0,
+ info->dqi_blocks - 1);
+ if (err)
+ return err;
+ err = do_check_range(info->dqi_sb, "dqdh_prev_free",
+ le32_to_cpu(dh->dqdh_prev_free), 0,
+ info->dqi_blocks - 1);
+ if (err)
+ return err;
+ err = do_check_range(info->dqi_sb, "dqdh_entries",
+ le16_to_cpu(dh->dqdh_entries), 0,
+ qtree_dqstr_in_blk(info));
+
+ return err;
+}
+
/* Remove empty block from list and return it */
static int get_free_dqblk(struct qtree_mem_dqinfo *info)
{
@@ -85,6 +119,9 @@ static int get_free_dqblk(struct qtree_mem_dqinfo *info)
ret = read_blk(info, blk, buf);
if (ret < 0)
goto out_buf;
+ ret = check_dquot_block_header(info, dh);
+ if (ret)
+ goto out_buf;
info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
}
else {
@@ -232,6 +269,9 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
*err = read_blk(info, blk, buf);
if (*err < 0)
goto out_buf;
+ *err = check_dquot_block_header(info, dh);
+ if (*err)
+ goto out_buf;
} else {
blk = get_free_dqblk(info);
if ((int)blk < 0) {
@@ -313,6 +353,10 @@ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
}
ref = (__le32 *)buf;
newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
+ ret = do_check_range(dquot->dq_sb, "block", newblk, 0,
+ info->dqi_blocks - 1);
+ if (ret)
+ goto out_buf;
if (!newblk)
newson = 1;
if (depth == info->dqi_qtree_depth - 1) {
@@ -424,6 +468,9 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
goto out_buf;
}
dh = (struct qt_disk_dqdbheader *)buf;
+ ret = check_dquot_block_header(info, dh);
+ if (ret)
+ goto out_buf;
le16_add_cpu(&dh->dqdh_entries, -1);
if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */
ret = remove_free_dqentry(info, buf, blk);
@@ -480,12 +527,10 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
goto out_buf;
}
newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
- if (newblk < QT_TREEOFF || newblk >= info->dqi_blocks) {
- quota_error(dquot->dq_sb, "Getting block too big (%u >= %u)",
- newblk, info->dqi_blocks);
- ret = -EUCLEAN;
+ ret = do_check_range(dquot->dq_sb, "block", newblk, QT_TREEOFF,
+ info->dqi_blocks - 1);
+ if (ret)
goto out_buf;
- }
if (depth == info->dqi_qtree_depth - 1) {
ret = free_dqentry(info, dquot, newblk);
@@ -586,12 +631,10 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info,
blk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
if (!blk) /* No reference? */
goto out_buf;
- if (blk < QT_TREEOFF || blk >= info->dqi_blocks) {
- quota_error(dquot->dq_sb, "Getting block too big (%u >= %u)",
- blk, info->dqi_blocks);
- ret = -EUCLEAN;
+ ret = do_check_range(dquot->dq_sb, "block", blk, QT_TREEOFF,
+ info->dqi_blocks - 1);
+ if (ret)
goto out_buf;
- }
if (depth < info->dqi_qtree_depth - 1)
ret = find_tree_dqentry(info, dquot, blk, depth+1);
@@ -705,15 +748,21 @@ static int find_next_id(struct qtree_mem_dqinfo *info, qid_t *id,
goto out_buf;
}
for (i = __get_index(info, *id, depth); i < epb; i++) {
- if (ref[i] == cpu_to_le32(0)) {
+ uint blk_no = le32_to_cpu(ref[i]);
+
+ if (blk_no == 0) {
*id += level_inc;
continue;
}
+ ret = do_check_range(info->dqi_sb, "block", blk_no, 0,
+ info->dqi_blocks - 1);
+ if (ret)
+ goto out_buf;
if (depth == info->dqi_qtree_depth - 1) {
ret = 0;
goto out_buf;
}
- ret = find_next_id(info, id, le32_to_cpu(ref[i]), depth + 1);
+ ret = find_next_id(info, id, blk_no, depth + 1);
if (ret != -ENOENT)
break;
}
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index ba3525ccc27e..cb240eac5036 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -203,9 +203,9 @@ static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags)
{
- unsigned long maxpages, lpages, nr, loop, ret;
+ unsigned long maxpages, lpages, nr_folios, loop, ret, nr_pages, pfn;
struct inode *inode = file_inode(file);
- struct page **pages = NULL, **ptr, *page;
+ struct folio_batch fbatch;
loff_t isize;
/* the mapping mustn't extend beyond the EOF */
@@ -221,31 +221,39 @@ static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
goto out;
/* gang-find the pages */
- pages = kcalloc(lpages, sizeof(struct page *), GFP_KERNEL);
- if (!pages)
- goto out_free;
-
- nr = find_get_pages_contig(inode->i_mapping, pgoff, lpages, pages);
- if (nr != lpages)
- goto out_free_pages; /* leave if some pages were missing */
+ folio_batch_init(&fbatch);
+ nr_pages = 0;
+repeat:
+ nr_folios = filemap_get_folios_contig(inode->i_mapping, &pgoff,
+ ULONG_MAX, &fbatch);
+ if (!nr_folios) {
+ ret = -ENOSYS;
+ return ret;
+ }
+ if (ret == -ENOSYS) {
+ ret = (unsigned long) folio_address(fbatch.folios[0]);
+ pfn = folio_pfn(fbatch.folios[0]);
+ }
/* check the pages for physical adjacency */
- ptr = pages;
- page = *ptr++;
- page++;
- for (loop = lpages; loop > 1; loop--)
- if (*ptr++ != page++)
- goto out_free_pages;
+ for (loop = 0; loop < nr_folios; loop++) {
+ if (pfn + nr_pages != folio_pfn(fbatch.folios[loop])) {
+ ret = -ENOSYS;
+ goto out_free; /* leave if not physical adjacent */
+ }
+ nr_pages += folio_nr_pages(fbatch.folios[loop]);
+ if (nr_pages >= lpages)
+ goto out_free; /* successfully found desired pages*/
+ }
+ if (nr_pages < lpages) {
+ folio_batch_release(&fbatch);
+ goto repeat; /* loop if pages are missing */
+ }
/* okay - all conditions fulfilled */
- ret = (unsigned long) page_address(pages[0]);
-out_free_pages:
- ptr = pages;
- for (loop = nr; loop > 0; loop--)
- put_page(*ptr++);
out_free:
- kfree(pages);
+ folio_batch_release(&fbatch);
out:
return ret;
}
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index bc66d0173e33..b3257e852820 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -146,15 +146,15 @@ static int ramfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
}
static int ramfs_tmpfile(struct user_namespace *mnt_userns,
- struct inode *dir, struct dentry *dentry, umode_t mode)
+ struct inode *dir, struct file *file, umode_t mode)
{
struct inode *inode;
inode = ramfs_get_inode(dir->i_sb, dir, mode, 0);
if (!inode)
return -ENOSPC;
- d_tmpfile(dentry, inode);
- return 0;
+ d_tmpfile(file, inode);
+ return finish_open_simple(file, 0);
}
static const struct inode_operations ramfs_dir_inode_operations = {
diff --git a/fs/read_write.c b/fs/read_write.c
index 1a261dcf1778..328ce8cf9a85 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -496,14 +496,9 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t
}
/* caller is responsible for file_start_write/file_end_write */
-ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
+ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos)
{
- struct kvec iov = {
- .iov_base = (void *)buf,
- .iov_len = min_t(size_t, count, MAX_RW_COUNT),
- };
struct kiocb kiocb;
- struct iov_iter iter;
ssize_t ret;
if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE)))
@@ -519,8 +514,7 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t
init_sync_kiocb(&kiocb, file);
kiocb.ki_pos = pos ? *pos : 0;
- iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len);
- ret = file->f_op->write_iter(&kiocb, &iter);
+ ret = file->f_op->write_iter(&kiocb, from);
if (ret > 0) {
if (pos)
*pos = kiocb.ki_pos;
@@ -530,6 +524,18 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t
inc_syscw(current);
return ret;
}
+
+/* caller is responsible for file_start_write/file_end_write */
+ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
+{
+ struct kvec iov = {
+ .iov_base = (void *)buf,
+ .iov_len = min_t(size_t, count, MAX_RW_COUNT),
+ };
+ struct iov_iter iter;
+ iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len);
+ return __kernel_write_iter(file, &iter, pos);
+}
/*
* This "EXPORT_SYMBOL_GPL()" is more of a "EXPORT_SYMBOL_DONTUSE()",
* but autofs is one of the few internal kernel users that actually
diff --git a/fs/readdir.c b/fs/readdir.c
index 09e8ed7d4161..9c53edb60c03 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -140,7 +140,7 @@ struct readdir_callback {
int result;
};
-static int fillonedir(struct dir_context *ctx, const char *name, int namlen,
+static bool fillonedir(struct dir_context *ctx, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type)
{
struct readdir_callback *buf =
@@ -149,14 +149,14 @@ static int fillonedir(struct dir_context *ctx, const char *name, int namlen,
unsigned long d_ino;
if (buf->result)
- return -EINVAL;
+ return false;
buf->result = verify_dirent_name(name, namlen);
- if (buf->result < 0)
- return buf->result;
+ if (buf->result)
+ return false;
d_ino = ino;
if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
buf->result = -EOVERFLOW;
- return -EOVERFLOW;
+ return false;
}
buf->result++;
dirent = buf->dirent;
@@ -169,12 +169,12 @@ static int fillonedir(struct dir_context *ctx, const char *name, int namlen,
unsafe_put_user(namlen, &dirent->d_namlen, efault_end);
unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
user_write_access_end();
- return 0;
+ return true;
efault_end:
user_write_access_end();
efault:
buf->result = -EFAULT;
- return -EFAULT;
+ return false;
}
SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
@@ -219,7 +219,7 @@ struct getdents_callback {
int error;
};
-static int filldir(struct dir_context *ctx, const char *name, int namlen,
+static bool filldir(struct dir_context *ctx, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type)
{
struct linux_dirent __user *dirent, *prev;
@@ -232,18 +232,18 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
buf->error = verify_dirent_name(name, namlen);
if (unlikely(buf->error))
- return buf->error;
+ return false;
buf->error = -EINVAL; /* only used if we fail.. */
if (reclen > buf->count)
- return -EINVAL;
+ return false;
d_ino = ino;
if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
buf->error = -EOVERFLOW;
- return -EOVERFLOW;
+ return false;
}
prev_reclen = buf->prev_reclen;
if (prev_reclen && signal_pending(current))
- return -EINTR;
+ return false;
dirent = buf->current_dir;
prev = (void __user *) dirent - prev_reclen;
if (!user_write_access_begin(prev, reclen + prev_reclen))
@@ -260,12 +260,12 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
buf->current_dir = (void __user *)dirent + reclen;
buf->prev_reclen = reclen;
buf->count -= reclen;
- return 0;
+ return true;
efault_end:
user_write_access_end();
efault:
buf->error = -EFAULT;
- return -EFAULT;
+ return false;
}
SYSCALL_DEFINE3(getdents, unsigned int, fd,
@@ -307,7 +307,7 @@ struct getdents_callback64 {
int error;
};
-static int filldir64(struct dir_context *ctx, const char *name, int namlen,
+static bool filldir64(struct dir_context *ctx, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type)
{
struct linux_dirent64 __user *dirent, *prev;
@@ -319,13 +319,13 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
buf->error = verify_dirent_name(name, namlen);
if (unlikely(buf->error))
- return buf->error;
+ return false;
buf->error = -EINVAL; /* only used if we fail.. */
if (reclen > buf->count)
- return -EINVAL;
+ return false;
prev_reclen = buf->prev_reclen;
if (prev_reclen && signal_pending(current))
- return -EINTR;
+ return false;
dirent = buf->current_dir;
prev = (void __user *)dirent - prev_reclen;
if (!user_write_access_begin(prev, reclen + prev_reclen))
@@ -342,13 +342,13 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
buf->prev_reclen = reclen;
buf->current_dir = (void __user *)dirent + reclen;
buf->count -= reclen;
- return 0;
+ return true;
efault_end:
user_write_access_end();
efault:
buf->error = -EFAULT;
- return -EFAULT;
+ return false;
}
SYSCALL_DEFINE3(getdents64, unsigned int, fd,
@@ -397,7 +397,7 @@ struct compat_readdir_callback {
int result;
};
-static int compat_fillonedir(struct dir_context *ctx, const char *name,
+static bool compat_fillonedir(struct dir_context *ctx, const char *name,
int namlen, loff_t offset, u64 ino,
unsigned int d_type)
{
@@ -407,14 +407,14 @@ static int compat_fillonedir(struct dir_context *ctx, const char *name,
compat_ulong_t d_ino;
if (buf->result)
- return -EINVAL;
+ return false;
buf->result = verify_dirent_name(name, namlen);
- if (buf->result < 0)
- return buf->result;
+ if (buf->result)
+ return false;
d_ino = ino;
if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
buf->result = -EOVERFLOW;
- return -EOVERFLOW;
+ return false;
}
buf->result++;
dirent = buf->dirent;
@@ -427,12 +427,12 @@ static int compat_fillonedir(struct dir_context *ctx, const char *name,
unsafe_put_user(namlen, &dirent->d_namlen, efault_end);
unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
user_write_access_end();
- return 0;
+ return true;
efault_end:
user_write_access_end();
efault:
buf->result = -EFAULT;
- return -EFAULT;
+ return false;
}
COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
@@ -471,7 +471,7 @@ struct compat_getdents_callback {
int error;
};
-static int compat_filldir(struct dir_context *ctx, const char *name, int namlen,
+static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type)
{
struct compat_linux_dirent __user *dirent, *prev;
@@ -484,18 +484,18 @@ static int compat_filldir(struct dir_context *ctx, const char *name, int namlen,
buf->error = verify_dirent_name(name, namlen);
if (unlikely(buf->error))
- return buf->error;
+ return false;
buf->error = -EINVAL; /* only used if we fail.. */
if (reclen > buf->count)
- return -EINVAL;
+ return false;
d_ino = ino;
if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
buf->error = -EOVERFLOW;
- return -EOVERFLOW;
+ return false;
}
prev_reclen = buf->prev_reclen;
if (prev_reclen && signal_pending(current))
- return -EINTR;
+ return false;
dirent = buf->current_dir;
prev = (void __user *) dirent - prev_reclen;
if (!user_write_access_begin(prev, reclen + prev_reclen))
@@ -511,12 +511,12 @@ static int compat_filldir(struct dir_context *ctx, const char *name, int namlen,
buf->prev_reclen = reclen;
buf->current_dir = (void __user *)dirent + reclen;
buf->count -= reclen;
- return 0;
+ return true;
efault_end:
user_write_access_end();
efault:
buf->error = -EFAULT;
- return -EFAULT;
+ return false;
}
COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd,
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 94addfcefede..9f62da7471c9 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -868,7 +868,7 @@ loop_next:
*/
if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
spin_unlock(lock);
- ll_rw_block(REQ_OP_WRITE, 1, &bh);
+ write_dirty_buffer(bh, 0);
spin_lock(lock);
}
put_bh(bh);
@@ -1054,7 +1054,7 @@ static int flush_commit_list(struct super_block *s,
if (tbh) {
if (buffer_dirty(tbh)) {
depth = reiserfs_write_unlock_nested(s);
- ll_rw_block(REQ_OP_WRITE, 1, &tbh);
+ write_dirty_buffer(tbh, 0);
reiserfs_write_lock_nested(s, depth);
}
put_bh(tbh) ;
@@ -2240,7 +2240,7 @@ abort_replay:
}
}
/* read in the log blocks, memcpy to the corresponding real block */
- ll_rw_block(REQ_OP_READ, get_desc_trans_len(desc), log_blocks);
+ bh_read_batch(get_desc_trans_len(desc), log_blocks);
for (i = 0; i < get_desc_trans_len(desc); i++) {
wait_on_buffer(log_blocks[i]);
@@ -2342,10 +2342,11 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev,
} else
bhlist[j++] = bh;
}
- ll_rw_block(REQ_OP_READ, j, bhlist);
+ bh = bhlist[0];
+ bh_read_nowait(bh, 0);
+ bh_readahead_batch(j - 1, &bhlist[1], 0);
for (i = 1; i < j; i++)
brelse(bhlist[i]);
- bh = bhlist[0];
wait_on_buffer(bh);
if (buffer_uptodate(bh))
return bh;
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 30319dc33c18..84a194b77f19 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -456,7 +456,7 @@ static int print_internal(struct buffer_head *bh, int first, int last)
to = B_NR_ITEMS(bh);
} else {
from = first;
- to = last < B_NR_ITEMS(bh) ? last : B_NR_ITEMS(bh);
+ to = min_t(int, last, B_NR_ITEMS(bh));
}
reiserfs_printk("INTERNAL NODE (%ld) contains %z\n", bh->b_blocknr, bh);
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 4a7cb16e9345..3dba8acf4e83 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -411,7 +411,7 @@ int reiserfs_proc_info_init(struct super_block *sb)
char *s;
/* Some block devices use /'s */
- strlcpy(b, sb->s_id, BDEVNAME_SIZE);
+ strscpy(b, sb->s_id, BDEVNAME_SIZE);
s = strchr(b, '/');
if (s)
*s = '!';
@@ -441,7 +441,7 @@ int reiserfs_proc_info_done(struct super_block *sb)
char *s;
/* Some block devices use /'s */
- strlcpy(b, sb->s_id, BDEVNAME_SIZE);
+ strscpy(b, sb->s_id, BDEVNAME_SIZE);
s = strchr(b, '/');
if (s)
*s = '!';
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 8096c74c38ac..7b498a0d060b 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -97,7 +97,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
* using the copy_size var below allows this code to work for
* both shrinking and expanding the FS.
*/
- copy_size = bmap_nr_new < bmap_nr ? bmap_nr_new : bmap_nr;
+ copy_size = min(bmap_nr_new, bmap_nr);
copy_size =
copy_size * sizeof(struct reiserfs_list_bitmap_node *);
for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 9a293609a022..84c12a1947b2 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -579,7 +579,7 @@ static int search_by_key_reada(struct super_block *s,
if (!buffer_uptodate(bh[j])) {
if (depth == -1)
depth = reiserfs_write_unlock_nested(s);
- ll_rw_block(REQ_OP_READ | REQ_RAHEAD, 1, bh + j);
+ bh_readahead(bh[j], REQ_RAHEAD);
}
brelse(bh[j]);
}
@@ -685,7 +685,7 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key,
if (!buffer_uptodate(bh) && depth == -1)
depth = reiserfs_write_unlock_nested(sb);
- ll_rw_block(REQ_OP_READ, 1, &bh);
+ bh_read_nowait(bh, 0);
wait_on_buffer(bh);
if (depth != -1)
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index c88cd2ce0665..929acce6e731 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1702,9 +1702,7 @@ static int read_super_block(struct super_block *s, int offset)
/* after journal replay, reread all bitmap and super blocks */
static int reread_meta_blocks(struct super_block *s)
{
- ll_rw_block(REQ_OP_READ, 1, &SB_BUFFER_WITH_SB(s));
- wait_on_buffer(SB_BUFFER_WITH_SB(s));
- if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) {
+ if (bh_read(SB_BUFFER_WITH_SB(s), 0) < 0) {
reiserfs_warning(s, "reiserfs-2504", "error reading the super");
return 1;
}
@@ -2504,9 +2502,7 @@ static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data,
len = i_size - off;
toread = len;
while (toread > 0) {
- tocopy =
- sb->s_blocksize - offset <
- toread ? sb->s_blocksize - offset : toread;
+ tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread);
tmp_bh.b_state = 0;
/*
* Quota files are without tails so we can safely
@@ -2554,8 +2550,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
return -EIO;
}
while (towrite > 0) {
- tocopy = sb->s_blocksize - offset < towrite ?
- sb->s_blocksize - offset : towrite;
+ tocopy = min_t(unsigned long, sb->s_blocksize - offset, towrite);
tmp_bh.b_state = 0;
reiserfs_write_lock(sb);
err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE);
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 436641369283..8b2d52443f41 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -189,7 +189,7 @@ struct reiserfs_dentry_buf {
struct dentry *dentries[8];
};
-static int
+static bool
fill_with_dentries(struct dir_context *ctx, const char *name, int namelen,
loff_t offset, u64 ino, unsigned int d_type)
{
@@ -200,16 +200,16 @@ fill_with_dentries(struct dir_context *ctx, const char *name, int namelen,
WARN_ON_ONCE(!inode_is_locked(d_inode(dbuf->xadir)));
if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
- return -ENOSPC;
+ return false;
if (name[0] == '.' && (namelen < 2 ||
(namelen == 2 && name[1] == '.')))
- return 0;
+ return true;
dentry = lookup_one_len(name, dbuf->xadir, namelen);
if (IS_ERR(dentry)) {
dbuf->err = PTR_ERR(dentry);
- return PTR_ERR(dentry);
+ return false;
} else if (d_really_is_negative(dentry)) {
/* A directory entry exists, but no file? */
reiserfs_error(dentry->d_sb, "xattr-20003",
@@ -218,11 +218,11 @@ fill_with_dentries(struct dir_context *ctx, const char *name, int namelen,
dentry, dbuf->xadir);
dput(dentry);
dbuf->err = -EIO;
- return -EIO;
+ return false;
}
dbuf->dentries[dbuf->count++] = dentry;
- return 0;
+ return true;
}
static void
@@ -797,7 +797,7 @@ struct listxattr_buf {
struct dentry *dentry;
};
-static int listxattr_filler(struct dir_context *ctx, const char *name,
+static bool listxattr_filler(struct dir_context *ctx, const char *name,
int namelen, loff_t offset, u64 ino,
unsigned int d_type)
{
@@ -813,19 +813,19 @@ static int listxattr_filler(struct dir_context *ctx, const char *name,
name);
if (!handler /* Unsupported xattr name */ ||
(handler->list && !handler->list(b->dentry)))
- return 0;
+ return true;
size = namelen + 1;
if (b->buf) {
if (b->pos + size > b->size) {
b->pos = -ERANGE;
- return -ERANGE;
+ return false;
}
memcpy(b->buf + b->pos, name, namelen);
b->buf[b->pos + namelen] = 0;
}
b->pos += size;
}
- return 0;
+ return true;
}
/*
diff --git a/fs/smbfs_common/smb2pdu.h b/fs/smbfs_common/smb2pdu.h
index 2cab413fffee..7d605db3bb3b 100644
--- a/fs/smbfs_common/smb2pdu.h
+++ b/fs/smbfs_common/smb2pdu.h
@@ -1101,7 +1101,11 @@ struct smb2_change_notify_rsp {
#define SMB2_CREATE_REQUEST_LEASE "RqLs"
#define SMB2_CREATE_DURABLE_HANDLE_REQUEST_V2 "DH2Q"
#define SMB2_CREATE_DURABLE_HANDLE_RECONNECT_V2 "DH2C"
-#define SMB2_CREATE_TAG_POSIX "\x93\xAD\x25\x50\x9C\xB4\x11\xE7\xB4\x23\x83\xDE\x96\x8B\xCD\x7C"
+#define SMB2_CREATE_TAG_POSIX "\x93\xAD\x25\x50\x9C\xB4\x11\xE7\xB4\x23\x83\xDE\x96\x8B\xCD\x7C"
+#define SMB2_CREATE_APP_INSTANCE_ID "\x45\xBC\xA6\x6A\xEF\xA7\xF7\x4A\x90\x08\xFA\x46\x2E\x14\x4D\x74"
+#define SMB2_CREATE_APP_INSTANCE_VERSION "\xB9\x82\xD0\xB7\x3B\x56\x07\x4F\xA0\x7B\x52\x4A\x81\x16\xA0\x10"
+#define SVHDX_OPEN_DEVICE_CONTEXT "\x9C\xCB\xCF\x9E\x04\xC1\xE6\x43\x98\x0E\x15\x8D\xA1\xF6\xEC\x83"
+#define SMB2_CREATE_TAG_AAPL "AAPL"
/* Flag (SMB3 open response) values */
#define SMB2_CREATE_FLAG_REPARSEPOINT 0x01
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index e56510964b22..8ba8c4c50770 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -506,8 +506,9 @@ static int squashfs_readahead_fragment(struct page **page,
squashfs_i(inode)->fragment_size);
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
unsigned int n, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
+ int error = buffer->error;
- if (buffer->error)
+ if (error)
goto out;
expected += squashfs_i(inode)->fragment_offset;
@@ -529,7 +530,7 @@ static int squashfs_readahead_fragment(struct page **page,
out:
squashfs_cache_put(buffer);
- return buffer->error;
+ return error;
}
static void squashfs_readahead(struct readahead_control *ractl)
@@ -557,6 +558,13 @@ static void squashfs_readahead(struct readahead_control *ractl)
int res, bsize;
u64 block = 0;
unsigned int expected;
+ struct page *last_page;
+
+ expected = start >> msblk->block_log == file_end ?
+ (i_size_read(inode) & (msblk->block_size - 1)) :
+ msblk->block_size;
+
+ max_pages = (expected + PAGE_SIZE - 1) >> PAGE_SHIFT;
nr_pages = __readahead_batch(ractl, pages, max_pages);
if (!nr_pages)
@@ -566,13 +574,10 @@ static void squashfs_readahead(struct readahead_control *ractl)
goto skip_pages;
index = pages[0]->index >> shift;
+
if ((pages[nr_pages - 1]->index >> shift) != index)
goto skip_pages;
- expected = index == file_end ?
- (i_size_read(inode) & (msblk->block_size - 1)) :
- msblk->block_size;
-
if (index == file_end && squashfs_i(inode)->fragment_block !=
SQUASHFS_INVALID_BLK) {
res = squashfs_readahead_fragment(pages, nr_pages,
@@ -593,15 +598,15 @@ static void squashfs_readahead(struct readahead_control *ractl)
res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
- squashfs_page_actor_free(actor);
+ last_page = squashfs_page_actor_free(actor);
if (res == expected) {
int bytes;
/* Last page (if present) may have trailing bytes not filled */
bytes = res % PAGE_SIZE;
- if (pages[nr_pages - 1]->index == file_end && bytes)
- memzero_page(pages[nr_pages - 1], bytes,
+ if (index == file_end && bytes && last_page)
+ memzero_page(last_page, bytes,
PAGE_SIZE - bytes);
for (i = 0; i < nr_pages; i++) {
diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c
index 54b93bf4a25c..81af6c4ca115 100644
--- a/fs/squashfs/page_actor.c
+++ b/fs/squashfs/page_actor.c
@@ -71,11 +71,13 @@ static void *handle_next_page(struct squashfs_page_actor *actor)
(actor->next_index != actor->page[actor->next_page]->index)) {
actor->next_index++;
actor->returned_pages++;
+ actor->last_page = NULL;
return actor->alloc_buffer ? actor->tmp_buffer : ERR_PTR(-ENOMEM);
}
actor->next_index++;
actor->returned_pages++;
+ actor->last_page = actor->page[actor->next_page];
return actor->pageaddr = kmap_local_page(actor->page[actor->next_page++]);
}
@@ -125,6 +127,7 @@ struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_
actor->returned_pages = 0;
actor->next_index = page[0]->index & ~((1 << (msblk->block_log - PAGE_SHIFT)) - 1);
actor->pageaddr = NULL;
+ actor->last_page = NULL;
actor->alloc_buffer = msblk->decompressor->alloc_buffer;
actor->squashfs_first_page = direct_first_page;
actor->squashfs_next_page = direct_next_page;
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
index 95ffbb543d91..97d4983559b1 100644
--- a/fs/squashfs/page_actor.h
+++ b/fs/squashfs/page_actor.h
@@ -16,6 +16,7 @@ struct squashfs_page_actor {
void *(*squashfs_first_page)(struct squashfs_page_actor *);
void *(*squashfs_next_page)(struct squashfs_page_actor *);
void (*squashfs_finish_page)(struct squashfs_page_actor *);
+ struct page *last_page;
int pages;
int length;
int next_page;
@@ -29,10 +30,13 @@ extern struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
extern struct squashfs_page_actor *squashfs_page_actor_init_special(
struct squashfs_sb_info *msblk,
struct page **page, int pages, int length);
-static inline void squashfs_page_actor_free(struct squashfs_page_actor *actor)
+static inline struct page *squashfs_page_actor_free(struct squashfs_page_actor *actor)
{
+ struct page *last_page = actor->last_page;
+
kfree(actor->tmp_buffer);
kfree(actor);
+ return last_page;
}
static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
{
diff --git a/fs/stat.c b/fs/stat.c
index 9ced8860e0f3..ef50573c72a2 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -5,6 +5,7 @@
* Copyright (C) 1991, 1992 Linus Torvalds
*/
+#include <linux/blkdev.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/errno.h>
@@ -230,11 +231,22 @@ retry:
goto out;
error = vfs_getattr(&path, stat, request_mask, flags);
+
stat->mnt_id = real_mount(path.mnt)->mnt_id;
stat->result_mask |= STATX_MNT_ID;
+
if (path.mnt->mnt_root == path.dentry)
stat->attributes |= STATX_ATTR_MOUNT_ROOT;
stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT;
+
+ /* Handle STATX_DIOALIGN for block devices. */
+ if (request_mask & STATX_DIOALIGN) {
+ struct inode *inode = d_backing_inode(path.dentry);
+
+ if (S_ISBLK(inode->i_mode))
+ bdev_statx_dioalign(inode, stat);
+ }
+
path_put(&path);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
@@ -611,6 +623,8 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
tmp.stx_dev_major = MAJOR(stat->dev);
tmp.stx_dev_minor = MINOR(stat->dev);
tmp.stx_mnt_id = stat->mnt_id;
+ tmp.stx_dio_mem_align = stat->dio_mem_align;
+ tmp.stx_dio_offset_align = stat->dio_offset_align;
return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
}
diff --git a/fs/super.c b/fs/super.c
index 734ed584a946..8d39e4f11cfa 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -291,7 +291,7 @@ static void __put_super(struct super_block *s)
WARN_ON(s->s_inode_lru.node);
WARN_ON(!list_empty(&s->s_mounts));
security_sb_free(s);
- fscrypt_sb_free(s);
+ fscrypt_destroy_keyring(s);
put_user_ns(s->s_user_ns);
kfree(s->s_subtype);
call_rcu(&s->rcu, destroy_super_rcu);
@@ -480,6 +480,7 @@ void generic_shutdown_super(struct super_block *sb)
evict_inodes(sb);
/* only nonzero refcount inodes can have marks */
fsnotify_sb_delete(sb);
+ fscrypt_destroy_keyring(sb);
security_sb_delete(sb);
if (sb->s_dio_done_wq) {
diff --git a/fs/ubifs/crypto.c b/fs/ubifs/crypto.c
index c57b46a352d8..3125e76376ee 100644
--- a/fs/ubifs/crypto.c
+++ b/fs/ubifs/crypto.c
@@ -24,6 +24,17 @@ static bool ubifs_crypt_empty_dir(struct inode *inode)
return ubifs_check_dir_empty(inode) == 0;
}
+/**
+ * ubifs_encrypt - Encrypt data.
+ * @inode: inode which refers to the data node
+ * @dn: data node to encrypt
+ * @in_len: length of data to be compressed
+ * @out_len: allocated memory size for the data area of @dn
+ * @block: logical block number of the block
+ *
+ * This function encrypt a possibly-compressed data in the data node.
+ * The encrypted data length will store in @out_len.
+ */
int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn,
unsigned int in_len, unsigned int *out_len, int block)
{
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index fc718f6178f2..3f128b9fdfbb 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2467,7 +2467,7 @@ error_dump:
static inline int chance(unsigned int n, unsigned int out_of)
{
- return !!((prandom_u32() % out_of) + 1 <= n);
+ return !!(prandom_u32_max(out_of) + 1 <= n);
}
@@ -2485,13 +2485,13 @@ static int power_cut_emulated(struct ubifs_info *c, int lnum, int write)
if (chance(1, 2)) {
d->pc_delay = 1;
/* Fail within 1 minute */
- delay = prandom_u32() % 60000;
+ delay = prandom_u32_max(60000);
d->pc_timeout = jiffies;
d->pc_timeout += msecs_to_jiffies(delay);
ubifs_warn(c, "failing after %lums", delay);
} else {
d->pc_delay = 2;
- delay = prandom_u32() % 10000;
+ delay = prandom_u32_max(10000);
/* Fail within 10000 operations */
d->pc_cnt_max = delay;
ubifs_warn(c, "failing after %lu calls", delay);
@@ -2571,7 +2571,7 @@ static int corrupt_data(const struct ubifs_info *c, const void *buf,
unsigned int from, to, ffs = chance(1, 2);
unsigned char *p = (void *)buf;
- from = prandom_u32() % len;
+ from = prandom_u32_max(len);
/* Corruption span max to end of write unit */
to = min(len, ALIGN(from + 1, c->max_write_size));
@@ -2581,7 +2581,7 @@ static int corrupt_data(const struct ubifs_info *c, const void *buf,
if (ffs)
memset(p + from, 0xFF, to - from);
else
- prandom_bytes(p + from, to - from);
+ get_random_bytes(p + from, to - from);
return to;
}
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 86151889548e..0f29cf201136 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -68,13 +68,14 @@ static int inherit_flags(const struct inode *dir, umode_t mode)
* @c: UBIFS file-system description object
* @dir: parent directory inode
* @mode: inode mode flags
+ * @is_xattr: whether the inode is xattr inode
*
* This function finds an unused inode number, allocates new inode and
* initializes it. Returns new inode in case of success and an error code in
* case of failure.
*/
struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
- umode_t mode)
+ umode_t mode, bool is_xattr)
{
int err;
struct inode *inode;
@@ -99,10 +100,12 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
current_time(inode);
inode->i_mapping->nrpages = 0;
- err = fscrypt_prepare_new_inode(dir, inode, &encrypted);
- if (err) {
- ubifs_err(c, "fscrypt_prepare_new_inode failed: %i", err);
- goto out_iput;
+ if (!is_xattr) {
+ err = fscrypt_prepare_new_inode(dir, inode, &encrypted);
+ if (err) {
+ ubifs_err(c, "fscrypt_prepare_new_inode failed: %i", err);
+ goto out_iput;
+ }
}
switch (mode & S_IFMT) {
@@ -309,7 +312,7 @@ static int ubifs_create(struct user_namespace *mnt_userns, struct inode *dir,
sz_change = CALC_DENT_SIZE(fname_len(&nm));
- inode = ubifs_new_inode(c, dir, mode);
+ inode = ubifs_new_inode(c, dir, mode, false);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto out_fname;
@@ -370,7 +373,7 @@ static struct inode *create_whiteout(struct inode *dir, struct dentry *dentry)
if (err)
return ERR_PTR(err);
- inode = ubifs_new_inode(c, dir, mode);
+ inode = ubifs_new_inode(c, dir, mode, false);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto out_free;
@@ -424,8 +427,9 @@ static void unlock_2_inodes(struct inode *inode1, struct inode *inode2)
}
static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
- struct dentry *dentry, umode_t mode)
+ struct file *file, umode_t mode)
{
+ struct dentry *dentry = file->f_path.dentry;
struct inode *inode;
struct ubifs_info *c = dir->i_sb->s_fs_info;
struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
@@ -462,7 +466,7 @@ static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
return err;
}
- inode = ubifs_new_inode(c, dir, mode);
+ inode = ubifs_new_inode(c, dir, mode, false);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto out_budg;
@@ -475,7 +479,7 @@ static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
mutex_lock(&ui->ui_mutex);
insert_inode_hash(inode);
- d_tmpfile(dentry, inode);
+ d_tmpfile(file, inode);
ubifs_assert(c, ui->dirty);
instantiated = 1;
@@ -489,7 +493,7 @@ static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
ubifs_release_budget(c, &req);
- return 0;
+ return finish_open_simple(file, 0);
out_cancel:
unlock_2_inodes(dir, inode);
@@ -872,7 +876,7 @@ out_fname:
}
/**
- * check_dir_empty - check if a directory is empty or not.
+ * ubifs_check_dir_empty - check if a directory is empty or not.
* @dir: VFS inode object of the directory to check
*
* This function checks if directory @dir is empty. Returns zero if the
@@ -1004,7 +1008,7 @@ static int ubifs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
sz_change = CALC_DENT_SIZE(fname_len(&nm));
- inode = ubifs_new_inode(c, dir, S_IFDIR | mode);
+ inode = ubifs_new_inode(c, dir, S_IFDIR | mode, false);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto out_fname;
@@ -1091,7 +1095,7 @@ static int ubifs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
sz_change = CALC_DENT_SIZE(fname_len(&nm));
- inode = ubifs_new_inode(c, dir, mode);
+ inode = ubifs_new_inode(c, dir, mode, false);
if (IS_ERR(inode)) {
kfree(dev);
err = PTR_ERR(inode);
@@ -1173,7 +1177,7 @@ static int ubifs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
sz_change = CALC_DENT_SIZE(fname_len(&nm));
- inode = ubifs_new_inode(c, dir, S_IFLNK | S_IRWXUGO);
+ inode = ubifs_new_inode(c, dir, S_IFLNK | S_IRWXUGO, false);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto out_fname;
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 75dab0ae3939..d02509920baf 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -503,7 +503,7 @@ static void mark_inode_clean(struct ubifs_info *c, struct ubifs_inode *ui)
static void set_dent_cookie(struct ubifs_info *c, struct ubifs_dent_node *dent)
{
if (c->double_hash)
- dent->cookie = (__force __le32) prandom_u32();
+ dent->cookie = (__force __le32) get_random_u32();
else
dent->cookie = 0;
}
@@ -1472,23 +1472,25 @@ out_free:
* @block: data block number
* @dn: data node to re-compress
* @new_len: new length
+ * @dn_size: size of the data node @dn in memory
*
* This function is used when an inode is truncated and the last data node of
* the inode has to be re-compressed/encrypted and re-written.
*/
static int truncate_data_node(const struct ubifs_info *c, const struct inode *inode,
unsigned int block, struct ubifs_data_node *dn,
- int *new_len)
+ int *new_len, int dn_size)
{
void *buf;
- int err, dlen, compr_type, out_len, old_dlen;
+ int err, dlen, compr_type, out_len, data_size;
out_len = le32_to_cpu(dn->size);
buf = kmalloc_array(out_len, WORST_COMPR_FACTOR, GFP_NOFS);
if (!buf)
return -ENOMEM;
- dlen = old_dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
+ dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
+ data_size = dn_size - UBIFS_DATA_NODE_SZ;
compr_type = le16_to_cpu(dn->compr_type);
if (IS_ENCRYPTED(inode)) {
@@ -1508,11 +1510,11 @@ static int truncate_data_node(const struct ubifs_info *c, const struct inode *in
}
if (IS_ENCRYPTED(inode)) {
- err = ubifs_encrypt(inode, dn, out_len, &old_dlen, block);
+ err = ubifs_encrypt(inode, dn, out_len, &data_size, block);
if (err)
goto out;
- out_len = old_dlen;
+ out_len = data_size;
} else {
dn->compr_size = 0;
}
@@ -1550,6 +1552,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
struct ubifs_trun_node *trun;
struct ubifs_data_node *dn;
int err, dlen, len, lnum, offs, bit, sz, sync = IS_SYNC(inode);
+ int dn_size;
struct ubifs_inode *ui = ubifs_inode(inode);
ino_t inum = inode->i_ino;
unsigned int blk;
@@ -1562,10 +1565,13 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
ubifs_assert(c, S_ISREG(inode->i_mode));
ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
- sz = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ +
- UBIFS_MAX_DATA_NODE_SZ * WORST_COMPR_FACTOR;
+ dn_size = COMPRESSED_DATA_NODE_BUF_SZ;
- sz += ubifs_auth_node_sz(c);
+ if (IS_ENCRYPTED(inode))
+ dn_size += UBIFS_CIPHER_BLOCK_SIZE;
+
+ sz = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ +
+ dn_size + ubifs_auth_node_sz(c);
ino = kmalloc(sz, GFP_NOFS);
if (!ino)
@@ -1596,15 +1602,15 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
if (dn_len <= 0 || dn_len > UBIFS_BLOCK_SIZE) {
ubifs_err(c, "bad data node (block %u, inode %lu)",
blk, inode->i_ino);
- ubifs_dump_node(c, dn, sz - UBIFS_INO_NODE_SZ -
- UBIFS_TRUN_NODE_SZ);
+ ubifs_dump_node(c, dn, dn_size);
goto out_free;
}
if (dn_len <= dlen)
dlen = 0; /* Nothing to do */
else {
- err = truncate_data_node(c, inode, blk, dn, &dlen);
+ err = truncate_data_node(c, inode, blk, dn,
+ &dlen, dn_size);
if (err)
goto out_free;
}
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index d76a19e460cd..cfbc31f709f4 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -1970,28 +1970,28 @@ static int dbg_populate_lsave(struct ubifs_info *c)
if (!dbg_is_chk_gen(c))
return 0;
- if (prandom_u32() & 3)
+ if (prandom_u32_max(4))
return 0;
for (i = 0; i < c->lsave_cnt; i++)
c->lsave[i] = c->main_first;
list_for_each_entry(lprops, &c->empty_list, list)
- c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum;
+ c->lsave[prandom_u32_max(c->lsave_cnt)] = lprops->lnum;
list_for_each_entry(lprops, &c->freeable_list, list)
- c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum;
+ c->lsave[prandom_u32_max(c->lsave_cnt)] = lprops->lnum;
list_for_each_entry(lprops, &c->frdi_idx_list, list)
- c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum;
+ c->lsave[prandom_u32_max(c->lsave_cnt)] = lprops->lnum;
heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
for (i = 0; i < heap->cnt; i++)
- c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum;
+ c->lsave[prandom_u32_max(c->lsave_cnt)] = heap->arr[i]->lnum;
heap = &c->lpt_heap[LPROPS_DIRTY - 1];
for (i = 0; i < heap->cnt; i++)
- c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum;
+ c->lsave[prandom_u32_max(c->lsave_cnt)] = heap->arr[i]->lnum;
heap = &c->lpt_heap[LPROPS_FREE - 1];
for (i = 0; i < heap->cnt; i++)
- c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum;
+ c->lsave[prandom_u32_max(c->lsave_cnt)] = heap->arr[i]->lnum;
return 1;
}
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 58c92c96ecef..01362ad5f804 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -700,7 +700,7 @@ static int alloc_idx_lebs(struct ubifs_info *c, int cnt)
c->ilebs[c->ileb_cnt++] = lnum;
dbg_cmt("LEB %d", lnum);
}
- if (dbg_is_chk_index(c) && !(prandom_u32() & 7))
+ if (dbg_is_chk_index(c) && !prandom_u32_max(8))
return -ENOSPC;
return 0;
}
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 7d6d2f152e03..478bbbb5382f 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -2026,7 +2026,7 @@ int ubifs_update_time(struct inode *inode, struct timespec64 *time, int flags);
/* dir.c */
struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
- umode_t mode);
+ umode_t mode, bool is_xattr);
int ubifs_getattr(struct user_namespace *mnt_userns, const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags);
int ubifs_check_dir_empty(struct inode *dir);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index e4c4761aff7f..3db8486e3725 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -110,7 +110,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
if (err)
return err;
- inode = ubifs_new_inode(c, host, S_IFREG | S_IRWXUGO);
+ inode = ubifs_new_inode(c, host, S_IFREG | S_IRWXUGO, true);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto out_budg;
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index cad3772f9dbe..be640f4b2f2c 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -130,7 +130,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
brelse(tmp);
}
if (num) {
- ll_rw_block(REQ_OP_READ | REQ_RAHEAD, num, bha);
+ bh_readahead_batch(num, bha, REQ_RAHEAD);
for (i = 0; i < num; i++)
brelse(bha[i]);
}
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index a2adf6293093..16bcf2c6b8b3 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -89,7 +89,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
brelse(tmp);
}
if (num) {
- ll_rw_block(REQ_OP_READ | REQ_RAHEAD, num, bha);
+ bh_readahead_batch(num, bha, REQ_RAHEAD);
for (i = 0; i < num; i++)
brelse(bha[i]);
}
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 09aef77269fe..5c659e23e578 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -252,6 +252,7 @@ const struct file_operations udf_file_operations = {
.release = udf_release_file,
.fsync = generic_file_fsync,
.splice_read = generic_file_splice_read,
+ .splice_write = iter_file_splice_write,
.llseek = generic_file_llseek,
};
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 8d06daed549f..dce6ae9ae306 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1211,13 +1211,7 @@ struct buffer_head *udf_bread(struct inode *inode, udf_pblk_t block,
if (!bh)
return NULL;
- if (buffer_uptodate(bh))
- return bh;
-
- ll_rw_block(REQ_OP_READ, 1, &bh);
-
- wait_on_buffer(bh);
- if (buffer_uptodate(bh))
+ if (bh_read(bh, 0) >= 0)
return bh;
brelse(bh);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index b3d5f97f16cd..fb4c30e05245 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -626,7 +626,7 @@ static int udf_create(struct user_namespace *mnt_userns, struct inode *dir,
}
static int udf_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
- struct dentry *dentry, umode_t mode)
+ struct file *file, umode_t mode)
{
struct inode *inode = udf_new_inode(dir, mode);
@@ -640,9 +640,9 @@ static int udf_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
inode->i_op = &udf_file_inode_operations;
inode->i_fop = &udf_file_operations;
mark_inode_dirty(inode);
- d_tmpfile(dentry, inode);
+ d_tmpfile(file, inode);
unlock_new_inode(inode);
- return 0;
+ return finish_open_simple(file, 0);
}
static int udf_mknod(struct user_namespace *mnt_userns, struct inode *dir,
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index bd810d8239f2..2436e3f82147 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -295,14 +295,10 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg,
if (!buffer_mapped(bh))
map_bh(bh, inode->i_sb, oldb + pos);
- if (!buffer_uptodate(bh)) {
- ll_rw_block(REQ_OP_READ, 1, &bh);
- wait_on_buffer(bh);
- if (!buffer_uptodate(bh)) {
- ufs_error(inode->i_sb, __func__,
- "read of block failed\n");
- break;
- }
+ if (bh_read(bh, 0) < 0) {
+ ufs_error(inode->i_sb, __func__,
+ "read of block failed\n");
+ break;
}
UFSD(" change from %llu to %llu, pos %u\n",
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 175de70e3adf..98ac37e34e3d 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -30,6 +30,7 @@
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/swapops.h>
+#include <linux/miscdevice.h>
int sysctl_unprivileged_userfaultfd __read_mostly;
@@ -415,13 +416,8 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
if (ctx->features & UFFD_FEATURE_SIGBUS)
goto out;
- if ((vmf->flags & FAULT_FLAG_USER) == 0 &&
- ctx->flags & UFFD_USER_MODE_ONLY) {
- printk_once(KERN_WARNING "uffd: Set unprivileged_userfaultfd "
- "sysctl knob to 1 if kernel faults must be handled "
- "without obtaining CAP_SYS_PTRACE capability\n");
+ if (!(vmf->flags & FAULT_FLAG_USER) && (ctx->flags & UFFD_USER_MODE_ONLY))
goto out;
- }
/*
* If it's already released don't get it. This avoids to loop
@@ -615,14 +611,16 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
if (release_new_ctx) {
struct vm_area_struct *vma;
struct mm_struct *mm = release_new_ctx->mm;
+ VMA_ITERATOR(vmi, mm, 0);
/* the various vma->vm_userfaultfd_ctx still points to it */
mmap_write_lock(mm);
- for (vma = mm->mmap; vma; vma = vma->vm_next)
+ for_each_vma(vmi, vma) {
if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
vma->vm_flags &= ~__VM_UFFD_FLAGS;
}
+ }
mmap_write_unlock(mm);
userfaultfd_ctx_put(release_new_ctx);
@@ -803,11 +801,13 @@ static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps,
return false;
}
-int userfaultfd_unmap_prep(struct vm_area_struct *vma,
- unsigned long start, unsigned long end,
- struct list_head *unmaps)
+int userfaultfd_unmap_prep(struct mm_struct *mm, unsigned long start,
+ unsigned long end, struct list_head *unmaps)
{
- for ( ; vma && vma->vm_start < end; vma = vma->vm_next) {
+ VMA_ITERATOR(vmi, mm, start);
+ struct vm_area_struct *vma;
+
+ for_each_vma_range(vmi, vma, end) {
struct userfaultfd_unmap_ctx *unmap_ctx;
struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
@@ -857,6 +857,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
/* len == 0 means wake all */
struct userfaultfd_wake_range range = { .len = 0, };
unsigned long new_flags;
+ MA_STATE(mas, &mm->mm_mt, 0, 0);
WRITE_ONCE(ctx->released, true);
@@ -873,7 +874,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
*/
mmap_write_lock(mm);
prev = NULL;
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ mas_for_each(&mas, vma, ULONG_MAX) {
cond_resched();
BUG_ON(!!vma->vm_userfaultfd_ctx.ctx ^
!!(vma->vm_flags & __VM_UFFD_FLAGS));
@@ -887,10 +888,13 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
vma->vm_file, vma->vm_pgoff,
vma_policy(vma),
NULL_VM_UFFD_CTX, anon_vma_name(vma));
- if (prev)
+ if (prev) {
+ mas_pause(&mas);
vma = prev;
- else
+ } else {
prev = vma;
+ }
+
vma->vm_flags = new_flags;
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
}
@@ -991,7 +995,7 @@ static int resolve_userfault_fork(struct userfaultfd_ctx *new,
int fd;
fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, new,
- O_RDWR | (new->flags & UFFD_SHARED_FCNTL_FLAGS), inode);
+ O_RDONLY | (new->flags & UFFD_SHARED_FCNTL_FLAGS), inode);
if (fd < 0)
return fd;
@@ -1272,6 +1276,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
bool found;
bool basic_ioctls;
unsigned long start, end, vma_end;
+ MA_STATE(mas, &mm->mm_mt, 0, 0);
user_uffdio_register = (struct uffdio_register __user *) arg;
@@ -1314,7 +1319,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
goto out;
mmap_write_lock(mm);
- vma = find_vma_prev(mm, start, &prev);
+ mas_set(&mas, start);
+ vma = mas_find(&mas, ULONG_MAX);
if (!vma)
goto out_unlock;
@@ -1339,7 +1345,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
*/
found = false;
basic_ioctls = false;
- for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) {
+ for (cur = vma; cur; cur = mas_next(&mas, end - 1)) {
cond_resched();
BUG_ON(!!cur->vm_userfaultfd_ctx.ctx ^
@@ -1399,8 +1405,10 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
}
BUG_ON(!found);
- if (vma->vm_start < start)
- prev = vma;
+ mas_set(&mas, start);
+ prev = mas_prev(&mas, 0);
+ if (prev != vma)
+ mas_next(&mas, ULONG_MAX);
ret = 0;
do {
@@ -1430,6 +1438,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
((struct vm_userfaultfd_ctx){ ctx }),
anon_vma_name(vma));
if (prev) {
+ /* vma_merge() invalidated the mas */
+ mas_pause(&mas);
vma = prev;
goto next;
}
@@ -1437,11 +1447,15 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
ret = split_vma(mm, vma, start, 1);
if (ret)
break;
+ /* split_vma() invalidated the mas */
+ mas_pause(&mas);
}
if (vma->vm_end > end) {
ret = split_vma(mm, vma, end, 0);
if (ret)
break;
+ /* split_vma() invalidated the mas */
+ mas_pause(&mas);
}
next:
/*
@@ -1458,8 +1472,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
skip:
prev = vma;
start = vma->vm_end;
- vma = vma->vm_next;
- } while (vma && vma->vm_start < end);
+ vma = mas_next(&mas, end - 1);
+ } while (vma);
out_unlock:
mmap_write_unlock(mm);
mmput(mm);
@@ -1503,6 +1517,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
bool found;
unsigned long start, end, vma_end;
const void __user *buf = (void __user *)arg;
+ MA_STATE(mas, &mm->mm_mt, 0, 0);
ret = -EFAULT;
if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister)))
@@ -1521,7 +1536,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
goto out;
mmap_write_lock(mm);
- vma = find_vma_prev(mm, start, &prev);
+ mas_set(&mas, start);
+ vma = mas_find(&mas, ULONG_MAX);
if (!vma)
goto out_unlock;
@@ -1546,7 +1562,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
*/
found = false;
ret = -EINVAL;
- for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) {
+ for (cur = vma; cur; cur = mas_next(&mas, end - 1)) {
cond_resched();
BUG_ON(!!cur->vm_userfaultfd_ctx.ctx ^
@@ -1566,8 +1582,10 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
}
BUG_ON(!found);
- if (vma->vm_start < start)
- prev = vma;
+ mas_set(&mas, start);
+ prev = mas_prev(&mas, 0);
+ if (prev != vma)
+ mas_next(&mas, ULONG_MAX);
ret = 0;
do {
@@ -1612,17 +1630,20 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
NULL_VM_UFFD_CTX, anon_vma_name(vma));
if (prev) {
vma = prev;
+ mas_pause(&mas);
goto next;
}
if (vma->vm_start < start) {
ret = split_vma(mm, vma, start, 1);
if (ret)
break;
+ mas_pause(&mas);
}
if (vma->vm_end > end) {
ret = split_vma(mm, vma, end, 0);
if (ret)
break;
+ mas_pause(&mas);
}
next:
/*
@@ -1636,8 +1657,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
skip:
prev = vma;
start = vma->vm_end;
- vma = vma->vm_next;
- } while (vma && vma->vm_start < end);
+ vma = mas_next(&mas, end - 1);
+ } while (vma);
out_unlock:
mmap_write_unlock(mm);
mmput(mm);
@@ -2056,20 +2077,11 @@ static void init_once_userfaultfd_ctx(void *mem)
seqcount_spinlock_init(&ctx->refile_seq, &ctx->fault_pending_wqh.lock);
}
-SYSCALL_DEFINE1(userfaultfd, int, flags)
+static int new_userfaultfd(int flags)
{
struct userfaultfd_ctx *ctx;
int fd;
- if (!sysctl_unprivileged_userfaultfd &&
- (flags & UFFD_USER_MODE_ONLY) == 0 &&
- !capable(CAP_SYS_PTRACE)) {
- printk_once(KERN_WARNING "uffd: Set unprivileged_userfaultfd "
- "sysctl knob to 1 if kernel faults must be handled "
- "without obtaining CAP_SYS_PTRACE capability\n");
- return -EPERM;
- }
-
BUG_ON(!current->mm);
/* Check the UFFD_* constants for consistency. */
@@ -2094,7 +2106,7 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
mmgrab(ctx->mm);
fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, ctx,
- O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL);
+ O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL);
if (fd < 0) {
mmdrop(ctx->mm);
kmem_cache_free(userfaultfd_ctx_cachep, ctx);
@@ -2102,8 +2114,60 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
return fd;
}
+static inline bool userfaultfd_syscall_allowed(int flags)
+{
+ /* Userspace-only page faults are always allowed */
+ if (flags & UFFD_USER_MODE_ONLY)
+ return true;
+
+ /*
+ * The user is requesting a userfaultfd which can handle kernel faults.
+ * Privileged users are always allowed to do this.
+ */
+ if (capable(CAP_SYS_PTRACE))
+ return true;
+
+ /* Otherwise, access to kernel fault handling is sysctl controlled. */
+ return sysctl_unprivileged_userfaultfd;
+}
+
+SYSCALL_DEFINE1(userfaultfd, int, flags)
+{
+ if (!userfaultfd_syscall_allowed(flags))
+ return -EPERM;
+
+ return new_userfaultfd(flags);
+}
+
+static long userfaultfd_dev_ioctl(struct file *file, unsigned int cmd, unsigned long flags)
+{
+ if (cmd != USERFAULTFD_IOC_NEW)
+ return -EINVAL;
+
+ return new_userfaultfd(flags);
+}
+
+static const struct file_operations userfaultfd_dev_fops = {
+ .unlocked_ioctl = userfaultfd_dev_ioctl,
+ .compat_ioctl = userfaultfd_dev_ioctl,
+ .owner = THIS_MODULE,
+ .llseek = noop_llseek,
+};
+
+static struct miscdevice userfaultfd_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "userfaultfd",
+ .fops = &userfaultfd_dev_fops
+};
+
static int __init userfaultfd_init(void)
{
+ int ret;
+
+ ret = misc_register(&userfaultfd_misc);
+ if (ret)
+ return ret;
+
userfaultfd_ctx_cachep = kmem_cache_create("userfaultfd_ctx_cache",
sizeof(struct userfaultfd_ctx),
0,
diff --git a/fs/verity/read_metadata.c b/fs/verity/read_metadata.c
index 6ee849dc7bc1..2aefc5565152 100644
--- a/fs/verity/read_metadata.c
+++ b/fs/verity/read_metadata.c
@@ -53,14 +53,14 @@ static int fsverity_read_merkle_tree(struct inode *inode,
break;
}
- virt = kmap(page);
+ virt = kmap_local_page(page);
if (copy_to_user(buf, virt + offs_in_page, bytes_to_copy)) {
- kunmap(page);
+ kunmap_local(virt);
put_page(page);
err = -EFAULT;
break;
}
- kunmap(page);
+ kunmap_local(virt);
put_page(page);
retval += bytes_to_copy;
diff --git a/fs/verity/verify.c b/fs/verity/verify.c
index 14e2fb49cff5..bde8c9b7d25f 100644
--- a/fs/verity/verify.c
+++ b/fs/verity/verify.c
@@ -39,16 +39,6 @@ static void hash_at_level(const struct merkle_tree_params *params,
(params->log_blocksize - params->log_arity);
}
-/* Extract a hash from a hash page */
-static void extract_hash(struct page *hpage, unsigned int hoffset,
- unsigned int hsize, u8 *out)
-{
- void *virt = kmap_atomic(hpage);
-
- memcpy(out, virt + hoffset, hsize);
- kunmap_atomic(virt);
-}
-
static inline int cmp_hashes(const struct fsverity_info *vi,
const u8 *want_hash, const u8 *real_hash,
pgoff_t index, int level)
@@ -129,7 +119,7 @@ static bool verify_page(struct inode *inode, const struct fsverity_info *vi,
}
if (PageChecked(hpage)) {
- extract_hash(hpage, hoffset, hsize, _want_hash);
+ memcpy_from_page(_want_hash, hpage, hoffset, hsize);
want_hash = _want_hash;
put_page(hpage);
pr_debug_ratelimited("Hash page already checked, want %s:%*phN\n",
@@ -158,7 +148,7 @@ descend:
if (err)
goto out;
SetPageChecked(hpage);
- extract_hash(hpage, hoffset, hsize, _want_hash);
+ memcpy_from_page(_want_hash, hpage, hoffset, hsize);
want_hash = _want_hash;
put_page(hpage);
pr_debug("Verified hash page at level %d, now want %s:%*phN\n",
diff --git a/fs/xattr.c b/fs/xattr.c
index a1f4998bc6be..61107b6bbed2 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -290,7 +290,7 @@ static inline bool is_posix_acl_xattr(const char *name)
int
vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
- const char *name, void *value, size_t size, int flags)
+ const char *name, const void *value, size_t size, int flags)
{
struct inode *inode = dentry->d_inode;
struct inode *delegated_inode = NULL;
@@ -298,16 +298,12 @@ vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
int error;
if (size && strcmp(name, XATTR_NAME_CAPS) == 0) {
- error = cap_convert_nscap(mnt_userns, dentry,
- (const void **)&value, size);
+ error = cap_convert_nscap(mnt_userns, dentry, &value, size);
if (error < 0)
return error;
size = error;
}
- if (size && is_posix_acl_xattr(name))
- posix_acl_setxattr_idmapped_mnt(mnt_userns, inode, value, size);
-
retry_deleg:
inode_lock(inode);
error = __vfs_setxattr_locked(mnt_userns, dentry, name, value, size,
@@ -587,9 +583,7 @@ int setxattr_copy(const char __user *name, struct xattr_ctx *ctx)
static void setxattr_convert(struct user_namespace *mnt_userns,
struct dentry *d, struct xattr_ctx *ctx)
{
- if (ctx->size &&
- ((strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
- (strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)))
+ if (ctx->size && is_posix_acl_xattr(ctx->kname->name))
posix_acl_fix_xattr_from_user(ctx->kvalue, ctx->size);
}
@@ -705,8 +699,7 @@ do_getxattr(struct user_namespace *mnt_userns, struct dentry *d,
error = vfs_getxattr(mnt_userns, d, kname, ctx->kvalue, ctx->size);
if (error > 0) {
- if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
- (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
+ if (is_posix_acl_xattr(kname))
posix_acl_fix_xattr_to_user(ctx->kvalue, error);
if (ctx->size && copy_to_user(ctx->value, ctx->kvalue, error))
error = -EFAULT;
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 517a138faa66..191b22b9a35b 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -133,6 +133,21 @@ xfs_verify_agbno(struct xfs_perag *pag, xfs_agblock_t agbno)
return true;
}
+static inline bool
+xfs_verify_agbext(
+ struct xfs_perag *pag,
+ xfs_agblock_t agbno,
+ xfs_agblock_t len)
+{
+ if (agbno + len <= agbno)
+ return false;
+
+ if (!xfs_verify_agbno(pag, agbno))
+ return false;
+
+ return xfs_verify_agbno(pag, agbno + len - 1);
+}
+
/*
* Verify that an AG inode number pointer neither points outside the AG
* nor points at static metadata.
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index e2bdf089c0a3..de79f5d07f65 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -263,11 +263,7 @@ xfs_alloc_get_rec(
goto out_bad_rec;
/* check for valid extent range, including overflow */
- if (!xfs_verify_agbno(pag, *bno))
- goto out_bad_rec;
- if (*bno > *bno + *len)
- goto out_bad_rec;
- if (!xfs_verify_agbno(pag, *bno + *len - 1))
+ if (!xfs_verify_agbext(pag, *bno, *len))
goto out_bad_rec;
return 0;
@@ -1520,7 +1516,7 @@ xfs_alloc_ag_vextent_lastblock(
#ifdef DEBUG
/* Randomly don't execute the first algorithm. */
- if (prandom_u32() & 1)
+ if (prandom_u32_max(2))
return 0;
#endif
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index e56723dc9cd5..49d0d4ea63fc 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -294,7 +294,7 @@ xfs_check_block(
else
thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
if (*thispa == *pp) {
- xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
+ xfs_warn(mp, "%s: thispa(%d) == pp(%d) %lld",
__func__, j, i,
(unsigned long long)be64_to_cpu(*thispa));
xfs_err(mp, "%s: ptrs are equal in node\n",
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index e7201dc68f43..e576560b46e9 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -2192,8 +2192,8 @@ xfs_da_grow_inode_int(
*/
mapp = kmem_alloc(sizeof(*mapp) * count, 0);
for (b = *bno, mapi = 0; b < *bno + count; ) {
- nmap = min(XFS_BMAP_MAX_NMAP, count);
c = (int)(*bno + count - b);
+ nmap = min(XFS_BMAP_MAX_NMAP, c);
error = xfs_bmapi_write(tp, dp, b, c,
xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
args->total, &mapp[mapi], &nmap);
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 76eedc2756b3..92bac3373f1f 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -261,7 +261,7 @@ xfs_dir_createname(
{
struct xfs_da_args *args;
int rval;
- int v; /* type-checking value */
+ bool v;
ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
@@ -357,7 +357,7 @@ xfs_dir_lookup(
{
struct xfs_da_args *args;
int rval;
- int v; /* type-checking value */
+ bool v;
int lock_mode;
ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
@@ -435,7 +435,7 @@ xfs_dir_removename(
{
struct xfs_da_args *args;
int rval;
- int v; /* type-checking value */
+ bool v;
ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
XFS_STATS_INC(dp->i_mount, xs_dir_remove);
@@ -493,7 +493,7 @@ xfs_dir_replace(
{
struct xfs_da_args *args;
int rval;
- int v; /* type-checking value */
+ bool v;
ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
@@ -610,19 +610,23 @@ xfs_dir2_grow_inode(
int
xfs_dir2_isblock(
struct xfs_da_args *args,
- int *vp) /* out: 1 is block, 0 is not block */
+ bool *isblock)
{
- xfs_fileoff_t last; /* last file offset */
- int rval;
+ struct xfs_mount *mp = args->dp->i_mount;
+ xfs_fileoff_t eof;
+ int error;
- if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK)))
- return rval;
- rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize;
- if (XFS_IS_CORRUPT(args->dp->i_mount,
- rval != 0 &&
- args->dp->i_disk_size != args->geo->blksize))
+ error = xfs_bmap_last_offset(args->dp, &eof, XFS_DATA_FORK);
+ if (error)
+ return error;
+
+ *isblock = false;
+ if (XFS_FSB_TO_B(mp, eof) != args->geo->blksize)
+ return 0;
+
+ *isblock = true;
+ if (XFS_IS_CORRUPT(mp, args->dp->i_disk_size != args->geo->blksize))
return -EFSCORRUPTED;
- *vp = rval;
return 0;
}
@@ -632,14 +636,20 @@ xfs_dir2_isblock(
int
xfs_dir2_isleaf(
struct xfs_da_args *args,
- int *vp) /* out: 1 is block, 0 is not block */
+ bool *isleaf)
{
- xfs_fileoff_t last; /* last file offset */
- int rval;
+ xfs_fileoff_t eof;
+ int error;
- if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK)))
- return rval;
- *vp = last == args->geo->leafblk + args->geo->fsbcount;
+ error = xfs_bmap_last_offset(args->dp, &eof, XFS_DATA_FORK);
+ if (error)
+ return error;
+
+ *isleaf = false;
+ if (eof != args->geo->leafblk + args->geo->fsbcount)
+ return 0;
+
+ *isleaf = true;
return 0;
}
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index b6df3c34b26a..dd39f17dd9a9 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -61,8 +61,8 @@ extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
/*
* Interface routines used by userspace utilities
*/
-extern int xfs_dir2_isblock(struct xfs_da_args *args, int *r);
-extern int xfs_dir2_isleaf(struct xfs_da_args *args, int *r);
+extern int xfs_dir2_isblock(struct xfs_da_args *args, bool *isblock);
+extern int xfs_dir2_isleaf(struct xfs_da_args *args, bool *isleaf);
extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
struct xfs_buf *bp);
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index d9b66306a9a7..cb9e950a911d 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -146,6 +146,8 @@ xfs_dir3_leaf_check_int(
xfs_dir2_leaf_tail_t *ltp;
int stale;
int i;
+ bool isleaf1 = (hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
+ hdr->magic == XFS_DIR3_LEAF1_MAGIC);
ltp = xfs_dir2_leaf_tail_p(geo, leaf);
@@ -158,8 +160,7 @@ xfs_dir3_leaf_check_int(
return __this_address;
/* Leaves and bests don't overlap in leaf format. */
- if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
- hdr->magic == XFS_DIR3_LEAF1_MAGIC) &&
+ if (isleaf1 &&
(char *)&hdr->ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp))
return __this_address;
@@ -175,6 +176,10 @@ xfs_dir3_leaf_check_int(
}
if (hdr->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
stale++;
+ if (isleaf1 && xfs_dir2_dataptr_to_db(geo,
+ be32_to_cpu(hdr->ents[i].address)) >=
+ be32_to_cpu(ltp->bestcount))
+ return __this_address;
}
if (hdr->stale != stale)
return __this_address;
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index 003812fd7d35..8cd37e6e9d38 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -865,7 +865,6 @@ xfs_dir2_sf_lookup(
struct xfs_inode *dp = args->dp;
struct xfs_mount *mp = dp->i_mount;
int i; /* entry index */
- int error;
xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
enum xfs_dacmp cmp; /* comparison result */
@@ -929,8 +928,7 @@ xfs_dir2_sf_lookup(
if (!ci_sfep)
return -ENOENT;
/* otherwise process the CI match as required by the caller */
- error = xfs_dir_cilookup_result(args, ci_sfep->name, ci_sfep->namelen);
- return error;
+ return xfs_dir_cilookup_result(args, ci_sfep->name, ci_sfep->namelen);
}
/*
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index b55bdfa9c8a8..371dc07233e0 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -1564,20 +1564,6 @@ struct xfs_rmap_rec {
#define RMAPBT_UNUSED_OFFSET_BITLEN 7
#define RMAPBT_OFFSET_BITLEN 54
-#define XFS_RMAP_ATTR_FORK (1 << 0)
-#define XFS_RMAP_BMBT_BLOCK (1 << 1)
-#define XFS_RMAP_UNWRITTEN (1 << 2)
-#define XFS_RMAP_KEY_FLAGS (XFS_RMAP_ATTR_FORK | \
- XFS_RMAP_BMBT_BLOCK)
-#define XFS_RMAP_REC_FLAGS (XFS_RMAP_UNWRITTEN)
-struct xfs_rmap_irec {
- xfs_agblock_t rm_startblock; /* extent start block */
- xfs_extlen_t rm_blockcount; /* extent length */
- uint64_t rm_owner; /* extent owner */
- uint64_t rm_offset; /* offset within the owner */
- unsigned int rm_flags; /* state flags */
-};
-
/*
* Key structure
*
@@ -1626,7 +1612,7 @@ unsigned int xfs_refc_block(struct xfs_mount *mp);
* on the startblock. This speeds up mount time deletion of stale
* staging extents because they're all at the right side of the tree.
*/
-#define XFS_REFC_COW_START ((xfs_agblock_t)(1U << 31))
+#define XFS_REFC_COWFLAG (1U << 31)
#define REFCNTBT_COWFLAG_BITLEN 1
#define REFCNTBT_AGBLOCK_BITLEN 31
@@ -1640,12 +1626,6 @@ struct xfs_refcount_key {
__be32 rc_startblock; /* starting block number */
};
-struct xfs_refcount_irec {
- xfs_agblock_t rc_startblock; /* starting block number */
- xfs_extlen_t rc_blockcount; /* count of free blocks */
- xfs_nlink_t rc_refcount; /* number of inodes linked here */
-};
-
#define MAXREFCOUNT ((xfs_nlink_t)~0U)
#define MAXREFCEXTLEN ((xfs_extlen_t)~0U)
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 6cdfd64bc56b..94db50eb706a 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -636,7 +636,7 @@ xfs_ialloc_ag_alloc(
/* randomly do sparse inode allocations */
if (xfs_has_sparseinodes(tp->t_mountp) &&
igeo->ialloc_min_blks < igeo->ialloc_blks)
- do_sparse = prandom_u32() & 1;
+ do_sparse = prandom_u32_max(2);
#endif
/*
@@ -805,7 +805,7 @@ sparse_alloc:
* number from being easily guessable.
*/
error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, pag->pag_agno,
- args.agbno, args.len, prandom_u32());
+ args.agbno, args.len, get_random_u32());
if (error)
return error;
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 9327a4f39206..6b21760184d9 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -78,7 +78,7 @@ xfs_iformat_local(
*/
if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
xfs_warn(ip->i_mount,
- "corrupt inode %Lu (bad size %d for local fork, size = %zd).",
+ "corrupt inode %llu (bad size %d for local fork, size = %zd).",
(unsigned long long) ip->i_ino, size,
XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
xfs_inode_verifier_error(ip, -EFSCORRUPTED,
@@ -192,7 +192,7 @@ xfs_iformat_btree(
XFS_DFORK_SIZE(dip, mp, whichfork) ||
ifp->if_nextents > ip->i_nblocks) ||
level == 0 || level > XFS_BM_MAXLEVELS(mp, whichfork)) {
- xfs_warn(mp, "corrupt inode %Lu (btree).",
+ xfs_warn(mp, "corrupt inode %llu (btree).",
(unsigned long long) ip->i_ino);
xfs_inode_verifier_error(ip, -EFSCORRUPTED,
"xfs_iformat_btree", dfp, size,
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index b351b9dc6561..f13e0809dc63 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -613,25 +613,49 @@ typedef struct xfs_efi_log_format {
uint16_t efi_size; /* size of this item */
uint32_t efi_nextents; /* # extents to free */
uint64_t efi_id; /* efi identifier */
- xfs_extent_t efi_extents[1]; /* array of extents to free */
+ xfs_extent_t efi_extents[]; /* array of extents to free */
} xfs_efi_log_format_t;
+static inline size_t
+xfs_efi_log_format_sizeof(
+ unsigned int nr)
+{
+ return sizeof(struct xfs_efi_log_format) +
+ nr * sizeof(struct xfs_extent);
+}
+
typedef struct xfs_efi_log_format_32 {
uint16_t efi_type; /* efi log item type */
uint16_t efi_size; /* size of this item */
uint32_t efi_nextents; /* # extents to free */
uint64_t efi_id; /* efi identifier */
- xfs_extent_32_t efi_extents[1]; /* array of extents to free */
+ xfs_extent_32_t efi_extents[]; /* array of extents to free */
} __attribute__((packed)) xfs_efi_log_format_32_t;
+static inline size_t
+xfs_efi_log_format32_sizeof(
+ unsigned int nr)
+{
+ return sizeof(struct xfs_efi_log_format_32) +
+ nr * sizeof(struct xfs_extent_32);
+}
+
typedef struct xfs_efi_log_format_64 {
uint16_t efi_type; /* efi log item type */
uint16_t efi_size; /* size of this item */
uint32_t efi_nextents; /* # extents to free */
uint64_t efi_id; /* efi identifier */
- xfs_extent_64_t efi_extents[1]; /* array of extents to free */
+ xfs_extent_64_t efi_extents[]; /* array of extents to free */
} xfs_efi_log_format_64_t;
+static inline size_t
+xfs_efi_log_format64_sizeof(
+ unsigned int nr)
+{
+ return sizeof(struct xfs_efi_log_format_64) +
+ nr * sizeof(struct xfs_extent_64);
+}
+
/*
* This is the structure used to lay out an efd log item in the
* log. The efd_extents array is a variable size array whose
@@ -642,25 +666,49 @@ typedef struct xfs_efd_log_format {
uint16_t efd_size; /* size of this item */
uint32_t efd_nextents; /* # of extents freed */
uint64_t efd_efi_id; /* id of corresponding efi */
- xfs_extent_t efd_extents[1]; /* array of extents freed */
+ xfs_extent_t efd_extents[]; /* array of extents freed */
} xfs_efd_log_format_t;
+static inline size_t
+xfs_efd_log_format_sizeof(
+ unsigned int nr)
+{
+ return sizeof(struct xfs_efd_log_format) +
+ nr * sizeof(struct xfs_extent);
+}
+
typedef struct xfs_efd_log_format_32 {
uint16_t efd_type; /* efd log item type */
uint16_t efd_size; /* size of this item */
uint32_t efd_nextents; /* # of extents freed */
uint64_t efd_efi_id; /* id of corresponding efi */
- xfs_extent_32_t efd_extents[1]; /* array of extents freed */
+ xfs_extent_32_t efd_extents[]; /* array of extents freed */
} __attribute__((packed)) xfs_efd_log_format_32_t;
+static inline size_t
+xfs_efd_log_format32_sizeof(
+ unsigned int nr)
+{
+ return sizeof(struct xfs_efd_log_format_32) +
+ nr * sizeof(struct xfs_extent_32);
+}
+
typedef struct xfs_efd_log_format_64 {
uint16_t efd_type; /* efd log item type */
uint16_t efd_size; /* size of this item */
uint32_t efd_nextents; /* # of extents freed */
uint64_t efd_efi_id; /* id of corresponding efi */
- xfs_extent_64_t efd_extents[1]; /* array of extents freed */
+ xfs_extent_64_t efd_extents[]; /* array of extents freed */
} xfs_efd_log_format_64_t;
+static inline size_t
+xfs_efd_log_format64_sizeof(
+ unsigned int nr)
+{
+ return sizeof(struct xfs_efd_log_format_64) +
+ nr * sizeof(struct xfs_extent_64);
+}
+
/*
* RUI/RUD (reverse mapping) log format definitions
*/
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 64b910caafaa..3f34bafe18dd 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -46,13 +46,16 @@ STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
int
xfs_refcount_lookup_le(
struct xfs_btree_cur *cur,
+ enum xfs_refc_domain domain,
xfs_agblock_t bno,
int *stat)
{
- trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno,
+ trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+ xfs_refcount_encode_startblock(bno, domain),
XFS_LOOKUP_LE);
cur->bc_rec.rc.rc_startblock = bno;
cur->bc_rec.rc.rc_blockcount = 0;
+ cur->bc_rec.rc.rc_domain = domain;
return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
}
@@ -63,13 +66,16 @@ xfs_refcount_lookup_le(
int
xfs_refcount_lookup_ge(
struct xfs_btree_cur *cur,
+ enum xfs_refc_domain domain,
xfs_agblock_t bno,
int *stat)
{
- trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno,
+ trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+ xfs_refcount_encode_startblock(bno, domain),
XFS_LOOKUP_GE);
cur->bc_rec.rc.rc_startblock = bno;
cur->bc_rec.rc.rc_blockcount = 0;
+ cur->bc_rec.rc.rc_domain = domain;
return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}
@@ -80,13 +86,16 @@ xfs_refcount_lookup_ge(
int
xfs_refcount_lookup_eq(
struct xfs_btree_cur *cur,
+ enum xfs_refc_domain domain,
xfs_agblock_t bno,
int *stat)
{
- trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno,
+ trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+ xfs_refcount_encode_startblock(bno, domain),
XFS_LOOKUP_LE);
cur->bc_rec.rc.rc_startblock = bno;
cur->bc_rec.rc.rc_blockcount = 0;
+ cur->bc_rec.rc.rc_domain = domain;
return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
}
@@ -96,7 +105,17 @@ xfs_refcount_btrec_to_irec(
const union xfs_btree_rec *rec,
struct xfs_refcount_irec *irec)
{
- irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock);
+ uint32_t start;
+
+ start = be32_to_cpu(rec->refc.rc_startblock);
+ if (start & XFS_REFC_COWFLAG) {
+ start &= ~XFS_REFC_COWFLAG;
+ irec->rc_domain = XFS_REFC_DOMAIN_COW;
+ } else {
+ irec->rc_domain = XFS_REFC_DOMAIN_SHARED;
+ }
+
+ irec->rc_startblock = start;
irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
}
@@ -114,7 +133,6 @@ xfs_refcount_get_rec(
struct xfs_perag *pag = cur->bc_ag.pag;
union xfs_btree_rec *rec;
int error;
- xfs_agblock_t realstart;
error = xfs_btree_get_rec(cur, &rec, stat);
if (error || !*stat)
@@ -124,22 +142,11 @@ xfs_refcount_get_rec(
if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN)
goto out_bad_rec;
- /* handle special COW-staging state */
- realstart = irec->rc_startblock;
- if (realstart & XFS_REFC_COW_START) {
- if (irec->rc_refcount != 1)
- goto out_bad_rec;
- realstart &= ~XFS_REFC_COW_START;
- } else if (irec->rc_refcount < 2) {
+ if (!xfs_refcount_check_domain(irec))
goto out_bad_rec;
- }
/* check for valid extent range, including overflow */
- if (!xfs_verify_agbno(pag, realstart))
- goto out_bad_rec;
- if (realstart > realstart + irec->rc_blockcount)
- goto out_bad_rec;
- if (!xfs_verify_agbno(pag, realstart + irec->rc_blockcount - 1))
+ if (!xfs_verify_agbext(pag, irec->rc_startblock, irec->rc_blockcount))
goto out_bad_rec;
if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT)
@@ -169,12 +176,17 @@ xfs_refcount_update(
struct xfs_refcount_irec *irec)
{
union xfs_btree_rec rec;
+ uint32_t start;
int error;
trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
- rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock);
+
+ start = xfs_refcount_encode_startblock(irec->rc_startblock,
+ irec->rc_domain);
+ rec.refc.rc_startblock = cpu_to_be32(start);
rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);
+
error = xfs_btree_update(cur, &rec);
if (error)
trace_xfs_refcount_update_error(cur->bc_mp,
@@ -196,9 +208,12 @@ xfs_refcount_insert(
int error;
trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
+
cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
+ cur->bc_rec.rc.rc_domain = irec->rc_domain;
+
error = xfs_btree_insert(cur, i);
if (error)
goto out_error;
@@ -244,7 +259,8 @@ xfs_refcount_delete(
}
if (error)
goto out_error;
- error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec);
+ error = xfs_refcount_lookup_ge(cur, irec.rc_domain, irec.rc_startblock,
+ &found_rec);
out_error:
if (error)
trace_xfs_refcount_delete_error(cur->bc_mp,
@@ -343,6 +359,7 @@ xfs_refc_next(
STATIC int
xfs_refcount_split_extent(
struct xfs_btree_cur *cur,
+ enum xfs_refc_domain domain,
xfs_agblock_t agbno,
bool *shape_changed)
{
@@ -351,7 +368,7 @@ xfs_refcount_split_extent(
int error;
*shape_changed = false;
- error = xfs_refcount_lookup_le(cur, agbno, &found_rec);
+ error = xfs_refcount_lookup_le(cur, domain, agbno, &found_rec);
if (error)
goto out_error;
if (!found_rec)
@@ -364,6 +381,8 @@ xfs_refcount_split_extent(
error = -EFSCORRUPTED;
goto out_error;
}
+ if (rcext.rc_domain != domain)
+ return 0;
if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
return 0;
@@ -415,6 +434,9 @@ xfs_refcount_merge_center_extents(
trace_xfs_refcount_merge_center_extents(cur->bc_mp,
cur->bc_ag.pag->pag_agno, left, center, right);
+ ASSERT(left->rc_domain == center->rc_domain);
+ ASSERT(right->rc_domain == center->rc_domain);
+
/*
* Make sure the center and right extents are not in the btree.
* If the center extent was synthesized, the first delete call
@@ -423,8 +445,8 @@ xfs_refcount_merge_center_extents(
* call removes the center and the second one removes the right
* extent.
*/
- error = xfs_refcount_lookup_ge(cur, center->rc_startblock,
- &found_rec);
+ error = xfs_refcount_lookup_ge(cur, center->rc_domain,
+ center->rc_startblock, &found_rec);
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
@@ -451,8 +473,8 @@ xfs_refcount_merge_center_extents(
}
/* Enlarge the left extent. */
- error = xfs_refcount_lookup_le(cur, left->rc_startblock,
- &found_rec);
+ error = xfs_refcount_lookup_le(cur, left->rc_domain,
+ left->rc_startblock, &found_rec);
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
@@ -491,10 +513,12 @@ xfs_refcount_merge_left_extent(
trace_xfs_refcount_merge_left_extent(cur->bc_mp,
cur->bc_ag.pag->pag_agno, left, cleft);
+ ASSERT(left->rc_domain == cleft->rc_domain);
+
/* If the extent at agbno (cleft) wasn't synthesized, remove it. */
if (cleft->rc_refcount > 1) {
- error = xfs_refcount_lookup_le(cur, cleft->rc_startblock,
- &found_rec);
+ error = xfs_refcount_lookup_le(cur, cleft->rc_domain,
+ cleft->rc_startblock, &found_rec);
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
@@ -512,8 +536,8 @@ xfs_refcount_merge_left_extent(
}
/* Enlarge the left extent. */
- error = xfs_refcount_lookup_le(cur, left->rc_startblock,
- &found_rec);
+ error = xfs_refcount_lookup_le(cur, left->rc_domain,
+ left->rc_startblock, &found_rec);
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
@@ -552,13 +576,15 @@ xfs_refcount_merge_right_extent(
trace_xfs_refcount_merge_right_extent(cur->bc_mp,
cur->bc_ag.pag->pag_agno, cright, right);
+ ASSERT(right->rc_domain == cright->rc_domain);
+
/*
* If the extent ending at agbno+aglen (cright) wasn't synthesized,
* remove it.
*/
if (cright->rc_refcount > 1) {
- error = xfs_refcount_lookup_le(cur, cright->rc_startblock,
- &found_rec);
+ error = xfs_refcount_lookup_le(cur, cright->rc_domain,
+ cright->rc_startblock, &found_rec);
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
@@ -576,8 +602,8 @@ xfs_refcount_merge_right_extent(
}
/* Enlarge the right extent. */
- error = xfs_refcount_lookup_le(cur, right->rc_startblock,
- &found_rec);
+ error = xfs_refcount_lookup_le(cur, right->rc_domain,
+ right->rc_startblock, &found_rec);
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
@@ -600,8 +626,6 @@ out_error:
return error;
}
-#define XFS_FIND_RCEXT_SHARED 1
-#define XFS_FIND_RCEXT_COW 2
/*
* Find the left extent and the one after it (cleft). This function assumes
* that we've already split any extent crossing agbno.
@@ -611,16 +635,16 @@ xfs_refcount_find_left_extents(
struct xfs_btree_cur *cur,
struct xfs_refcount_irec *left,
struct xfs_refcount_irec *cleft,
+ enum xfs_refc_domain domain,
xfs_agblock_t agbno,
- xfs_extlen_t aglen,
- int flags)
+ xfs_extlen_t aglen)
{
struct xfs_refcount_irec tmp;
int error;
int found_rec;
left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
- error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec);
+ error = xfs_refcount_lookup_le(cur, domain, agbno - 1, &found_rec);
if (error)
goto out_error;
if (!found_rec)
@@ -634,11 +658,9 @@ xfs_refcount_find_left_extents(
goto out_error;
}
- if (xfs_refc_next(&tmp) != agbno)
- return 0;
- if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
+ if (tmp.rc_domain != domain)
return 0;
- if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
+ if (xfs_refc_next(&tmp) != agbno)
return 0;
/* We have a left extent; retrieve (or invent) the next right one */
*left = tmp;
@@ -655,6 +677,9 @@ xfs_refcount_find_left_extents(
goto out_error;
}
+ if (tmp.rc_domain != domain)
+ goto not_found;
+
/* if tmp starts at the end of our range, just use that */
if (tmp.rc_startblock == agbno)
*cleft = tmp;
@@ -671,8 +696,10 @@ xfs_refcount_find_left_extents(
cleft->rc_blockcount = min(aglen,
tmp.rc_startblock - agbno);
cleft->rc_refcount = 1;
+ cleft->rc_domain = domain;
}
} else {
+not_found:
/*
* No extents, so pretend that there's one covering the whole
* range.
@@ -680,6 +707,7 @@ xfs_refcount_find_left_extents(
cleft->rc_startblock = agbno;
cleft->rc_blockcount = aglen;
cleft->rc_refcount = 1;
+ cleft->rc_domain = domain;
}
trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
left, cleft, agbno);
@@ -700,16 +728,16 @@ xfs_refcount_find_right_extents(
struct xfs_btree_cur *cur,
struct xfs_refcount_irec *right,
struct xfs_refcount_irec *cright,
+ enum xfs_refc_domain domain,
xfs_agblock_t agbno,
- xfs_extlen_t aglen,
- int flags)
+ xfs_extlen_t aglen)
{
struct xfs_refcount_irec tmp;
int error;
int found_rec;
right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
- error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec);
+ error = xfs_refcount_lookup_ge(cur, domain, agbno + aglen, &found_rec);
if (error)
goto out_error;
if (!found_rec)
@@ -723,11 +751,9 @@ xfs_refcount_find_right_extents(
goto out_error;
}
- if (tmp.rc_startblock != agbno + aglen)
- return 0;
- if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
+ if (tmp.rc_domain != domain)
return 0;
- if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
+ if (tmp.rc_startblock != agbno + aglen)
return 0;
/* We have a right extent; retrieve (or invent) the next left one */
*right = tmp;
@@ -744,6 +770,9 @@ xfs_refcount_find_right_extents(
goto out_error;
}
+ if (tmp.rc_domain != domain)
+ goto not_found;
+
/* if tmp ends at the end of our range, just use that */
if (xfs_refc_next(&tmp) == agbno + aglen)
*cright = tmp;
@@ -760,8 +789,10 @@ xfs_refcount_find_right_extents(
cright->rc_blockcount = right->rc_startblock -
cright->rc_startblock;
cright->rc_refcount = 1;
+ cright->rc_domain = domain;
}
} else {
+not_found:
/*
* No extents, so pretend that there's one covering the whole
* range.
@@ -769,6 +800,7 @@ xfs_refcount_find_right_extents(
cright->rc_startblock = agbno;
cright->rc_blockcount = aglen;
cright->rc_refcount = 1;
+ cright->rc_domain = domain;
}
trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
cright, right, agbno + aglen);
@@ -794,10 +826,10 @@ xfs_refc_valid(
STATIC int
xfs_refcount_merge_extents(
struct xfs_btree_cur *cur,
+ enum xfs_refc_domain domain,
xfs_agblock_t *agbno,
xfs_extlen_t *aglen,
enum xfs_refc_adjust_op adjust,
- int flags,
bool *shape_changed)
{
struct xfs_refcount_irec left = {0}, cleft = {0};
@@ -812,12 +844,12 @@ xfs_refcount_merge_extents(
* just below (agbno + aglen) [cright], and just above (agbno + aglen)
* [right].
*/
- error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno,
- *aglen, flags);
+ error = xfs_refcount_find_left_extents(cur, &left, &cleft, domain,
+ *agbno, *aglen);
if (error)
return error;
- error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno,
- *aglen, flags);
+ error = xfs_refcount_find_right_extents(cur, &right, &cright, domain,
+ *agbno, *aglen);
if (error)
return error;
@@ -870,7 +902,7 @@ xfs_refcount_merge_extents(
aglen);
}
- return error;
+ return 0;
}
/*
@@ -933,7 +965,8 @@ xfs_refcount_adjust_extents(
if (*aglen == 0)
return 0;
- error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec);
+ error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_SHARED, *agbno,
+ &found_rec);
if (error)
goto out_error;
@@ -941,10 +974,11 @@ xfs_refcount_adjust_extents(
error = xfs_refcount_get_rec(cur, &ext, &found_rec);
if (error)
goto out_error;
- if (!found_rec) {
+ if (!found_rec || ext.rc_domain != XFS_REFC_DOMAIN_SHARED) {
ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
ext.rc_blockcount = 0;
ext.rc_refcount = 0;
+ ext.rc_domain = XFS_REFC_DOMAIN_SHARED;
}
/*
@@ -957,6 +991,8 @@ xfs_refcount_adjust_extents(
tmp.rc_blockcount = min(*aglen,
ext.rc_startblock - *agbno);
tmp.rc_refcount = 1 + adj;
+ tmp.rc_domain = XFS_REFC_DOMAIN_SHARED;
+
trace_xfs_refcount_modify_extent(cur->bc_mp,
cur->bc_ag.pag->pag_agno, &tmp);
@@ -986,15 +1022,30 @@ xfs_refcount_adjust_extents(
(*agbno) += tmp.rc_blockcount;
(*aglen) -= tmp.rc_blockcount;
- error = xfs_refcount_lookup_ge(cur, *agbno,
+ /* Stop if there's nothing left to modify */
+ if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
+ break;
+
+ /* Move the cursor to the start of ext. */
+ error = xfs_refcount_lookup_ge(cur,
+ XFS_REFC_DOMAIN_SHARED, *agbno,
&found_rec);
if (error)
goto out_error;
}
- /* Stop if there's nothing left to modify */
- if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
- break;
+ /*
+ * A previous step trimmed agbno/aglen such that the end of the
+ * range would not be in the middle of the record. If this is
+ * no longer the case, something is seriously wrong with the
+ * btree. Make sure we never feed the synthesized record into
+ * the processing loop below.
+ */
+ if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount == 0) ||
+ XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount > *aglen)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
/*
* Adjust the reference count and either update the tree
@@ -1070,13 +1121,15 @@ xfs_refcount_adjust(
/*
* Ensure that no rcextents cross the boundary of the adjustment range.
*/
- error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
+ error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
+ agbno, &shape_changed);
if (error)
goto out_error;
if (shape_changed)
shape_changes++;
- error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
+ error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
+ agbno + aglen, &shape_changed);
if (error)
goto out_error;
if (shape_changed)
@@ -1085,8 +1138,8 @@ xfs_refcount_adjust(
/*
* Try to merge with the left or right extents of the range.
*/
- error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj,
- XFS_FIND_RCEXT_SHARED, &shape_changed);
+ error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_SHARED,
+ new_agbno, new_aglen, adj, &shape_changed);
if (error)
goto out_error;
if (shape_changed)
@@ -1125,6 +1178,32 @@ xfs_refcount_finish_one_cleanup(
}
/*
+ * Set up a continuation a deferred refcount operation by updating the intent.
+ * Checks to make sure we're not going to run off the end of the AG.
+ */
+static inline int
+xfs_refcount_continue_op(
+ struct xfs_btree_cur *cur,
+ xfs_fsblock_t startblock,
+ xfs_agblock_t new_agbno,
+ xfs_extlen_t new_len,
+ xfs_fsblock_t *new_fsbno)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_perag *pag = cur->bc_ag.pag;
+
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno, new_len)))
+ return -EFSCORRUPTED;
+
+ *new_fsbno = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
+
+ ASSERT(xfs_verify_fsbext(mp, *new_fsbno, new_len));
+ ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, *new_fsbno));
+
+ return 0;
+}
+
+/*
* Process one of the deferred refcount operations. We pass back the
* btree cursor to maintain our lock on the btree between calls.
* This saves time and eliminates a buffer deadlock between the
@@ -1191,12 +1270,20 @@ xfs_refcount_finish_one(
case XFS_REFCOUNT_INCREASE:
error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
new_len, XFS_REFCOUNT_ADJUST_INCREASE);
- *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
+ if (error)
+ goto out_drop;
+ if (*new_len > 0)
+ error = xfs_refcount_continue_op(rcur, startblock,
+ new_agbno, *new_len, new_fsb);
break;
case XFS_REFCOUNT_DECREASE:
error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
new_len, XFS_REFCOUNT_ADJUST_DECREASE);
- *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
+ if (error)
+ goto out_drop;
+ if (*new_len > 0)
+ error = xfs_refcount_continue_op(rcur, startblock,
+ new_agbno, *new_len, new_fsb);
break;
case XFS_REFCOUNT_ALLOC_COW:
*new_fsb = startblock + blockcount;
@@ -1307,7 +1394,8 @@ xfs_refcount_find_shared(
*flen = 0;
/* Try to find a refcount extent that crosses the start */
- error = xfs_refcount_lookup_le(cur, agbno, &have);
+ error = xfs_refcount_lookup_le(cur, XFS_REFC_DOMAIN_SHARED, agbno,
+ &have);
if (error)
goto out_error;
if (!have) {
@@ -1325,6 +1413,8 @@ xfs_refcount_find_shared(
error = -EFSCORRUPTED;
goto out_error;
}
+ if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED)
+ goto done;
/* If the extent ends before the start, look at the next one */
if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
@@ -1340,6 +1430,8 @@ xfs_refcount_find_shared(
error = -EFSCORRUPTED;
goto out_error;
}
+ if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED)
+ goto done;
}
/* If the extent starts after the range we want, bail out */
@@ -1371,7 +1463,8 @@ xfs_refcount_find_shared(
error = -EFSCORRUPTED;
goto out_error;
}
- if (tmp.rc_startblock >= agbno + aglen ||
+ if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED ||
+ tmp.rc_startblock >= agbno + aglen ||
tmp.rc_startblock != *fbno + *flen)
break;
*flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
@@ -1455,17 +1548,23 @@ xfs_refcount_adjust_cow_extents(
return 0;
/* Find any overlapping refcount records */
- error = xfs_refcount_lookup_ge(cur, agbno, &found_rec);
+ error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_COW, agbno,
+ &found_rec);
if (error)
goto out_error;
error = xfs_refcount_get_rec(cur, &ext, &found_rec);
if (error)
goto out_error;
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_rec &&
+ ext.rc_domain != XFS_REFC_DOMAIN_COW)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
if (!found_rec) {
- ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks +
- XFS_REFC_COW_START;
+ ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
ext.rc_blockcount = 0;
ext.rc_refcount = 0;
+ ext.rc_domain = XFS_REFC_DOMAIN_COW;
}
switch (adj) {
@@ -1480,6 +1579,8 @@ xfs_refcount_adjust_cow_extents(
tmp.rc_startblock = agbno;
tmp.rc_blockcount = aglen;
tmp.rc_refcount = 1;
+ tmp.rc_domain = XFS_REFC_DOMAIN_COW;
+
trace_xfs_refcount_modify_extent(cur->bc_mp,
cur->bc_ag.pag->pag_agno, &tmp);
@@ -1542,24 +1643,24 @@ xfs_refcount_adjust_cow(
bool shape_changed;
int error;
- agbno += XFS_REFC_COW_START;
-
/*
* Ensure that no rcextents cross the boundary of the adjustment range.
*/
- error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
+ error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
+ agbno, &shape_changed);
if (error)
goto out_error;
- error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
+ error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
+ agbno + aglen, &shape_changed);
if (error)
goto out_error;
/*
* Try to merge with the left or right extents of the range.
*/
- error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj,
- XFS_FIND_RCEXT_COW, &shape_changed);
+ error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_COW, &agbno,
+ &aglen, adj, &shape_changed);
if (error)
goto out_error;
@@ -1666,10 +1767,18 @@ xfs_refcount_recover_extent(
be32_to_cpu(rec->refc.rc_refcount) != 1))
return -EFSCORRUPTED;
- rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0);
+ rr = kmalloc(sizeof(struct xfs_refcount_recovery),
+ GFP_KERNEL | __GFP_NOFAIL);
+ INIT_LIST_HEAD(&rr->rr_list);
xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
- list_add_tail(&rr->rr_list, debris);
+ if (XFS_IS_CORRUPT(cur->bc_mp,
+ rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) {
+ kfree(rr);
+ return -EFSCORRUPTED;
+ }
+
+ list_add_tail(&rr->rr_list, debris);
return 0;
}
@@ -1687,10 +1796,11 @@ xfs_refcount_recover_cow_leftovers(
union xfs_btree_irec low;
union xfs_btree_irec high;
xfs_fsblock_t fsb;
- xfs_agblock_t agbno;
int error;
- if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START)
+ /* reflink filesystems mustn't have AGs larger than 2^31-1 blocks */
+ BUILD_BUG_ON(XFS_MAX_CRC_AG_BLOCKS >= XFS_REFC_COWFLAG);
+ if (mp->m_sb.sb_agblocks > XFS_MAX_CRC_AG_BLOCKS)
return -EOPNOTSUPP;
INIT_LIST_HEAD(&debris);
@@ -1717,7 +1827,7 @@ xfs_refcount_recover_cow_leftovers(
/* Find all the leftover CoW staging extents. */
memset(&low, 0, sizeof(low));
memset(&high, 0, sizeof(high));
- low.rc.rc_startblock = XFS_REFC_COW_START;
+ low.rc.rc_domain = high.rc.rc_domain = XFS_REFC_DOMAIN_COW;
high.rc.rc_startblock = -1U;
error = xfs_btree_query_range(cur, &low, &high,
xfs_refcount_recover_extent, &debris);
@@ -1738,8 +1848,8 @@ xfs_refcount_recover_cow_leftovers(
&rr->rr_rrec);
/* Free the orphan record */
- agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START;
- fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, agbno);
+ fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno,
+ rr->rr_rrec.rc_startblock);
xfs_refcount_free_cow_extent(tp, fsb,
rr->rr_rrec.rc_blockcount);
@@ -1751,7 +1861,7 @@ xfs_refcount_recover_cow_leftovers(
goto out_free;
list_del(&rr->rr_list);
- kmem_free(rr);
+ kfree(rr);
}
return error;
@@ -1761,7 +1871,7 @@ out_free:
/* Free the leftover list */
list_for_each_entry_safe(rr, n, &debris, rr_list) {
list_del(&rr->rr_list);
- kmem_free(rr);
+ kfree(rr);
}
return error;
}
@@ -1770,6 +1880,7 @@ out_free:
int
xfs_refcount_has_record(
struct xfs_btree_cur *cur,
+ enum xfs_refc_domain domain,
xfs_agblock_t bno,
xfs_extlen_t len,
bool *exists)
@@ -1781,6 +1892,7 @@ xfs_refcount_has_record(
low.rc.rc_startblock = bno;
memset(&high, 0xFF, sizeof(high));
high.rc.rc_startblock = bno + len - 1;
+ low.rc.rc_domain = high.rc.rc_domain = domain;
return xfs_btree_has_record(cur, &low, &high, exists);
}
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index e8b322de7f3d..452f30556f5a 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -14,14 +14,33 @@ struct xfs_bmbt_irec;
struct xfs_refcount_irec;
extern int xfs_refcount_lookup_le(struct xfs_btree_cur *cur,
- xfs_agblock_t bno, int *stat);
+ enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat);
extern int xfs_refcount_lookup_ge(struct xfs_btree_cur *cur,
- xfs_agblock_t bno, int *stat);
+ enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat);
extern int xfs_refcount_lookup_eq(struct xfs_btree_cur *cur,
- xfs_agblock_t bno, int *stat);
+ enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat);
extern int xfs_refcount_get_rec(struct xfs_btree_cur *cur,
struct xfs_refcount_irec *irec, int *stat);
+static inline uint32_t
+xfs_refcount_encode_startblock(
+ xfs_agblock_t startblock,
+ enum xfs_refc_domain domain)
+{
+ uint32_t start;
+
+ /*
+ * low level btree operations need to handle the generic btree range
+ * query functions (which set rc_domain == -1U), so we check that the
+ * domain is /not/ shared.
+ */
+ start = startblock & ~XFS_REFC_COWFLAG;
+ if (domain != XFS_REFC_DOMAIN_SHARED)
+ start |= XFS_REFC_COWFLAG;
+
+ return start;
+}
+
enum xfs_refcount_intent_type {
XFS_REFCOUNT_INCREASE = 1,
XFS_REFCOUNT_DECREASE,
@@ -36,6 +55,18 @@ struct xfs_refcount_intent {
xfs_fsblock_t ri_startblock;
};
+/* Check that the refcount is appropriate for the record domain. */
+static inline bool
+xfs_refcount_check_domain(
+ const struct xfs_refcount_irec *irec)
+{
+ if (irec->rc_domain == XFS_REFC_DOMAIN_COW && irec->rc_refcount != 1)
+ return false;
+ if (irec->rc_domain == XFS_REFC_DOMAIN_SHARED && irec->rc_refcount < 2)
+ return false;
+ return true;
+}
+
void xfs_refcount_increase_extent(struct xfs_trans *tp,
struct xfs_bmbt_irec *irec);
void xfs_refcount_decrease_extent(struct xfs_trans *tp,
@@ -79,7 +110,8 @@ extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp,
#define XFS_REFCOUNT_ITEM_OVERHEAD 32
extern int xfs_refcount_has_record(struct xfs_btree_cur *cur,
- xfs_agblock_t bno, xfs_extlen_t len, bool *exists);
+ enum xfs_refc_domain domain, xfs_agblock_t bno,
+ xfs_extlen_t len, bool *exists);
union xfs_btree_rec;
extern void xfs_refcount_btrec_to_irec(const union xfs_btree_rec *rec,
struct xfs_refcount_irec *irec);
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 316c1ec0c3c2..e1f789866683 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -13,6 +13,7 @@
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_refcount_btree.h"
+#include "xfs_refcount.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"
@@ -160,7 +161,12 @@ xfs_refcountbt_init_rec_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_rec *rec)
{
- rec->refc.rc_startblock = cpu_to_be32(cur->bc_rec.rc.rc_startblock);
+ const struct xfs_refcount_irec *irec = &cur->bc_rec.rc;
+ uint32_t start;
+
+ start = xfs_refcount_encode_startblock(irec->rc_startblock,
+ irec->rc_domain);
+ rec->refc.rc_startblock = cpu_to_be32(start);
rec->refc.rc_blockcount = cpu_to_be32(cur->bc_rec.rc.rc_blockcount);
rec->refc.rc_refcount = cpu_to_be32(cur->bc_rec.rc.rc_refcount);
}
@@ -182,10 +188,13 @@ xfs_refcountbt_key_diff(
struct xfs_btree_cur *cur,
const union xfs_btree_key *key)
{
- struct xfs_refcount_irec *rec = &cur->bc_rec.rc;
const struct xfs_refcount_key *kp = &key->refc;
+ const struct xfs_refcount_irec *irec = &cur->bc_rec.rc;
+ uint32_t start;
- return (int64_t)be32_to_cpu(kp->rc_startblock) - rec->rc_startblock;
+ start = xfs_refcount_encode_startblock(irec->rc_startblock,
+ irec->rc_domain);
+ return (int64_t)be32_to_cpu(kp->rc_startblock) - start;
}
STATIC int64_t
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 094dfc897ebc..b56aca1e7c66 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -235,13 +235,8 @@ xfs_rmap_get_rec(
goto out_bad_rec;
} else {
/* check for valid extent range, including overflow */
- if (!xfs_verify_agbno(pag, irec->rm_startblock))
- goto out_bad_rec;
- if (irec->rm_startblock >
- irec->rm_startblock + irec->rm_blockcount)
- goto out_bad_rec;
- if (!xfs_verify_agbno(pag,
- irec->rm_startblock + irec->rm_blockcount - 1))
+ if (!xfs_verify_agbext(pag, irec->rm_startblock,
+ irec->rm_blockcount))
goto out_bad_rec;
}
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 2c4ad6e4bb14..5b2f27cbdb80 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -422,7 +422,7 @@ xfs_calc_itruncate_reservation_minlogsize(
/*
* In renaming a files we can modify:
- * the four inodes involved: 4 * inode size
+ * the five inodes involved: 5 * inode size
* the two directory btrees: 2 * (max depth + v2) * dir block size
* the two directory bmap btrees: 2 * max depth * block size
* And the bmap_finish transaction can free dir and bmap blocks (two sets
@@ -437,7 +437,7 @@ xfs_calc_rename_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
- max((xfs_calc_inode_res(mp, 4) +
+ max((xfs_calc_inode_res(mp, 5) +
xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index a6b7d98cf68f..5ebdda7e1078 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -166,6 +166,36 @@ typedef struct xfs_bmbt_irec
xfs_exntst_t br_state; /* extent state */
} xfs_bmbt_irec_t;
+enum xfs_refc_domain {
+ XFS_REFC_DOMAIN_SHARED = 0,
+ XFS_REFC_DOMAIN_COW,
+};
+
+#define XFS_REFC_DOMAIN_STRINGS \
+ { XFS_REFC_DOMAIN_SHARED, "shared" }, \
+ { XFS_REFC_DOMAIN_COW, "cow" }
+
+struct xfs_refcount_irec {
+ xfs_agblock_t rc_startblock; /* starting block number */
+ xfs_extlen_t rc_blockcount; /* count of free blocks */
+ xfs_nlink_t rc_refcount; /* number of inodes linked here */
+ enum xfs_refc_domain rc_domain; /* shared or cow staging extent? */
+};
+
+#define XFS_RMAP_ATTR_FORK (1 << 0)
+#define XFS_RMAP_BMBT_BLOCK (1 << 1)
+#define XFS_RMAP_UNWRITTEN (1 << 2)
+#define XFS_RMAP_KEY_FLAGS (XFS_RMAP_ATTR_FORK | \
+ XFS_RMAP_BMBT_BLOCK)
+#define XFS_RMAP_REC_FLAGS (XFS_RMAP_UNWRITTEN)
+struct xfs_rmap_irec {
+ xfs_agblock_t rm_startblock; /* extent start block */
+ xfs_extlen_t rm_blockcount; /* extent length */
+ uint64_t rm_owner; /* extent owner */
+ uint64_t rm_offset; /* offset within the owner */
+ unsigned int rm_flags; /* state flags */
+};
+
/* per-AG block reservation types */
enum xfs_ag_resv_type {
XFS_AG_RESV_NONE = 0,
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
index ab427b4d7fe0..3b38f4e2a537 100644
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -100,9 +100,7 @@ xchk_allocbt_rec(
bno = be32_to_cpu(rec->alloc.ar_startblock);
len = be32_to_cpu(rec->alloc.ar_blockcount);
- if (bno + len <= bno ||
- !xfs_verify_agbno(pag, bno) ||
- !xfs_verify_agbno(pag, bno + len - 1))
+ if (!xfs_verify_agbext(pag, bno, len))
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
xchk_allocbt_xref(bs->sc, bno, len);
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index 5abb5fdb71d9..5c87800ab223 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -99,7 +99,7 @@ out:
* we check the inode number to make sure it's sane, then we check that
* we can look up this filename. Finally, we check the ftype.
*/
-STATIC int
+STATIC bool
xchk_dir_actor(
struct dir_context *dir_iter,
const char *name,
@@ -124,7 +124,7 @@ xchk_dir_actor(
xfs_dir2_dataptr_to_db(mp->m_dir_geo, pos));
if (xchk_should_terminate(sdc->sc, &error))
- return error;
+ return !error;
/* Does this inode number make sense? */
if (!xfs_verify_dir_ino(mp, ino)) {
@@ -191,8 +191,8 @@ out:
* and return zero to xchk_directory.
*/
if (error == 0 && sdc->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
- return -EFSCORRUPTED;
- return error;
+ return false;
+ return !error;
}
/* Scrub a directory btree record. */
@@ -676,7 +676,7 @@ xchk_directory_blocks(
xfs_dablk_t dabno;
xfs_dir2_db_t last_data_db = 0;
bool found;
- int is_block = 0;
+ bool is_block = false;
int error;
/* Ignore local format directories. */
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index e1026e07bf94..e312be7cd375 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -108,9 +108,8 @@ xchk_iallocbt_chunk(
xfs_agblock_t bno;
bno = XFS_AGINO_TO_AGBNO(mp, agino);
- if (bno + len <= bno ||
- !xfs_verify_agbno(pag, bno) ||
- !xfs_verify_agbno(pag, bno + len - 1))
+
+ if (!xfs_verify_agbext(pag, bno, len))
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
xchk_iallocbt_chunk_xref(bs->sc, irec, agino, bno, len);
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index ab182a5cd0c0..d8dff3fd8053 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -38,7 +38,7 @@ struct xchk_parent_ctx {
};
/* Look for a single entry in a directory pointing to an inode. */
-STATIC int
+STATIC bool
xchk_parent_actor(
struct dir_context *dc,
const char *name,
@@ -62,7 +62,7 @@ xchk_parent_actor(
if (xchk_should_terminate(spc->sc, &error))
spc->cancelled = true;
- return error;
+ return !error;
}
/* Count the number of dentries in the parent dir that point to this inode. */
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index c68b767dc08f..a26ee0f24ef2 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -269,15 +269,13 @@ done:
STATIC void
xchk_refcountbt_xref_rmap(
struct xfs_scrub *sc,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- xfs_nlink_t refcount)
+ const struct xfs_refcount_irec *irec)
{
struct xchk_refcnt_check refchk = {
- .sc = sc,
- .bno = bno,
- .len = len,
- .refcount = refcount,
+ .sc = sc,
+ .bno = irec->rc_startblock,
+ .len = irec->rc_blockcount,
+ .refcount = irec->rc_refcount,
.seen = 0,
};
struct xfs_rmap_irec low;
@@ -291,9 +289,9 @@ xchk_refcountbt_xref_rmap(
/* Cross-reference with the rmapbt to confirm the refcount. */
memset(&low, 0, sizeof(low));
- low.rm_startblock = bno;
+ low.rm_startblock = irec->rc_startblock;
memset(&high, 0xFF, sizeof(high));
- high.rm_startblock = bno + len - 1;
+ high.rm_startblock = irec->rc_startblock + irec->rc_blockcount - 1;
INIT_LIST_HEAD(&refchk.fragments);
error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high,
@@ -302,7 +300,7 @@ xchk_refcountbt_xref_rmap(
goto out_free;
xchk_refcountbt_process_rmap_fragments(&refchk);
- if (refcount != refchk.seen)
+ if (irec->rc_refcount != refchk.seen)
xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
out_free:
@@ -315,17 +313,16 @@ out_free:
/* Cross-reference with the other btrees. */
STATIC void
xchk_refcountbt_xref(
- struct xfs_scrub *sc,
- xfs_agblock_t agbno,
- xfs_extlen_t len,
- xfs_nlink_t refcount)
+ struct xfs_scrub *sc,
+ const struct xfs_refcount_irec *irec)
{
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
return;
- xchk_xref_is_used_space(sc, agbno, len);
- xchk_xref_is_not_inode_chunk(sc, agbno, len);
- xchk_refcountbt_xref_rmap(sc, agbno, len, refcount);
+ xchk_xref_is_used_space(sc, irec->rc_startblock, irec->rc_blockcount);
+ xchk_xref_is_not_inode_chunk(sc, irec->rc_startblock,
+ irec->rc_blockcount);
+ xchk_refcountbt_xref_rmap(sc, irec);
}
/* Scrub a refcountbt record. */
@@ -334,35 +331,27 @@ xchk_refcountbt_rec(
struct xchk_btree *bs,
const union xfs_btree_rec *rec)
{
+ struct xfs_refcount_irec irec;
xfs_agblock_t *cow_blocks = bs->private;
struct xfs_perag *pag = bs->cur->bc_ag.pag;
- xfs_agblock_t bno;
- xfs_extlen_t len;
- xfs_nlink_t refcount;
- bool has_cowflag;
- bno = be32_to_cpu(rec->refc.rc_startblock);
- len = be32_to_cpu(rec->refc.rc_blockcount);
- refcount = be32_to_cpu(rec->refc.rc_refcount);
+ xfs_refcount_btrec_to_irec(rec, &irec);
- /* Only CoW records can have refcount == 1. */
- has_cowflag = (bno & XFS_REFC_COW_START);
- if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag))
+ /* Check the domain and refcount are not incompatible. */
+ if (!xfs_refcount_check_domain(&irec))
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
- if (has_cowflag)
- (*cow_blocks) += len;
+
+ if (irec.rc_domain == XFS_REFC_DOMAIN_COW)
+ (*cow_blocks) += irec.rc_blockcount;
/* Check the extent. */
- bno &= ~XFS_REFC_COW_START;
- if (bno + len <= bno ||
- !xfs_verify_agbno(pag, bno) ||
- !xfs_verify_agbno(pag, bno + len - 1))
+ if (!xfs_verify_agbext(pag, irec.rc_startblock, irec.rc_blockcount))
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
- if (refcount == 0)
+ if (irec.rc_refcount == 0)
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
- xchk_refcountbt_xref(bs->sc, bno, len, refcount);
+ xchk_refcountbt_xref(bs->sc, &irec);
return 0;
}
@@ -426,7 +415,6 @@ xchk_xref_is_cow_staging(
xfs_extlen_t len)
{
struct xfs_refcount_irec rc;
- bool has_cowflag;
int has_refcount;
int error;
@@ -434,8 +422,8 @@ xchk_xref_is_cow_staging(
return;
/* Find the CoW staging extent. */
- error = xfs_refcount_lookup_le(sc->sa.refc_cur,
- agbno + XFS_REFC_COW_START, &has_refcount);
+ error = xfs_refcount_lookup_le(sc->sa.refc_cur, XFS_REFC_DOMAIN_COW,
+ agbno, &has_refcount);
if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
return;
if (!has_refcount) {
@@ -451,9 +439,8 @@ xchk_xref_is_cow_staging(
return;
}
- /* CoW flag must be set, refcount must be 1. */
- has_cowflag = (rc.rc_startblock & XFS_REFC_COW_START);
- if (!has_cowflag || rc.rc_refcount != 1)
+ /* CoW lookup returned a shared extent record? */
+ if (rc.rc_domain != XFS_REFC_DOMAIN_COW)
xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
/* Must be at least as long as what was passed in */
@@ -477,7 +464,8 @@ xchk_xref_is_not_shared(
if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm))
return;
- error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared);
+ error = xfs_refcount_has_record(sc->sa.refc_cur, XFS_REFC_DOMAIN_SHARED,
+ agbno, len, &shared);
if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
return;
if (shared)
diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
index 5077a7ad5646..2788a6f2edcd 100644
--- a/fs/xfs/xfs_attr_item.c
+++ b/fs/xfs/xfs_attr_item.c
@@ -86,8 +86,6 @@ xfs_attri_log_nameval_alloc(
*/
nv = xlog_kvmalloc(sizeof(struct xfs_attri_log_nameval) +
name_len + value_len);
- if (!nv)
- return nv;
nv->name.i_addr = nv + 1;
nv->name.i_len = name_len;
@@ -247,28 +245,6 @@ xfs_attri_init(
return attrip;
}
-/*
- * Copy an attr format buffer from the given buf, and into the destination attr
- * format structure.
- */
-STATIC int
-xfs_attri_copy_format(
- struct xfs_log_iovec *buf,
- struct xfs_attri_log_format *dst_attr_fmt)
-{
- struct xfs_attri_log_format *src_attr_fmt = buf->i_addr;
- size_t len;
-
- len = sizeof(struct xfs_attri_log_format);
- if (buf->i_len != len) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
- return -EFSCORRUPTED;
- }
-
- memcpy((char *)dst_attr_fmt, (char *)src_attr_fmt, len);
- return 0;
-}
-
static inline struct xfs_attrd_log_item *ATTRD_ITEM(struct xfs_log_item *lip)
{
return container_of(lip, struct xfs_attrd_log_item, attrd_item);
@@ -441,8 +417,6 @@ xfs_attr_create_intent(
attr->xattri_nameval = xfs_attri_log_nameval_alloc(args->name,
args->namelen, args->value, args->valuelen);
}
- if (!attr->xattri_nameval)
- return ERR_PTR(-ENOMEM);
attrip = xfs_attri_init(mp, attr->xattri_nameval);
xfs_trans_add_item(tp, &attrip->attri_item);
@@ -735,24 +709,50 @@ xlog_recover_attri_commit_pass2(
struct xfs_attri_log_nameval *nv;
const void *attr_value = NULL;
const void *attr_name;
- int error;
+ size_t len;
attri_formatp = item->ri_buf[0].i_addr;
attr_name = item->ri_buf[1].i_addr;
/* Validate xfs_attri_log_format before the large memory allocation */
+ len = sizeof(struct xfs_attri_log_format);
+ if (item->ri_buf[0].i_len != len) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
if (!xfs_attri_validate(mp, attri_formatp)) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
+ /* Validate the attr name */
+ if (item->ri_buf[1].i_len !=
+ xlog_calc_iovec_len(attri_formatp->alfi_name_len)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
return -EFSCORRUPTED;
}
if (!xfs_attr_namecheck(attr_name, attri_formatp->alfi_name_len)) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[1].i_addr, item->ri_buf[1].i_len);
return -EFSCORRUPTED;
}
- if (attri_formatp->alfi_value_len)
+ /* Validate the attr value, if present */
+ if (attri_formatp->alfi_value_len != 0) {
+ if (item->ri_buf[2].i_len != xlog_calc_iovec_len(attri_formatp->alfi_value_len)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr,
+ item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
attr_value = item->ri_buf[2].i_addr;
+ }
/*
* Memory alloc failure will cause replay to abort. We attach the
@@ -762,13 +762,9 @@ xlog_recover_attri_commit_pass2(
nv = xfs_attri_log_nameval_alloc(attr_name,
attri_formatp->alfi_name_len, attr_value,
attri_formatp->alfi_value_len);
- if (!nv)
- return -ENOMEM;
attrip = xfs_attri_init(mp, nv);
- error = xfs_attri_copy_format(&item->ri_buf[0], &attrip->attri_format);
- if (error)
- goto out;
+ memcpy(&attrip->attri_format, attri_formatp, len);
/*
* The ATTRI has two references. One for the ATTRD and one for ATTRI to
@@ -780,10 +776,6 @@ xlog_recover_attri_commit_pass2(
xfs_attri_release(attrip);
xfs_attri_log_nameval_put(nv);
return 0;
-out:
- xfs_attri_item_free(attrip);
- xfs_attri_log_nameval_put(nv);
- return error;
}
/*
@@ -848,7 +840,8 @@ xlog_recover_attrd_commit_pass2(
attrd_formatp = item->ri_buf[0].i_addr;
if (item->ri_buf[0].i_len != sizeof(struct xfs_attrd_log_format)) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
return -EFSCORRUPTED;
}
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 51f66e982484..41323da523d1 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -608,28 +608,18 @@ static const struct xfs_item_ops xfs_bui_item_ops = {
.iop_relog = xfs_bui_item_relog,
};
-/*
- * Copy an BUI format buffer from the given buf, and into the destination
- * BUI format structure. The BUI/BUD items were designed not to need any
- * special alignment handling.
- */
-static int
+static inline void
xfs_bui_copy_format(
- struct xfs_log_iovec *buf,
- struct xfs_bui_log_format *dst_bui_fmt)
+ struct xfs_bui_log_format *dst,
+ const struct xfs_bui_log_format *src)
{
- struct xfs_bui_log_format *src_bui_fmt;
- uint len;
+ unsigned int i;
- src_bui_fmt = buf->i_addr;
- len = xfs_bui_log_format_sizeof(src_bui_fmt->bui_nextents);
+ memcpy(dst, src, offsetof(struct xfs_bui_log_format, bui_extents));
- if (buf->i_len == len) {
- memcpy(dst_bui_fmt, src_bui_fmt, len);
- return 0;
- }
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
- return -EFSCORRUPTED;
+ for (i = 0; i < src->bui_nextents; i++)
+ memcpy(&dst->bui_extents[i], &src->bui_extents[i],
+ sizeof(struct xfs_map_extent));
}
/*
@@ -646,23 +636,34 @@ xlog_recover_bui_commit_pass2(
struct xlog_recover_item *item,
xfs_lsn_t lsn)
{
- int error;
struct xfs_mount *mp = log->l_mp;
struct xfs_bui_log_item *buip;
struct xfs_bui_log_format *bui_formatp;
+ size_t len;
bui_formatp = item->ri_buf[0].i_addr;
+ if (item->ri_buf[0].i_len < xfs_bui_log_format_sizeof(0)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
return -EFSCORRUPTED;
}
- buip = xfs_bui_init(mp);
- error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format);
- if (error) {
- xfs_bui_item_free(buip);
- return error;
+
+ len = xfs_bui_log_format_sizeof(bui_formatp->bui_nextents);
+ if (item->ri_buf[0].i_len != len) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
}
+
+ buip = xfs_bui_init(mp);
+ xfs_bui_copy_format(&buip->bui_format, bui_formatp);
atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents);
/*
* Insert the intent into the AIL directly and drop one reference so
@@ -696,7 +697,8 @@ xlog_recover_bud_commit_pass2(
bud_formatp = item->ri_buf[0].i_addr;
if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
return -EFSCORRUPTED;
}
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index e295fc8062d8..9f3ceb461515 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -512,7 +512,7 @@ xfs_readdir(
{
struct xfs_da_args args = { NULL };
unsigned int lock_mode;
- int isblock;
+ bool isblock;
int error;
trace_xfs_readdir(dp);
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 296faa41d81d..c6b2aabd6f18 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -234,13 +234,18 @@ int
xfs_errortag_init(
struct xfs_mount *mp)
{
+ int ret;
+
mp->m_errortag = kmem_zalloc(sizeof(unsigned int) * XFS_ERRTAG_MAX,
KM_MAYFAIL);
if (!mp->m_errortag)
return -ENOMEM;
- return xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype,
- &mp->m_kobj, "errortag");
+ ret = xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype,
+ &mp->m_kobj, "errortag");
+ if (ret)
+ kmem_free(mp->m_errortag);
+ return ret;
}
void
@@ -274,7 +279,7 @@ xfs_errortag_test(
ASSERT(error_tag < XFS_ERRTAG_MAX);
randfactor = mp->m_errortag[error_tag];
- if (!randfactor || prandom_u32() % randfactor)
+ if (!randfactor || prandom_u32_max(randfactor))
return false;
xfs_warn_ratelimited(mp,
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 27ccfcd82f04..d5130d1fcfae 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -66,27 +66,16 @@ xfs_efi_release(
xfs_efi_item_free(efip);
}
-/*
- * This returns the number of iovecs needed to log the given efi item.
- * We only need 1 iovec for an efi item. It just logs the efi_log_format
- * structure.
- */
-static inline int
-xfs_efi_item_sizeof(
- struct xfs_efi_log_item *efip)
-{
- return sizeof(struct xfs_efi_log_format) +
- (efip->efi_format.efi_nextents - 1) * sizeof(xfs_extent_t);
-}
-
STATIC void
xfs_efi_item_size(
struct xfs_log_item *lip,
int *nvecs,
int *nbytes)
{
+ struct xfs_efi_log_item *efip = EFI_ITEM(lip);
+
*nvecs += 1;
- *nbytes += xfs_efi_item_sizeof(EFI_ITEM(lip));
+ *nbytes += xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents);
}
/*
@@ -112,7 +101,7 @@ xfs_efi_item_format(
xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFI_FORMAT,
&efip->efi_format,
- xfs_efi_item_sizeof(efip));
+ xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents));
}
@@ -155,13 +144,11 @@ xfs_efi_init(
{
struct xfs_efi_log_item *efip;
- uint size;
ASSERT(nextents > 0);
if (nextents > XFS_EFI_MAX_FAST_EXTENTS) {
- size = (uint)(sizeof(struct xfs_efi_log_item) +
- ((nextents - 1) * sizeof(xfs_extent_t)));
- efip = kmem_zalloc(size, 0);
+ efip = kzalloc(xfs_efi_log_item_sizeof(nextents),
+ GFP_KERNEL | __GFP_NOFAIL);
} else {
efip = kmem_cache_zalloc(xfs_efi_cache,
GFP_KERNEL | __GFP_NOFAIL);
@@ -188,15 +175,17 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
{
xfs_efi_log_format_t *src_efi_fmt = buf->i_addr;
uint i;
- uint len = sizeof(xfs_efi_log_format_t) +
- (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_t);
- uint len32 = sizeof(xfs_efi_log_format_32_t) +
- (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_32_t);
- uint len64 = sizeof(xfs_efi_log_format_64_t) +
- (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_64_t);
+ uint len = xfs_efi_log_format_sizeof(src_efi_fmt->efi_nextents);
+ uint len32 = xfs_efi_log_format32_sizeof(src_efi_fmt->efi_nextents);
+ uint len64 = xfs_efi_log_format64_sizeof(src_efi_fmt->efi_nextents);
if (buf->i_len == len) {
- memcpy((char *)dst_efi_fmt, (char*)src_efi_fmt, len);
+ memcpy(dst_efi_fmt, src_efi_fmt,
+ offsetof(struct xfs_efi_log_format, efi_extents));
+ for (i = 0; i < src_efi_fmt->efi_nextents; i++)
+ memcpy(&dst_efi_fmt->efi_extents[i],
+ &src_efi_fmt->efi_extents[i],
+ sizeof(struct xfs_extent));
return 0;
} else if (buf->i_len == len32) {
xfs_efi_log_format_32_t *src_efi_fmt_32 = buf->i_addr;
@@ -227,7 +216,8 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
}
return 0;
}
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, NULL, buf->i_addr,
+ buf->i_len);
return -EFSCORRUPTED;
}
@@ -246,27 +236,16 @@ xfs_efd_item_free(struct xfs_efd_log_item *efdp)
kmem_cache_free(xfs_efd_cache, efdp);
}
-/*
- * This returns the number of iovecs needed to log the given efd item.
- * We only need 1 iovec for an efd item. It just logs the efd_log_format
- * structure.
- */
-static inline int
-xfs_efd_item_sizeof(
- struct xfs_efd_log_item *efdp)
-{
- return sizeof(xfs_efd_log_format_t) +
- (efdp->efd_format.efd_nextents - 1) * sizeof(xfs_extent_t);
-}
-
STATIC void
xfs_efd_item_size(
struct xfs_log_item *lip,
int *nvecs,
int *nbytes)
{
+ struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
+
*nvecs += 1;
- *nbytes += xfs_efd_item_sizeof(EFD_ITEM(lip));
+ *nbytes += xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents);
}
/*
@@ -291,7 +270,7 @@ xfs_efd_item_format(
xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFD_FORMAT,
&efdp->efd_format,
- xfs_efd_item_sizeof(efdp));
+ xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents));
}
/*
@@ -340,9 +319,8 @@ xfs_trans_get_efd(
ASSERT(nextents > 0);
if (nextents > XFS_EFD_MAX_FAST_EXTENTS) {
- efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) +
- (nextents - 1) * sizeof(struct xfs_extent),
- 0);
+ efdp = kzalloc(xfs_efd_log_item_sizeof(nextents),
+ GFP_KERNEL | __GFP_NOFAIL);
} else {
efdp = kmem_cache_zalloc(xfs_efd_cache,
GFP_KERNEL | __GFP_NOFAIL);
@@ -733,6 +711,12 @@ xlog_recover_efi_commit_pass2(
efi_formatp = item->ri_buf[0].i_addr;
+ if (item->ri_buf[0].i_len < xfs_efi_log_format_sizeof(0)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format);
if (error) {
@@ -769,12 +753,24 @@ xlog_recover_efd_commit_pass2(
xfs_lsn_t lsn)
{
struct xfs_efd_log_format *efd_formatp;
+ int buflen = item->ri_buf[0].i_len;
efd_formatp = item->ri_buf[0].i_addr;
- ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
- ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
- (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
- ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t)))));
+
+ if (buflen < sizeof(struct xfs_efd_log_format)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ efd_formatp, buflen);
+ return -EFSCORRUPTED;
+ }
+
+ if (item->ri_buf[0].i_len != xfs_efd_log_format32_sizeof(
+ efd_formatp->efd_nextents) &&
+ item->ri_buf[0].i_len != xfs_efd_log_format64_sizeof(
+ efd_formatp->efd_nextents)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ efd_formatp, buflen);
+ return -EFSCORRUPTED;
+ }
xlog_recover_release_intent(log, XFS_LI_EFI, efd_formatp->efd_efi_id);
return 0;
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 186d0f2137f1..da6a5afa607c 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -52,6 +52,14 @@ struct xfs_efi_log_item {
xfs_efi_log_format_t efi_format;
};
+static inline size_t
+xfs_efi_log_item_sizeof(
+ unsigned int nr)
+{
+ return offsetof(struct xfs_efi_log_item, efi_format) +
+ xfs_efi_log_format_sizeof(nr);
+}
+
/*
* This is the "extent free done" log item. It is used to log
* the fact that some extents earlier mentioned in an efi item
@@ -64,6 +72,14 @@ struct xfs_efd_log_item {
xfs_efd_log_format_t efd_format;
};
+static inline size_t
+xfs_efd_log_item_sizeof(
+ unsigned int nr)
+{
+ return offsetof(struct xfs_efd_log_item, efd_format) +
+ xfs_efd_log_format_sizeof(nr);
+}
+
/*
* Max number of extents in fast allocation path.
*/
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index c6c80265c0b2..e462d39c840e 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1261,7 +1261,7 @@ xfs_file_llseek(
}
#ifdef CONFIG_FS_DAX
-static int
+static inline vm_fault_t
xfs_dax_fault(
struct vm_fault *vmf,
enum page_entry_size pe_size,
@@ -1274,14 +1274,15 @@ xfs_dax_fault(
&xfs_read_iomap_ops);
}
#else
-static int
+static inline vm_fault_t
xfs_dax_fault(
struct vm_fault *vmf,
enum page_entry_size pe_size,
bool write_fault,
pfn_t *pfn)
{
- return 0;
+ ASSERT(0);
+ return VM_FAULT_SIGBUS;
}
#endif
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 2bbe7916a998..eae7427062cf 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -596,7 +596,7 @@ xfs_iget_cache_miss(
*/
if (xfs_has_v3inodes(mp) &&
(flags & XFS_IGET_CREATE) && !xfs_has_ikeep(mp)) {
- VFS_I(ip)->i_generation = prandom_u32();
+ VFS_I(ip)->i_generation = get_random_u32();
} else {
struct xfs_buf *bp;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 28493c8e9bb2..aa303be11576 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -835,9 +835,8 @@ xfs_init_new_inode(
* ID or one of the supplementary group IDs, the S_ISGID bit is cleared
* (and only if the irix_sgid_inherit compatibility variable is set).
*/
- if (irix_sgid_inherit &&
- (inode->i_mode & S_ISGID) &&
- !in_group_p(i_gid_into_mnt(mnt_userns, inode)))
+ if (irix_sgid_inherit && (inode->i_mode & S_ISGID) &&
+ !vfsgid_in_group_p(i_gid_into_vfsgid(mnt_userns, inode)))
inode->i_mode &= ~S_ISGID;
ip->i_disk_size = 0;
@@ -2819,7 +2818,7 @@ retry:
* Lock all the participating inodes. Depending upon whether
* the target_name exists in the target directory, and
* whether the target directory is the same as the source
- * directory, we can lock from 2 to 4 inodes.
+ * directory, we can lock from 2 to 5 inodes.
*/
xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
@@ -3119,7 +3118,7 @@ xfs_iflush(
if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
mp, XFS_ERRTAG_IFLUSH_1)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
- "%s: Bad inode %Lu magic number 0x%x, ptr "PTR_FMT,
+ "%s: Bad inode %llu magic number 0x%x, ptr "PTR_FMT,
__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
goto flush_out;
}
@@ -3129,7 +3128,7 @@ xfs_iflush(
ip->i_df.if_format != XFS_DINODE_FMT_BTREE,
mp, XFS_ERRTAG_IFLUSH_3)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
- "%s: Bad regular inode %Lu, ptr "PTR_FMT,
+ "%s: Bad regular inode %llu, ptr "PTR_FMT,
__func__, ip->i_ino, ip);
goto flush_out;
}
@@ -3140,7 +3139,7 @@ xfs_iflush(
ip->i_df.if_format != XFS_DINODE_FMT_LOCAL,
mp, XFS_ERRTAG_IFLUSH_4)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
- "%s: Bad directory inode %Lu, ptr "PTR_FMT,
+ "%s: Bad directory inode %llu, ptr "PTR_FMT,
__func__, ip->i_ino, ip);
goto flush_out;
}
@@ -3158,7 +3157,7 @@ xfs_iflush(
if (XFS_TEST_ERROR(ip->i_forkoff > mp->m_sb.sb_inodesize,
mp, XFS_ERRTAG_IFLUSH_6)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
- "%s: bad inode %Lu, forkoff 0x%x, ptr "PTR_FMT,
+ "%s: bad inode %llu, forkoff 0x%x, ptr "PTR_FMT,
__func__, ip->i_ino, ip->i_forkoff, ip);
goto flush_out;
}
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 6e19ece916bf..ca2941ab6cbc 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -550,7 +550,7 @@ xfs_inode_item_push(
if (!bp || (ip->i_flags & XFS_ISTALE)) {
/*
- * Inode item/buffer is being being aborted due to cluster
+ * Inode item/buffer is being aborted due to cluster
* buffer deletion. Trigger a log force to have that operation
* completed and items removed from the AIL before the next push
* attempt.
diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
index d28ffaebd067..0e5dba2343ea 100644
--- a/fs/xfs/xfs_inode_item_recover.c
+++ b/fs/xfs/xfs_inode_item_recover.c
@@ -321,7 +321,7 @@ xlog_recover_inode_commit_pass2(
*/
if (XFS_IS_CORRUPT(mp, !xfs_verify_magic16(bp, dip->di_magic))) {
xfs_alert(mp,
- "%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld",
+ "%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %lld",
__func__, dip, bp, in_f->ilf_ino);
error = -EFSCORRUPTED;
goto out_release;
@@ -329,7 +329,7 @@ xlog_recover_inode_commit_pass2(
ldip = item->ri_buf[1].i_addr;
if (XFS_IS_CORRUPT(mp, ldip->di_magic != XFS_DINODE_MAGIC)) {
xfs_alert(mp,
- "%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld",
+ "%s: Bad inode log record, rec ptr "PTR_FMT", ino %lld",
__func__, item, in_f->ilf_ino);
error = -EFSCORRUPTED;
goto out_release;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 45518b8c613c..2e10e1c66ad6 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -167,7 +167,7 @@ xfs_generic_create(
struct dentry *dentry,
umode_t mode,
dev_t rdev,
- bool tmpfile) /* unnamed file */
+ struct file *tmpfile) /* unnamed file */
{
struct inode *inode;
struct xfs_inode *ip = NULL;
@@ -234,7 +234,7 @@ xfs_generic_create(
* d_tmpfile can immediately set it back to zero.
*/
set_nlink(inode, 1);
- d_tmpfile(dentry, inode);
+ d_tmpfile(tmpfile, inode);
} else
d_instantiate(dentry, inode);
@@ -261,7 +261,7 @@ xfs_vn_mknod(
umode_t mode,
dev_t rdev)
{
- return xfs_generic_create(mnt_userns, dir, dentry, mode, rdev, false);
+ return xfs_generic_create(mnt_userns, dir, dentry, mode, rdev, NULL);
}
STATIC int
@@ -272,7 +272,7 @@ xfs_vn_create(
umode_t mode,
bool flags)
{
- return xfs_generic_create(mnt_userns, dir, dentry, mode, 0, false);
+ return xfs_generic_create(mnt_userns, dir, dentry, mode, 0, NULL);
}
STATIC int
@@ -283,7 +283,7 @@ xfs_vn_mkdir(
umode_t mode)
{
return xfs_generic_create(mnt_userns, dir, dentry, mode | S_IFDIR, 0,
- false);
+ NULL);
}
STATIC struct dentry *
@@ -558,6 +558,8 @@ xfs_vn_getattr(
struct inode *inode = d_inode(path->dentry);
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
+ vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
+ vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
trace_xfs_getattr(ip);
@@ -568,8 +570,8 @@ xfs_vn_getattr(
stat->dev = inode->i_sb->s_dev;
stat->mode = inode->i_mode;
stat->nlink = inode->i_nlink;
- stat->uid = i_uid_into_mnt(mnt_userns, inode);
- stat->gid = i_gid_into_mnt(mnt_userns, inode);
+ stat->uid = vfsuid_into_kuid(vfsuid);
+ stat->gid = vfsgid_into_kgid(vfsgid);
stat->ino = ip->i_ino;
stat->atime = inode->i_atime;
stat->mtime = inode->i_mtime;
@@ -604,6 +606,16 @@ xfs_vn_getattr(
stat->blksize = BLKDEV_IOSIZE;
stat->rdev = inode->i_rdev;
break;
+ case S_IFREG:
+ if (request_mask & STATX_DIOALIGN) {
+ struct xfs_buftarg *target = xfs_inode_buftarg(ip);
+ struct block_device *bdev = target->bt_bdev;
+
+ stat->result_mask |= STATX_DIOALIGN;
+ stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
+ stat->dio_offset_align = bdev_logical_block_size(bdev);
+ }
+ fallthrough;
default:
stat->blksize = xfs_stat_blksize(ip);
stat->rdev = 0;
@@ -1080,10 +1092,12 @@ STATIC int
xfs_vn_tmpfile(
struct user_namespace *mnt_userns,
struct inode *dir,
- struct dentry *dentry,
+ struct file *file,
umode_t mode)
{
- return xfs_generic_create(mnt_userns, dir, dentry, mode, 0, true);
+ int err = xfs_generic_create(mnt_userns, dir, file->f_path.dentry, mode, 0, file);
+
+ return finish_open_simple(file, err);
}
static const struct inode_operations xfs_inode_operations = {
diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h
index cb5fc68c9ea0..e570dcb5df8d 100644
--- a/fs/xfs/xfs_iops.h
+++ b/fs/xfs/xfs_iops.h
@@ -13,7 +13,6 @@ extern const struct file_operations xfs_dir_file_operations;
extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
-extern void xfs_setattr_time(struct xfs_inode *ip, struct iattr *iattr);
int xfs_vn_setattr_size(struct user_namespace *mnt_userns,
struct dentry *dentry, struct iattr *vap);
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 36312b00b164..a1c2bcf65d37 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -66,6 +66,8 @@ xfs_bulkstat_one_int(
struct xfs_bulkstat *buf = bc->buf;
xfs_extnum_t nextents;
int error = -EINVAL;
+ vfsuid_t vfsuid;
+ vfsgid_t vfsgid;
if (xfs_internal_inum(mp, ino))
goto out_advance;
@@ -81,14 +83,16 @@ xfs_bulkstat_one_int(
ASSERT(ip != NULL);
ASSERT(ip->i_imap.im_blkno != 0);
inode = VFS_I(ip);
+ vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
+ vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
/* xfs_iget returns the following without needing
* further change.
*/
buf->bs_projectid = ip->i_projid;
buf->bs_ino = ino;
- buf->bs_uid = from_kuid(sb_userns, i_uid_into_mnt(mnt_userns, inode));
- buf->bs_gid = from_kgid(sb_userns, i_gid_into_mnt(mnt_userns, inode));
+ buf->bs_uid = from_kuid(sb_userns, vfsuid_into_kuid(vfsuid));
+ buf->bs_gid = from_kgid(sb_userns, vfsgid_into_kgid(vfsgid));
buf->bs_size = ip->i_disk_size;
buf->bs_nlink = inode->i_nlink;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 386b0307aed8..f02a0dd522b3 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -226,12 +226,12 @@ xlog_ticket_reservation(
if (head == &log->l_write_head) {
ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
return tic->t_unit_res;
- } else {
- if (tic->t_flags & XLOG_TIC_PERM_RESERV)
- return tic->t_unit_res * tic->t_cnt;
- else
- return tic->t_unit_res;
}
+
+ if (tic->t_flags & XLOG_TIC_PERM_RESERV)
+ return tic->t_unit_res * tic->t_cnt;
+
+ return tic->t_unit_res;
}
STATIC bool
@@ -3544,7 +3544,7 @@ xlog_ticket_alloc(
tic->t_curr_res = unit_res;
tic->t_cnt = cnt;
tic->t_ocnt = cnt;
- tic->t_tid = prandom_u32();
+ tic->t_tid = get_random_u32();
if (permanent)
tic->t_flags |= XLOG_TIC_PERM_RESERV;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 17e923b9c5fa..322eb2ee6c55 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2552,6 +2552,8 @@ xlog_recover_process_intents(
for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
lip != NULL;
lip = xfs_trans_ail_cursor_next(ailp, &cur)) {
+ const struct xfs_item_ops *ops;
+
if (!xlog_item_is_intent(lip))
break;
@@ -2567,13 +2569,17 @@ xlog_recover_process_intents(
* deferred ops, you /must/ attach them to the capture list in
* the recover routine or else those subsequent intents will be
* replayed in the wrong order!
+ *
+ * The recovery function can free the log item, so we must not
+ * access lip after it returns.
*/
spin_unlock(&ailp->ail_lock);
- error = lip->li_ops->iop_recover(lip, &capture_list);
+ ops = lip->li_ops;
+ error = ops->iop_recover(lip, &capture_list);
spin_lock(&ailp->ail_lock);
if (error) {
trace_xlog_intent_recovery_failed(log->l_mp, error,
- lip->li_ops->iop_recover);
+ ops->iop_recover);
break;
}
}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index f10c88cee116..e8bb3c2e847e 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -300,26 +300,28 @@ xfs_validate_new_dalign(
"alignment check failed: sunit/swidth vs. blocksize(%d)",
mp->m_sb.sb_blocksize);
return -EINVAL;
- } else {
- /*
- * Convert the stripe unit and width to FSBs.
- */
- mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
- if (mp->m_dalign && (mp->m_sb.sb_agblocks % mp->m_dalign)) {
- xfs_warn(mp,
- "alignment check failed: sunit/swidth vs. agsize(%d)",
- mp->m_sb.sb_agblocks);
- return -EINVAL;
- } else if (mp->m_dalign) {
- mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
- } else {
- xfs_warn(mp,
- "alignment check failed: sunit(%d) less than bsize(%d)",
- mp->m_dalign, mp->m_sb.sb_blocksize);
- return -EINVAL;
- }
}
+ /*
+ * Convert the stripe unit and width to FSBs.
+ */
+ mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
+ if (mp->m_dalign && (mp->m_sb.sb_agblocks % mp->m_dalign)) {
+ xfs_warn(mp,
+ "alignment check failed: sunit/swidth vs. agsize(%d)",
+ mp->m_sb.sb_agblocks);
+ return -EINVAL;
+ }
+
+ if (!mp->m_dalign) {
+ xfs_warn(mp,
+ "alignment check failed: sunit(%d) less than bsize(%d)",
+ mp->m_dalign, mp->m_sb.sb_blocksize);
+ return -EINVAL;
+ }
+
+ mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
+
if (!xfs_has_dalign(mp)) {
xfs_warn(mp,
"cannot change alignment: superblock does not support data alignment");
diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c
index 69d9c83ea4b2..c4078d0ec108 100644
--- a/fs/xfs/xfs_notify_failure.c
+++ b/fs/xfs/xfs_notify_failure.c
@@ -23,17 +23,18 @@
#include <linux/mm.h>
#include <linux/dax.h>
-struct failure_info {
+struct xfs_failure_info {
xfs_agblock_t startblock;
xfs_extlen_t blockcount;
int mf_flags;
+ bool want_shutdown;
};
static pgoff_t
xfs_failure_pgoff(
struct xfs_mount *mp,
const struct xfs_rmap_irec *rec,
- const struct failure_info *notify)
+ const struct xfs_failure_info *notify)
{
loff_t pos = XFS_FSB_TO_B(mp, rec->rm_offset);
@@ -47,7 +48,7 @@ static unsigned long
xfs_failure_pgcnt(
struct xfs_mount *mp,
const struct xfs_rmap_irec *rec,
- const struct failure_info *notify)
+ const struct xfs_failure_info *notify)
{
xfs_agblock_t end_rec;
xfs_agblock_t end_notify;
@@ -71,13 +72,13 @@ xfs_dax_failure_fn(
{
struct xfs_mount *mp = cur->bc_mp;
struct xfs_inode *ip;
- struct failure_info *notify = data;
+ struct xfs_failure_info *notify = data;
int error = 0;
if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) ||
(rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))) {
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK);
- return -EFSCORRUPTED;
+ notify->want_shutdown = true;
+ return 0;
}
/* Get files that incore, filter out others that are not in use. */
@@ -86,8 +87,10 @@ xfs_dax_failure_fn(
/* Continue the rmap query if the inode isn't incore */
if (error == -ENODATA)
return 0;
- if (error)
- return error;
+ if (error) {
+ notify->want_shutdown = true;
+ return 0;
+ }
error = mf_dax_kill_procs(VFS_I(ip)->i_mapping,
xfs_failure_pgoff(mp, rec, notify),
@@ -104,6 +107,7 @@ xfs_dax_notify_ddev_failure(
xfs_daddr_t bblen,
int mf_flags)
{
+ struct xfs_failure_info notify = { .mf_flags = mf_flags };
struct xfs_trans *tp = NULL;
struct xfs_btree_cur *cur = NULL;
struct xfs_buf *agf_bp = NULL;
@@ -120,7 +124,6 @@ xfs_dax_notify_ddev_failure(
for (; agno <= end_agno; agno++) {
struct xfs_rmap_irec ri_low = { };
struct xfs_rmap_irec ri_high;
- struct failure_info notify;
struct xfs_agf *agf;
xfs_agblock_t agend;
struct xfs_perag *pag;
@@ -161,6 +164,11 @@ xfs_dax_notify_ddev_failure(
}
xfs_trans_cancel(tp);
+ if (error || notify.want_shutdown) {
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK);
+ if (!error)
+ error = -EFSCORRUPTED;
+ }
return error;
}
@@ -175,13 +183,13 @@ xfs_dax_notify_failure(
u64 ddev_start;
u64 ddev_end;
- if (!(mp->m_sb.sb_flags & SB_BORN)) {
+ if (!(mp->m_super->s_flags & SB_BORN)) {
xfs_warn(mp, "filesystem is not ready for notify_failure()!");
return -EIO;
}
if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_daxdev == dax_dev) {
- xfs_warn(mp,
+ xfs_debug(mp,
"notify_failure() not supported on realtime device!");
return -EOPNOTSUPP;
}
@@ -194,7 +202,7 @@ xfs_dax_notify_failure(
}
if (!xfs_has_rmapbt(mp)) {
- xfs_warn(mp, "notify_failure() needs rmapbt enabled!");
+ xfs_debug(mp, "notify_failure() needs rmapbt enabled!");
return -EOPNOTSUPP;
}
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index 758702b9495f..9737b5a9f405 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -118,10 +118,10 @@ xfs_check_ondisk_structs(void)
/* log structures */
XFS_CHECK_STRUCT_SIZE(struct xfs_buf_log_format, 88);
XFS_CHECK_STRUCT_SIZE(struct xfs_dq_logformat, 24);
- XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_32, 28);
- XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_64, 32);
- XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_32, 28);
- XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_64, 32);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_32, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_64, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_32, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_64, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_extent_32, 12);
XFS_CHECK_STRUCT_SIZE(struct xfs_extent_64, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_log_dinode, 176);
@@ -134,6 +134,21 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_attri_log_format, 40);
XFS_CHECK_STRUCT_SIZE(struct xfs_attrd_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_bui_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_bud_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_cui_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_cud_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_rui_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_rud_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_map_extent, 32);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_phys_extent, 16);
+
+ XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents, 16);
+ XFS_CHECK_OFFSET(struct xfs_cui_log_format, cui_extents, 16);
+ XFS_CHECK_OFFSET(struct xfs_rui_log_format, rui_extents, 16);
+ XFS_CHECK_OFFSET(struct xfs_efi_log_format, efi_extents, 16);
+ XFS_CHECK_OFFSET(struct xfs_efi_log_format_32, efi_extents, 16);
+ XFS_CHECK_OFFSET(struct xfs_efi_log_format_64, efi_extents, 16);
/*
* The v5 superblock format extended several v4 header structures with
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 7e97bf19793d..858e3e9eb4a8 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -523,7 +523,9 @@ xfs_cui_item_recover(
type = refc_type;
break;
default:
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ &cuip->cui_format,
+ sizeof(cuip->cui_format));
error = -EFSCORRUPTED;
goto abort_error;
}
@@ -536,7 +538,8 @@ xfs_cui_item_recover(
&new_fsb, &new_len, &rcur);
if (error == -EFSCORRUPTED)
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- refc, sizeof(*refc));
+ &cuip->cui_format,
+ sizeof(cuip->cui_format));
if (error)
goto abort_error;
@@ -622,28 +625,18 @@ static const struct xfs_item_ops xfs_cui_item_ops = {
.iop_relog = xfs_cui_item_relog,
};
-/*
- * Copy an CUI format buffer from the given buf, and into the destination
- * CUI format structure. The CUI/CUD items were designed not to need any
- * special alignment handling.
- */
-static int
+static inline void
xfs_cui_copy_format(
- struct xfs_log_iovec *buf,
- struct xfs_cui_log_format *dst_cui_fmt)
+ struct xfs_cui_log_format *dst,
+ const struct xfs_cui_log_format *src)
{
- struct xfs_cui_log_format *src_cui_fmt;
- uint len;
+ unsigned int i;
- src_cui_fmt = buf->i_addr;
- len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents);
+ memcpy(dst, src, offsetof(struct xfs_cui_log_format, cui_extents));
- if (buf->i_len == len) {
- memcpy(dst_cui_fmt, src_cui_fmt, len);
- return 0;
- }
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
- return -EFSCORRUPTED;
+ for (i = 0; i < src->cui_nextents; i++)
+ memcpy(&dst->cui_extents[i], &src->cui_extents[i],
+ sizeof(struct xfs_phys_extent));
}
/*
@@ -660,19 +653,28 @@ xlog_recover_cui_commit_pass2(
struct xlog_recover_item *item,
xfs_lsn_t lsn)
{
- int error;
struct xfs_mount *mp = log->l_mp;
struct xfs_cui_log_item *cuip;
struct xfs_cui_log_format *cui_formatp;
+ size_t len;
cui_formatp = item->ri_buf[0].i_addr;
- cuip = xfs_cui_init(mp, cui_formatp->cui_nextents);
- error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format);
- if (error) {
- xfs_cui_item_free(cuip);
- return error;
+ if (item->ri_buf[0].i_len < xfs_cui_log_format_sizeof(0)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
}
+
+ len = xfs_cui_log_format_sizeof(cui_formatp->cui_nextents);
+ if (item->ri_buf[0].i_len != len) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
+ cuip = xfs_cui_init(mp, cui_formatp->cui_nextents);
+ xfs_cui_copy_format(&cuip->cui_format, cui_formatp);
atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);
/*
* Insert the intent into the AIL directly and drop one reference so
@@ -706,7 +708,8 @@ xlog_recover_cud_commit_pass2(
cud_formatp = item->ri_buf[0].i_addr;
if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
return -EFSCORRUPTED;
}
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 251f20ddd368..93bdd25680bc 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -200,7 +200,9 @@ xfs_reflink_trim_around_shared(
if (fbno == NULLAGBLOCK) {
/* No shared blocks at all. */
return 0;
- } else if (fbno == agbno) {
+ }
+
+ if (fbno == agbno) {
/*
* The start of this extent is shared. Truncate the
* mapping at the end of the shared region so that a
@@ -210,16 +212,16 @@ xfs_reflink_trim_around_shared(
irec->br_blockcount = flen;
*shared = true;
return 0;
- } else {
- /*
- * There's a shared extent midway through this extent.
- * Truncate the mapping at the start of the shared
- * extent so that a subsequent iteration starts at the
- * start of the shared region.
- */
- irec->br_blockcount = fbno - agbno;
- return 0;
}
+
+ /*
+ * There's a shared extent midway through this extent.
+ * Truncate the mapping at the start of the shared
+ * extent so that a subsequent iteration starts at the
+ * start of the shared region.
+ */
+ irec->br_blockcount = fbno - agbno;
+ return 0;
}
int
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index fef92e02f3bb..534504ede1a3 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -155,31 +155,6 @@ xfs_rui_init(
return ruip;
}
-/*
- * Copy an RUI format buffer from the given buf, and into the destination
- * RUI format structure. The RUI/RUD items were designed not to need any
- * special alignment handling.
- */
-STATIC int
-xfs_rui_copy_format(
- struct xfs_log_iovec *buf,
- struct xfs_rui_log_format *dst_rui_fmt)
-{
- struct xfs_rui_log_format *src_rui_fmt;
- uint len;
-
- src_rui_fmt = buf->i_addr;
- len = xfs_rui_log_format_sizeof(src_rui_fmt->rui_nextents);
-
- if (buf->i_len != len) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
- return -EFSCORRUPTED;
- }
-
- memcpy(dst_rui_fmt, src_rui_fmt, len);
- return 0;
-}
-
static inline struct xfs_rud_log_item *RUD_ITEM(struct xfs_log_item *lip)
{
return container_of(lip, struct xfs_rud_log_item, rud_item);
@@ -582,7 +557,9 @@ xfs_rui_item_recover(
type = XFS_RMAP_FREE;
break;
default:
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ &ruip->rui_format,
+ sizeof(ruip->rui_format));
error = -EFSCORRUPTED;
goto abort_error;
}
@@ -652,6 +629,20 @@ static const struct xfs_item_ops xfs_rui_item_ops = {
.iop_relog = xfs_rui_item_relog,
};
+static inline void
+xfs_rui_copy_format(
+ struct xfs_rui_log_format *dst,
+ const struct xfs_rui_log_format *src)
+{
+ unsigned int i;
+
+ memcpy(dst, src, offsetof(struct xfs_rui_log_format, rui_extents));
+
+ for (i = 0; i < src->rui_nextents; i++)
+ memcpy(&dst->rui_extents[i], &src->rui_extents[i],
+ sizeof(struct xfs_map_extent));
+}
+
/*
* This routine is called to create an in-core extent rmap update
* item from the rui format structure which was logged on disk.
@@ -666,19 +657,28 @@ xlog_recover_rui_commit_pass2(
struct xlog_recover_item *item,
xfs_lsn_t lsn)
{
- int error;
struct xfs_mount *mp = log->l_mp;
struct xfs_rui_log_item *ruip;
struct xfs_rui_log_format *rui_formatp;
+ size_t len;
rui_formatp = item->ri_buf[0].i_addr;
- ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
- error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format);
- if (error) {
- xfs_rui_item_free(ruip);
- return error;
+ if (item->ri_buf[0].i_len < xfs_rui_log_format_sizeof(0)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
+ len = xfs_rui_log_format_sizeof(rui_formatp->rui_nextents);
+ if (item->ri_buf[0].i_len != len) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
}
+
+ ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
+ xfs_rui_copy_format(&ruip->rui_format, rui_formatp);
atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
/*
* Insert the intent into the AIL directly and drop one reference so
@@ -711,7 +711,11 @@ xlog_recover_rud_commit_pass2(
struct xfs_rud_log_format *rud_formatp;
rud_formatp = item->ri_buf[0].i_addr;
- ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format));
+ if (item->ri_buf[0].i_len != sizeof(struct xfs_rud_log_format)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ rud_formatp, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
xlog_recover_release_intent(log, XFS_LI_RUI, rud_formatp->rud_rui_id);
return 0;
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index 20e0534a772c..90a77cd3ebad 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -74,7 +74,7 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
defer_relog += per_cpu_ptr(stats, i)->s.defer_relog;
}
- len += scnprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n",
+ len += scnprintf(buf + len, PATH_MAX-len, "xpc %llu %llu %llu\n",
xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
len += scnprintf(buf + len, PATH_MAX-len, "defer_relog %llu\n",
defer_relog);
@@ -125,7 +125,7 @@ static int xqmstat_proc_show(struct seq_file *m, void *v)
{
int j;
- seq_printf(m, "qm");
+ seq_puts(m, "qm");
for (j = XFSSTAT_START_XQMSTAT; j < XFSSTAT_END_XQMSTAT; j++)
seq_printf(m, " %u", counter_val(xfsstats.xs_stats, j));
seq_putc(m, '\n');
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 9ac59814bbb6..ee4b429a2f2c 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -653,7 +653,7 @@ xfs_fs_destroy_inode(
static void
xfs_fs_dirty_inode(
struct inode *inode,
- int flag)
+ int flags)
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
@@ -661,7 +661,13 @@ xfs_fs_dirty_inode(
if (!(inode->i_sb->s_flags & SB_LAZYTIME))
return;
- if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))
+
+ /*
+ * Only do the timestamp update if the inode is dirty (I_DIRTY_SYNC)
+ * and has dirty timestamp (I_DIRTY_TIME). I_DIRTY_TIME can be passed
+ * in flags possibly together with I_DIRTY_SYNC.
+ */
+ if ((flags & ~I_DIRTY_TIME) != I_DIRTY_SYNC || !(flags & I_DIRTY_TIME))
return;
if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
@@ -2022,18 +2028,14 @@ xfs_init_caches(void)
goto out_destroy_trans_cache;
xfs_efd_cache = kmem_cache_create("xfs_efd_item",
- (sizeof(struct xfs_efd_log_item) +
- (XFS_EFD_MAX_FAST_EXTENTS - 1) *
- sizeof(struct xfs_extent)),
- 0, 0, NULL);
+ xfs_efd_log_item_sizeof(XFS_EFD_MAX_FAST_EXTENTS),
+ 0, 0, NULL);
if (!xfs_efd_cache)
goto out_destroy_buf_item_cache;
xfs_efi_cache = kmem_cache_create("xfs_efi_item",
- (sizeof(struct xfs_efi_log_item) +
- (XFS_EFI_MAX_FAST_EXTENTS - 1) *
- sizeof(struct xfs_extent)),
- 0, 0, NULL);
+ xfs_efi_log_item_sizeof(XFS_EFI_MAX_FAST_EXTENTS),
+ 0, 0, NULL);
if (!xfs_efi_cache)
goto out_destroy_efd_cache;
diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h
index 43585850f154..513095e353a5 100644
--- a/fs/xfs/xfs_sysfs.h
+++ b/fs/xfs/xfs_sysfs.h
@@ -33,10 +33,15 @@ xfs_sysfs_init(
const char *name)
{
struct kobject *parent;
+ int err;
parent = parent_kobj ? &parent_kobj->kobject : NULL;
init_completion(&kobj->complete);
- return kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name);
+ err = kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name);
+ if (err)
+ kobject_put(&kobj->kobject);
+
+ return err;
}
static inline void
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index f9057af6e0c8..372d871bccc5 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -799,6 +799,9 @@ TRACE_DEFINE_ENUM(PE_SIZE_PTE);
TRACE_DEFINE_ENUM(PE_SIZE_PMD);
TRACE_DEFINE_ENUM(PE_SIZE_PUD);
+TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_SHARED);
+TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_COW);
+
TRACE_EVENT(xfs_filemap_fault,
TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size,
bool write_fault),
@@ -1170,7 +1173,7 @@ DECLARE_EVENT_CLASS(xfs_dqtrx_class,
__entry->ino_res_used = qtrx->qt_ino_res_used;
__entry->icount_delta = qtrx->qt_icount_delta;
),
- TP_printk("dev %d:%d dquot id 0x%x type %s flags %s"
+ TP_printk("dev %d:%d dquot id 0x%x type %s flags %s "
"blk_res %llu bcount_delta %lld delbcnt_delta %lld "
"rtblk_res %llu rtblk_res_used %llu rtbcount_delta %lld delrtb_delta %lld "
"ino_res %llu ino_res_used %llu icount_delta %lld",
@@ -1602,7 +1605,7 @@ TRACE_EVENT(xfs_bunmap,
__entry->caller_ip = caller_ip;
__entry->flags = flags;
),
- TP_printk("dev %d:%d ino 0x%llx disize 0x%llx fileoff 0x%llx fsbcount 0x%llx"
+ TP_printk("dev %d:%d ino 0x%llx disize 0x%llx fileoff 0x%llx fsbcount 0x%llx "
"flags %s caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
@@ -2925,6 +2928,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
+ __field(enum xfs_refc_domain, domain)
__field(xfs_agblock_t, startblock)
__field(xfs_extlen_t, blockcount)
__field(xfs_nlink_t, refcount)
@@ -2932,13 +2936,15 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class,
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
+ __entry->domain = irec->rc_domain;
__entry->startblock = irec->rc_startblock;
__entry->blockcount = irec->rc_blockcount;
__entry->refcount = irec->rc_refcount;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u",
+ TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
+ __print_symbolic(__entry->domain, XFS_REFC_DOMAIN_STRINGS),
__entry->startblock,
__entry->blockcount,
__entry->refcount)
@@ -2958,6 +2964,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
+ __field(enum xfs_refc_domain, domain)
__field(xfs_agblock_t, startblock)
__field(xfs_extlen_t, blockcount)
__field(xfs_nlink_t, refcount)
@@ -2966,14 +2973,16 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class,
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
+ __entry->domain = irec->rc_domain;
__entry->startblock = irec->rc_startblock;
__entry->blockcount = irec->rc_blockcount;
__entry->refcount = irec->rc_refcount;
__entry->agbno = agbno;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x",
+ TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
+ __print_symbolic(__entry->domain, XFS_REFC_DOMAIN_STRINGS),
__entry->startblock,
__entry->blockcount,
__entry->refcount,
@@ -2994,9 +3003,11 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
+ __field(enum xfs_refc_domain, i1_domain)
__field(xfs_agblock_t, i1_startblock)
__field(xfs_extlen_t, i1_blockcount)
__field(xfs_nlink_t, i1_refcount)
+ __field(enum xfs_refc_domain, i2_domain)
__field(xfs_agblock_t, i2_startblock)
__field(xfs_extlen_t, i2_blockcount)
__field(xfs_nlink_t, i2_refcount)
@@ -3004,20 +3015,24 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class,
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
+ __entry->i1_domain = i1->rc_domain;
__entry->i1_startblock = i1->rc_startblock;
__entry->i1_blockcount = i1->rc_blockcount;
__entry->i1_refcount = i1->rc_refcount;
+ __entry->i2_domain = i2->rc_domain;
__entry->i2_startblock = i2->rc_startblock;
__entry->i2_blockcount = i2->rc_blockcount;
__entry->i2_refcount = i2->rc_refcount;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- "
- "agbno 0x%x fsbcount 0x%x refcount %u",
+ TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- "
+ "dom %s agbno 0x%x fsbcount 0x%x refcount %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
+ __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS),
__entry->i1_startblock,
__entry->i1_blockcount,
__entry->i1_refcount,
+ __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS),
__entry->i2_startblock,
__entry->i2_blockcount,
__entry->i2_refcount)
@@ -3038,9 +3053,11 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
+ __field(enum xfs_refc_domain, i1_domain)
__field(xfs_agblock_t, i1_startblock)
__field(xfs_extlen_t, i1_blockcount)
__field(xfs_nlink_t, i1_refcount)
+ __field(enum xfs_refc_domain, i2_domain)
__field(xfs_agblock_t, i2_startblock)
__field(xfs_extlen_t, i2_blockcount)
__field(xfs_nlink_t, i2_refcount)
@@ -3049,21 +3066,25 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class,
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
+ __entry->i1_domain = i1->rc_domain;
__entry->i1_startblock = i1->rc_startblock;
__entry->i1_blockcount = i1->rc_blockcount;
__entry->i1_refcount = i1->rc_refcount;
+ __entry->i2_domain = i2->rc_domain;
__entry->i2_startblock = i2->rc_startblock;
__entry->i2_blockcount = i2->rc_blockcount;
__entry->i2_refcount = i2->rc_refcount;
__entry->agbno = agbno;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- "
- "agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x",
+ TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- "
+ "dom %s agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
+ __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS),
__entry->i1_startblock,
__entry->i1_blockcount,
__entry->i1_refcount,
+ __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS),
__entry->i2_startblock,
__entry->i2_blockcount,
__entry->i2_refcount,
@@ -3086,12 +3107,15 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
+ __field(enum xfs_refc_domain, i1_domain)
__field(xfs_agblock_t, i1_startblock)
__field(xfs_extlen_t, i1_blockcount)
__field(xfs_nlink_t, i1_refcount)
+ __field(enum xfs_refc_domain, i2_domain)
__field(xfs_agblock_t, i2_startblock)
__field(xfs_extlen_t, i2_blockcount)
__field(xfs_nlink_t, i2_refcount)
+ __field(enum xfs_refc_domain, i3_domain)
__field(xfs_agblock_t, i3_startblock)
__field(xfs_extlen_t, i3_blockcount)
__field(xfs_nlink_t, i3_refcount)
@@ -3099,27 +3123,33 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class,
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
+ __entry->i1_domain = i1->rc_domain;
__entry->i1_startblock = i1->rc_startblock;
__entry->i1_blockcount = i1->rc_blockcount;
__entry->i1_refcount = i1->rc_refcount;
+ __entry->i2_domain = i2->rc_domain;
__entry->i2_startblock = i2->rc_startblock;
__entry->i2_blockcount = i2->rc_blockcount;
__entry->i2_refcount = i2->rc_refcount;
+ __entry->i3_domain = i3->rc_domain;
__entry->i3_startblock = i3->rc_startblock;
__entry->i3_blockcount = i3->rc_blockcount;
__entry->i3_refcount = i3->rc_refcount;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- "
- "agbno 0x%x fsbcount 0x%x refcount %u -- "
- "agbno 0x%x fsbcount 0x%x refcount %u",
+ TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- "
+ "dom %s agbno 0x%x fsbcount 0x%x refcount %u -- "
+ "dom %s agbno 0x%x fsbcount 0x%x refcount %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
+ __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS),
__entry->i1_startblock,
__entry->i1_blockcount,
__entry->i1_refcount,
+ __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS),
__entry->i2_startblock,
__entry->i2_blockcount,
__entry->i2_refcount,
+ __print_symbolic(__entry->i3_domain, XFS_REFC_DOMAIN_STRINGS),
__entry->i3_startblock,
__entry->i3_blockcount,
__entry->i3_refcount)
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index d3a97a028560..f51df7d94ef7 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -602,9 +602,9 @@ xfsaild(
while (1) {
if (tout && tout <= 20)
- set_current_state(TASK_KILLABLE);
+ set_current_state(TASK_KILLABLE|TASK_FREEZABLE);
else
- set_current_state(TASK_INTERRUPTIBLE);
+ set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
/*
* Check kthread_should_stop() after we set the task state to
@@ -653,14 +653,14 @@ xfsaild(
ailp->ail_target == ailp->ail_target_prev &&
list_empty(&ailp->ail_buf_list)) {
spin_unlock(&ailp->ail_lock);
- freezable_schedule();
+ schedule();
tout = 0;
continue;
}
spin_unlock(&ailp->ail_lock);
if (tout)
- freezable_schedule_timeout(msecs_to_jiffies(tout));
+ schedule_timeout(msecs_to_jiffies(tout));
__set_current_state(TASK_RUNNING);
@@ -730,11 +730,10 @@ void
xfs_ail_push_all_sync(
struct xfs_ail *ailp)
{
- struct xfs_log_item *lip;
DEFINE_WAIT(wait);
spin_lock(&ailp->ail_lock);
- while ((lip = xfs_ail_max(ailp)) != NULL) {
+ while (xfs_ail_max(ailp) != NULL) {
prepare_to_wait(&ailp->ail_empty, &wait, TASK_UNINTERRUPTIBLE);
wake_up_process(ailp->ail_task);
spin_unlock(&ailp->ail_lock);