diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/afs/dir.c | 108 | ||||
-rw-r--r-- | fs/afs/dir_silly.c | 22 | ||||
-rw-r--r-- | fs/afs/fsclient.c | 27 | ||||
-rw-r--r-- | fs/afs/yfsclient.c | 26 | ||||
-rw-r--r-- | fs/btrfs/block-group.c | 1 | ||||
-rw-r--r-- | fs/btrfs/file.c | 15 | ||||
-rw-r--r-- | fs/btrfs/reflink.c | 1 | ||||
-rw-r--r-- | fs/btrfs/relocation.c | 23 | ||||
-rw-r--r-- | fs/btrfs/space-info.c | 20 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 93 | ||||
-rw-r--r-- | fs/ceph/dir.c | 4 | ||||
-rw-r--r-- | fs/ceph/file.c | 4 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 2 | ||||
-rw-r--r-- | fs/cifs/Kconfig | 2 | ||||
-rw-r--r-- | fs/cifs/cifs_debug.c | 6 | ||||
-rw-r--r-- | fs/cifs/cifsfs.c | 4 | ||||
-rw-r--r-- | fs/cifs/cifsglob.h | 4 | ||||
-rw-r--r-- | fs/cifs/file.c | 61 | ||||
-rw-r--r-- | fs/cifs/inode.c | 4 | ||||
-rw-r--r-- | fs/cifs/readdir.c | 2 | ||||
-rw-r--r-- | fs/cifs/smb2misc.c | 14 | ||||
-rw-r--r-- | fs/cifs/smb2proto.h | 6 | ||||
-rw-r--r-- | fs/cifs/smb2transport.c | 87 | ||||
-rw-r--r-- | fs/cifs/smbdirect.c | 313 | ||||
-rw-r--r-- | fs/cifs/smbdirect.h | 7 | ||||
-rw-r--r-- | fs/nfs/pnfs.c | 3 | ||||
-rw-r--r-- | fs/nfs/pnfs_nfs.c | 1 |
27 files changed, 456 insertions, 404 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 5c794f4b051a..d1e1caa23c8b 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1032,7 +1032,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) struct dentry *parent; struct inode *inode; struct key *key; - afs_dataversion_t dir_version; + afs_dataversion_t dir_version, invalid_before; long de_version; int ret; @@ -1084,8 +1084,8 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) if (de_version == (long)dir_version) goto out_valid_noupdate; - dir_version = dir->invalid_before; - if (de_version - (long)dir_version >= 0) + invalid_before = dir->invalid_before; + if (de_version - (long)invalid_before >= 0) goto out_valid; _debug("dir modified"); @@ -1275,6 +1275,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct afs_fs_cursor fc; struct afs_vnode *dvnode = AFS_FS_I(dir); struct key *key; + afs_dataversion_t data_version; int ret; mode |= S_IFDIR; @@ -1295,7 +1296,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t data_version = dvnode->status.data_version + 1; + data_version = dvnode->status.data_version + 1; while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); @@ -1316,10 +1317,14 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto error_key; } - if (ret == 0 && - test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, - afs_edit_dir_for_create); + if (ret == 0) { + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == data_version) + afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, + afs_edit_dir_for_create); + up_write(&dvnode->validate_lock); + } key_put(key); kfree(scb); @@ -1360,6 +1365,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) struct afs_fs_cursor fc; struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL; struct key *key; + afs_dataversion_t data_version; int ret; _enter("{%llx:%llu},{%pd}", @@ -1391,7 +1397,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t data_version = dvnode->status.data_version + 1; + data_version = dvnode->status.data_version + 1; while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); @@ -1404,9 +1410,12 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) ret = afs_end_vnode_operation(&fc); if (ret == 0) { afs_dir_remove_subdir(dentry); - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == data_version) afs_edit_dir_remove(dvnode, &dentry->d_name, afs_edit_dir_for_rmdir); + up_write(&dvnode->validate_lock); } } @@ -1544,10 +1553,15 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) ret = afs_end_vnode_operation(&fc); if (ret == 0 && !(scb[1].have_status || scb[1].have_error)) ret = afs_dir_remove_link(dvnode, dentry, key); - if (ret == 0 && - test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_edit_dir_remove(dvnode, &dentry->d_name, - afs_edit_dir_for_unlink); + + if (ret == 0) { + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == data_version) + afs_edit_dir_remove(dvnode, &dentry->d_name, + afs_edit_dir_for_unlink); + up_write(&dvnode->validate_lock); + } } if (need_rehash && ret < 0 && ret != -ENOENT) @@ -1573,6 +1587,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct afs_status_cb *scb; struct afs_vnode *dvnode = AFS_FS_I(dir); struct key *key; + afs_dataversion_t data_version; int ret; mode |= S_IFREG; @@ -1597,7 +1612,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t data_version = dvnode->status.data_version + 1; + data_version = dvnode->status.data_version + 1; while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); @@ -1618,9 +1633,12 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, goto error_key; } - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == data_version) afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, afs_edit_dir_for_create); + up_write(&dvnode->validate_lock); kfree(scb); key_put(key); @@ -1648,6 +1666,7 @@ static int afs_link(struct dentry *from, struct inode *dir, struct afs_vnode *dvnode = AFS_FS_I(dir); struct afs_vnode *vnode = AFS_FS_I(d_inode(from)); struct key *key; + afs_dataversion_t data_version; int ret; _enter("{%llx:%llu},{%llx:%llu},{%pd}", @@ -1672,7 +1691,7 @@ static int afs_link(struct dentry *from, struct inode *dir, ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t data_version = dvnode->status.data_version + 1; + data_version = dvnode->status.data_version + 1; if (mutex_lock_interruptible_nested(&vnode->io_lock, 1) < 0) { afs_end_vnode_operation(&fc); @@ -1702,9 +1721,12 @@ static int afs_link(struct dentry *from, struct inode *dir, goto error_key; } - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == data_version) afs_edit_dir_add(dvnode, &dentry->d_name, &vnode->fid, afs_edit_dir_for_link); + up_write(&dvnode->validate_lock); key_put(key); kfree(scb); @@ -1732,6 +1754,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, struct afs_status_cb *scb; struct afs_vnode *dvnode = AFS_FS_I(dir); struct key *key; + afs_dataversion_t data_version; int ret; _enter("{%llx:%llu},{%pd},%s", @@ -1759,7 +1782,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t data_version = dvnode->status.data_version + 1; + data_version = dvnode->status.data_version + 1; while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); @@ -1780,9 +1803,12 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, goto error_key; } - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == data_version) afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, afs_edit_dir_for_symlink); + up_write(&dvnode->validate_lock); key_put(key); kfree(scb); @@ -1812,6 +1838,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *tmp = NULL, *rehash = NULL; struct inode *new_inode; struct key *key; + afs_dataversion_t orig_data_version; + afs_dataversion_t new_data_version; bool new_negative = d_is_negative(new_dentry); int ret; @@ -1890,10 +1918,6 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, orig_dvnode, key, true)) { - afs_dataversion_t orig_data_version; - afs_dataversion_t new_data_version; - struct afs_status_cb *new_scb = &scb[1]; - orig_data_version = orig_dvnode->status.data_version + 1; if (orig_dvnode != new_dvnode) { @@ -1904,7 +1928,6 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, new_data_version = new_dvnode->status.data_version + 1; } else { new_data_version = orig_data_version; - new_scb = &scb[0]; } while (afs_select_fileserver(&fc)) { @@ -1912,7 +1935,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, fc.cb_break_2 = afs_calc_vnode_cb_break(new_dvnode); afs_fs_rename(&fc, old_dentry->d_name.name, new_dvnode, new_dentry->d_name.name, - &scb[0], new_scb); + &scb[0], &scb[1]); } afs_vnode_commit_status(&fc, orig_dvnode, fc.cb_break, @@ -1930,18 +1953,25 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, if (ret == 0) { if (rehash) d_rehash(rehash); - if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags)) - afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name, - afs_edit_dir_for_rename_0); + down_write(&orig_dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags) && + orig_dvnode->status.data_version == orig_data_version) + afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name, + afs_edit_dir_for_rename_0); + if (orig_dvnode != new_dvnode) { + up_write(&orig_dvnode->validate_lock); - if (!new_negative && - test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags)) - afs_edit_dir_remove(new_dvnode, &new_dentry->d_name, - afs_edit_dir_for_rename_1); + down_write(&new_dvnode->validate_lock); + } + if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags) && + orig_dvnode->status.data_version == new_data_version) { + if (!new_negative) + afs_edit_dir_remove(new_dvnode, &new_dentry->d_name, + afs_edit_dir_for_rename_1); - if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags)) afs_edit_dir_add(new_dvnode, &new_dentry->d_name, &vnode->fid, afs_edit_dir_for_rename_2); + } new_inode = d_inode(new_dentry); if (new_inode) { @@ -1957,14 +1987,10 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, * Note that if we ever implement RENAME_EXCHANGE, we'll have * to update both dentries with opposing dir versions. */ - if (new_dvnode != orig_dvnode) { - afs_update_dentry_version(&fc, old_dentry, &scb[1]); - afs_update_dentry_version(&fc, new_dentry, &scb[1]); - } else { - afs_update_dentry_version(&fc, old_dentry, &scb[0]); - afs_update_dentry_version(&fc, new_dentry, &scb[0]); - } + afs_update_dentry_version(&fc, old_dentry, &scb[1]); + afs_update_dentry_version(&fc, new_dentry, &scb[1]); d_move(old_dentry, new_dentry); + up_write(&new_dvnode->validate_lock); goto error_tmp; } diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c index 361088a5edb9..d94e2b7cddff 100644 --- a/fs/afs/dir_silly.c +++ b/fs/afs/dir_silly.c @@ -21,6 +21,7 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode { struct afs_fs_cursor fc; struct afs_status_cb *scb; + afs_dataversion_t dir_data_version; int ret = -ERESTARTSYS; _enter("%pd,%pd", old, new); @@ -31,7 +32,7 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode trace_afs_silly_rename(vnode, false); if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t dir_data_version = dvnode->status.data_version + 1; + dir_data_version = dvnode->status.data_version + 1; while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); @@ -54,12 +55,15 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode dvnode->silly_key = key_get(key); } - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == dir_data_version) { afs_edit_dir_remove(dvnode, &old->d_name, afs_edit_dir_for_silly_0); - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) afs_edit_dir_add(dvnode, &new->d_name, &vnode->fid, afs_edit_dir_for_silly_1); + } + up_write(&dvnode->validate_lock); } kfree(scb); @@ -181,10 +185,14 @@ static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); } } - if (ret == 0 && - test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_edit_dir_remove(dvnode, &dentry->d_name, - afs_edit_dir_for_unlink); + if (ret == 0) { + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == dir_data_version) + afs_edit_dir_remove(dvnode, &dentry->d_name, + afs_edit_dir_for_unlink); + up_write(&dvnode->validate_lock); + } } kfree(scb); diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 1f9c5d8e6fe5..68fc46634346 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -65,6 +65,7 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp, bool inline_error = (call->operation_ID == afs_FS_InlineBulkStatus); u64 data_version, size; u32 type, abort_code; + int ret; abort_code = ntohl(xdr->abort_code); @@ -78,7 +79,7 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp, */ status->abort_code = abort_code; scb->have_error = true; - return 0; + goto good; } pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version)); @@ -87,7 +88,8 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp, if (abort_code != 0 && inline_error) { status->abort_code = abort_code; - return 0; + scb->have_error = true; + goto good; } type = ntohl(xdr->type); @@ -123,13 +125,16 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp, data_version |= (u64)ntohl(xdr->data_version_hi) << 32; status->data_version = data_version; scb->have_status = true; - +good: + ret = 0; +advance: *_bp = (const void *)*_bp + sizeof(*xdr); - return 0; + return ret; bad: xdr_dump_bad(*_bp); - return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); + ret = afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); + goto advance; } static time64_t xdr_decode_expiry(struct afs_call *call, u32 expiry) @@ -981,16 +986,16 @@ static int afs_deliver_fs_rename(struct afs_call *call) if (ret < 0) return ret; - /* unmarshall the reply once we've received all of it */ + /* If the two dirs are the same, we have two copies of the same status + * report, so we just decode it twice. + */ bp = call->buffer; ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); if (ret < 0) return ret; - if (call->out_dir_scb != call->out_scb) { - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - } + ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); + if (ret < 0) + return ret; xdr_decode_AFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index a26126ac7bf1..b5b45c57e1b1 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -165,15 +165,15 @@ static void xdr_dump_bad(const __be32 *bp) int i; pr_notice("YFS XDR: Bad status record\n"); - for (i = 0; i < 5 * 4 * 4; i += 16) { + for (i = 0; i < 6 * 4 * 4; i += 16) { memcpy(x, bp, 16); bp += 4; pr_notice("%03x: %08x %08x %08x %08x\n", i, ntohl(x[0]), ntohl(x[1]), ntohl(x[2]), ntohl(x[3])); } - memcpy(x, bp, 4); - pr_notice("0x50: %08x\n", ntohl(x[0])); + memcpy(x, bp, 8); + pr_notice("0x60: %08x %08x\n", ntohl(x[0]), ntohl(x[1])); } /* @@ -186,13 +186,14 @@ static int xdr_decode_YFSFetchStatus(const __be32 **_bp, const struct yfs_xdr_YFSFetchStatus *xdr = (const void *)*_bp; struct afs_file_status *status = &scb->status; u32 type; + int ret; status->abort_code = ntohl(xdr->abort_code); if (status->abort_code != 0) { if (status->abort_code == VNOVNODE) status->nlink = 0; scb->have_error = true; - return 0; + goto good; } type = ntohl(xdr->type); @@ -220,13 +221,16 @@ static int xdr_decode_YFSFetchStatus(const __be32 **_bp, status->size = xdr_to_u64(xdr->size); status->data_version = xdr_to_u64(xdr->data_version); scb->have_status = true; - +good: + ret = 0; +advance: *_bp += xdr_size(xdr); - return 0; + return ret; bad: xdr_dump_bad(*_bp); - return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); + ret = afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); + goto advance; } /* @@ -1153,11 +1157,9 @@ static int yfs_deliver_fs_rename(struct afs_call *call) ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); if (ret < 0) return ret; - if (call->out_dir_scb != call->out_scb) { - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - } + ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); + if (ret < 0) + return ret; xdr_decode_YFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 786849fcc319..47f66c6a7d7f 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -3370,6 +3370,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) space_info->bytes_reserved > 0 || space_info->bytes_may_use > 0)) btrfs_dump_space_info(info, space_info, 0, 0); + WARN_ON(space_info->reclaim_size > 0); list_del(&space_info->list); btrfs_sysfs_remove_space_info(space_info); } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8a144f9cb7ac..719e68ab552c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2098,6 +2098,21 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) atomic_inc(&root->log_batch); /* + * If the inode needs a full sync, make sure we use a full range to + * avoid log tree corruption, due to hole detection racing with ordered + * extent completion for adjacent ranges and races between logging and + * completion of ordered extents for adjancent ranges - both races + * could lead to file extent items in the log with overlapping ranges. + * Do this while holding the inode lock, to avoid races with other + * tasks. + */ + if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, + &BTRFS_I(inode)->runtime_flags)) { + start = 0; + end = LLONG_MAX; + } + + /* * Before we acquired the inode's lock, someone may have dirtied more * pages in the target range. We need to make sure that writeback for * any such pages does not start while we are logging the inode, because diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index d1973141d3bb..040009d1cc31 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -264,6 +264,7 @@ copy_inline_extent: size); inode_add_bytes(dst, datal); set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(dst)->runtime_flags); + ret = btrfs_inode_set_file_extent_range(BTRFS_I(dst), 0, aligned_end); out: if (!ret && !trans) { /* diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f65595602aa8..d35936c934ab 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -611,8 +611,8 @@ static int should_ignore_root(struct btrfs_root *root) if (!reloc_root) return 0; - if (btrfs_root_last_snapshot(&reloc_root->root_item) == - root->fs_info->running_transaction->transid - 1) + if (btrfs_header_generation(reloc_root->commit_root) == + root->fs_info->running_transaction->transid) return 0; /* * if there is reloc tree and it was created in previous @@ -1527,8 +1527,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, int clear_rsv = 0; int ret; - if (!rc || !rc->create_reloc_tree || - root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) + if (!rc) return 0; /* @@ -1538,12 +1537,28 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, if (reloc_root_is_dead(root)) return 0; + /* + * This is subtle but important. We do not do + * record_root_in_transaction for reloc roots, instead we record their + * corresponding fs root, and then here we update the last trans for the + * reloc root. This means that we have to do this for the entire life + * of the reloc root, regardless of which stage of the relocation we are + * in. + */ if (root->reloc_root) { reloc_root = root->reloc_root; reloc_root->last_trans = trans->transid; return 0; } + /* + * We are merging reloc roots, we do not need new reloc trees. Also + * reloc trees never need their own reloc tree. + */ + if (!rc->create_reloc_tree || + root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) + return 0; + if (!trans->reloc_reserved) { rsv = trans->block_rsv; trans->block_rsv = rc->block_rsv; diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 8b0fe053a25d..ff17a4420358 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -361,6 +361,16 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, return 0; } +static void remove_ticket(struct btrfs_space_info *space_info, + struct reserve_ticket *ticket) +{ + if (!list_empty(&ticket->list)) { + list_del_init(&ticket->list); + ASSERT(space_info->reclaim_size >= ticket->bytes); + space_info->reclaim_size -= ticket->bytes; + } +} + /* * This is for space we already have accounted in space_info->bytes_may_use, so * basically when we're returning space from block_rsv's. @@ -388,9 +398,7 @@ again: btrfs_space_info_update_bytes_may_use(fs_info, space_info, ticket->bytes); - list_del_init(&ticket->list); - ASSERT(space_info->reclaim_size >= ticket->bytes); - space_info->reclaim_size -= ticket->bytes; + remove_ticket(space_info, ticket); ticket->bytes = 0; space_info->tickets_id++; wake_up(&ticket->wait); @@ -899,7 +907,7 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info, btrfs_info(fs_info, "failing ticket with %llu bytes", ticket->bytes); - list_del_init(&ticket->list); + remove_ticket(space_info, ticket); ticket->error = -ENOSPC; wake_up(&ticket->wait); @@ -1063,7 +1071,7 @@ static void wait_reserve_ticket(struct btrfs_fs_info *fs_info, * despite getting an error, resulting in a space leak * (bytes_may_use counter of our space_info). */ - list_del_init(&ticket->list); + remove_ticket(space_info, ticket); ticket->error = -EINTR; break; } @@ -1121,7 +1129,7 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info, * either the async reclaim job deletes the ticket from the list * or we delete it ourselves at wait_reserve_ticket(). */ - list_del_init(&ticket->list); + remove_ticket(space_info, ticket); if (!ret) ret = -ENOSPC; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 58c111474ba5..ec36a7c6ba3d 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -96,8 +96,8 @@ enum { static int btrfs_log_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_inode *inode, int inode_only, - u64 start, - u64 end, + const loff_t start, + const loff_t end, struct btrfs_log_ctx *ctx); static int link_to_fixup_dir(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -4533,15 +4533,13 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, static int btrfs_log_holes(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_inode *inode, - struct btrfs_path *path, - const u64 start, - const u64 end) + struct btrfs_path *path) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_key key; const u64 ino = btrfs_ino(inode); const u64 i_size = i_size_read(&inode->vfs_inode); - u64 prev_extent_end = start; + u64 prev_extent_end = 0; int ret; if (!btrfs_fs_incompat(fs_info, NO_HOLES) || i_size == 0) @@ -4549,21 +4547,14 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans, key.objectid = ino; key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = start; + key.offset = 0; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) return ret; - if (ret > 0 && path->slots[0] > 0) { - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1); - if (key.objectid == ino && key.type == BTRFS_EXTENT_DATA_KEY) - path->slots[0]--; - } - while (true) { struct extent_buffer *leaf = path->nodes[0]; - u64 extent_end; if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { ret = btrfs_next_leaf(root, path); @@ -4580,18 +4571,9 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans, if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) break; - extent_end = btrfs_file_extent_end(path); - if (extent_end <= start) - goto next_slot; - /* We have a hole, log it. */ if (prev_extent_end < key.offset) { - u64 hole_len; - - if (key.offset >= end) - hole_len = end - prev_extent_end; - else - hole_len = key.offset - prev_extent_end; + const u64 hole_len = key.offset - prev_extent_end; /* * Release the path to avoid deadlocks with other code @@ -4621,20 +4603,16 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; } - prev_extent_end = min(extent_end, end); - if (extent_end >= end) - break; -next_slot: + prev_extent_end = btrfs_file_extent_end(path); path->slots[0]++; cond_resched(); } - if (prev_extent_end < end && prev_extent_end < i_size) { + if (prev_extent_end < i_size) { u64 hole_len; btrfs_release_path(path); - hole_len = min(ALIGN(i_size, fs_info->sectorsize), end); - hole_len -= prev_extent_end; + hole_len = ALIGN(i_size - prev_extent_end, fs_info->sectorsize); ret = btrfs_insert_file_extent(trans, root->log_root, ino, prev_extent_end, 0, 0, hole_len, 0, hole_len, @@ -4971,8 +4949,6 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans, const u64 logged_isize, const bool recursive_logging, const int inode_only, - const u64 start, - const u64 end, struct btrfs_log_ctx *ctx, bool *need_log_inode_item) { @@ -4981,21 +4957,6 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans, int ins_nr = 0; int ret; - /* - * We must make sure we don't copy extent items that are entirely out of - * the range [start, end - 1]. This is not just an optimization to avoid - * copying but also needed to avoid a corruption where we end up with - * file extent items in the log tree that have overlapping ranges - this - * can happen if we race with ordered extent completion for ranges that - * are outside our target range. For example we copy an extent item and - * when we move to the next leaf, that extent was trimmed and a new one - * covering a subrange of it, but with a higher key, was inserted - we - * would then copy this other extent too, resulting in a log tree with - * 2 extent items that represent overlapping ranges. - * - * We can copy the entire extents at the range bondaries however, even - * if they cover an area outside the target range. That's ok. - */ while (1) { ret = btrfs_search_forward(root, min_key, path, trans->transid); if (ret < 0) @@ -5063,29 +5024,6 @@ again: goto next_slot; } - if (min_key->type == BTRFS_EXTENT_DATA_KEY) { - const u64 extent_end = btrfs_file_extent_end(path); - - if (extent_end <= start) { - if (ins_nr > 0) { - ret = copy_items(trans, inode, dst_path, - path, ins_start_slot, - ins_nr, inode_only, - logged_isize); - if (ret < 0) - return ret; - ins_nr = 0; - } - goto next_slot; - } - if (extent_end >= end) { - ins_nr++; - if (ins_nr == 1) - ins_start_slot = path->slots[0]; - break; - } - } - if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { ins_nr++; goto next_slot; @@ -5151,8 +5089,8 @@ next_key: static int btrfs_log_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_inode *inode, int inode_only, - u64 start, - u64 end, + const loff_t start, + const loff_t end, struct btrfs_log_ctx *ctx) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -5180,9 +5118,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, return -ENOMEM; } - start = ALIGN_DOWN(start, fs_info->sectorsize); - end = ALIGN(end, fs_info->sectorsize); - min_key.objectid = ino; min_key.type = BTRFS_INODE_ITEM_KEY; min_key.offset = 0; @@ -5298,8 +5233,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, err = copy_inode_items_to_log(trans, inode, &min_key, &max_key, path, dst_path, logged_isize, - recursive_logging, inode_only, - start, end, ctx, &need_log_inode_item); + recursive_logging, inode_only, ctx, + &need_log_inode_item); if (err) goto out_unlock; @@ -5312,7 +5247,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { btrfs_release_path(path); btrfs_release_path(dst_path); - err = btrfs_log_holes(trans, root, inode, path, start, end); + err = btrfs_log_holes(trans, root, inode, path); if (err) goto out_unlock; } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index d594c2627430..4c4202c93b71 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1051,8 +1051,8 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, /* If op failed, mark everyone involved for errors */ if (result) { - int pathlen; - u64 base; + int pathlen = 0; + u64 base = 0; char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen, &base, 0); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 4a5ccbb7e808..afdfca965a7f 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -527,8 +527,8 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc, if (result) { struct dentry *dentry = req->r_dentry; - int pathlen; - u64 base; + int pathlen = 0; + u64 base = 0; char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen, &base, 0); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 4e5be79bf080..903d9edfd4bf 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -521,7 +521,7 @@ extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); static inline void ceph_mdsc_free_path(char *path, int len) { - if (path) + if (!IS_ERR_OR_NULL(path)) __putname(path - (PATH_MAX - 1 - len)); } diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 22cf04fb32d3..604f65f4b6c5 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -202,7 +202,7 @@ config CIFS_SMB_DIRECT help Enables SMB Direct support for SMB 3.0, 3.02 and 3.1.1. SMB Direct allows transferring SMB packets over RDMA. If unsure, - say N. + say Y. config CIFS_FSCACHE bool "Provide CIFS client caching support" diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 276e4b5ea8e0..916567d770f5 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -323,10 +323,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) atomic_read(&server->smbd_conn->send_credits), atomic_read(&server->smbd_conn->receive_credits), server->smbd_conn->receive_credit_target); - seq_printf(m, "\nPending send_pending: %x " - "send_payload_pending: %x", - atomic_read(&server->smbd_conn->send_pending), - atomic_read(&server->smbd_conn->send_payload_pending)); + seq_printf(m, "\nPending send_pending: %x ", + atomic_read(&server->smbd_conn->send_pending)); seq_printf(m, "\nReceive buffers count_receive_queue: %x " "count_empty_packet_queue: %x", server->smbd_conn->count_receive_queue, diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 94e3ed4850b5..c31f362fa098 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1208,6 +1208,10 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off, { unsigned int xid = get_xid(); ssize_t rc; + struct cifsFileInfo *cfile = dst_file->private_data; + + if (cfile->swapfile) + return -EOPNOTSUPP; rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff, len, flags); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 0d956360e984..05dd3dea684b 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -426,7 +426,8 @@ struct smb_version_operations { /* generate new lease key */ void (*new_lease_key)(struct cifs_fid *); int (*generate_signingkey)(struct cifs_ses *); - int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *); + int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *, + bool allocate_crypto); int (*set_integrity)(const unsigned int, struct cifs_tcon *tcon, struct cifsFileInfo *src_file); int (*enum_snapshots)(const unsigned int xid, struct cifs_tcon *tcon, @@ -1312,6 +1313,7 @@ struct cifsFileInfo { struct tcon_link *tlink; unsigned int f_flags; bool invalidHandle:1; /* file closed via session abend */ + bool swapfile:1; bool oplock_break_cancelled:1; unsigned int oplock_epoch; /* epoch from the lease break */ __u32 oplock_level; /* oplock/lease level from the lease break */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 5920820bfbd0..0b1528edebcf 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -4808,6 +4808,60 @@ cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter) return -EINVAL; } +static int cifs_swap_activate(struct swap_info_struct *sis, + struct file *swap_file, sector_t *span) +{ + struct cifsFileInfo *cfile = swap_file->private_data; + struct inode *inode = swap_file->f_mapping->host; + unsigned long blocks; + long long isize; + + cifs_dbg(FYI, "swap activate\n"); + + spin_lock(&inode->i_lock); + blocks = inode->i_blocks; + isize = inode->i_size; + spin_unlock(&inode->i_lock); + if (blocks*512 < isize) { + pr_warn("swap activate: swapfile has holes\n"); + return -EINVAL; + } + *span = sis->pages; + + printk_once(KERN_WARNING "Swap support over SMB3 is experimental\n"); + + /* + * TODO: consider adding ACL (or documenting how) to prevent other + * users (on this or other systems) from reading it + */ + + + /* TODO: add sk_set_memalloc(inet) or similar */ + + if (cfile) + cfile->swapfile = true; + /* + * TODO: Since file already open, we can't open with DENY_ALL here + * but we could add call to grab a byte range lock to prevent others + * from reading or writing the file + */ + + return 0; +} + +static void cifs_swap_deactivate(struct file *file) +{ + struct cifsFileInfo *cfile = file->private_data; + + cifs_dbg(FYI, "swap deactivate\n"); + + /* TODO: undo sk_set_memalloc(inet) will eventually be needed */ + + if (cfile) + cfile->swapfile = false; + + /* do we need to unpin (or unlock) the file */ +} const struct address_space_operations cifs_addr_ops = { .readpage = cifs_readpage, @@ -4821,6 +4875,13 @@ const struct address_space_operations cifs_addr_ops = { .direct_IO = cifs_direct_io, .invalidatepage = cifs_invalidate_page, .launder_page = cifs_launder_page, + /* + * TODO: investigate and if useful we could add an cifs_migratePage + * helper (under an CONFIG_MIGRATION) in the future, and also + * investigate and add an is_dirty_writeback helper if needed + */ + .swap_activate = cifs_swap_activate, + .swap_deactivate = cifs_swap_deactivate, }; /* diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 8d01ec2dca66..8fbbdcdad8ff 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -2026,6 +2026,10 @@ cifs_revalidate_mapping(struct inode *inode) int rc; unsigned long *flags = &CIFS_I(inode)->flags; + /* swapfiles are not supposed to be shared */ + if (IS_SWAPFILE(inode)) + return 0; + rc = wait_on_bit_lock_action(flags, CIFS_INO_LOCK, cifs_wait_bit_killable, TASK_KILLABLE); if (rc) diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 19e4a5d3b4ca..50f776a8d4ba 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -246,7 +246,7 @@ cifs_posix_to_fattr(struct cifs_fattr *fattr, struct smb2_posix_info *info, */ fattr->cf_mode = le32_to_cpu(info->Mode) & ~S_IFMT; - cifs_dbg(VFS, "XXX dev %d, reparse %d, mode %o", + cifs_dbg(FYI, "posix fattr: dev %d, reparse %d, mode %o", le32_to_cpu(info->DeviceId), le32_to_cpu(info->ReparseTag), le32_to_cpu(info->Mode)); diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 0511aaf451d4..497afb0b9960 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -766,6 +766,20 @@ smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid, cifs_dbg(FYI, "%s: tc_count=%d\n", __func__, tcon->tc_count); spin_lock(&cifs_tcp_ses_lock); + if (tcon->tc_count <= 0) { + struct TCP_Server_Info *server = NULL; + + WARN_ONCE(tcon->tc_count < 0, "tcon refcount is negative"); + spin_unlock(&cifs_tcp_ses_lock); + + if (tcon->ses) + server = tcon->ses->server; + + cifs_server_dbg(FYI, "tid=%u: tcon is closing, skipping async close retry of fid %llu %llu\n", + tcon->tid, persistent_fid, volatile_fid); + + return 0; + } tcon->tc_count++; spin_unlock(&cifs_tcp_ses_lock); diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 4d1ff7b66fdc..087d5f14320b 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -55,9 +55,11 @@ extern struct cifs_ses *smb2_find_smb_ses(struct TCP_Server_Info *server, extern struct cifs_tcon *smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32 tid); extern int smb2_calc_signature(struct smb_rqst *rqst, - struct TCP_Server_Info *server); + struct TCP_Server_Info *server, + bool allocate_crypto); extern int smb3_calc_signature(struct smb_rqst *rqst, - struct TCP_Server_Info *server); + struct TCP_Server_Info *server, + bool allocate_crypto); extern void smb2_echo_request(struct work_struct *work); extern __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode); extern bool smb2_is_valid_oplock_break(char *buffer, diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 20cc79e5c15d..1a6c227ada8f 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -41,14 +41,6 @@ #include "smb2glob.h" static int -smb2_crypto_shash_allocate(struct TCP_Server_Info *server) -{ - return cifs_alloc_hash("hmac(sha256)", - &server->secmech.hmacsha256, - &server->secmech.sdeschmacsha256); -} - -static int smb3_crypto_shash_allocate(struct TCP_Server_Info *server) { struct cifs_secmech *p = &server->secmech; @@ -219,7 +211,8 @@ smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32 tid) } int -smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) +smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, + bool allocate_crypto) { int rc; unsigned char smb2_signature[SMB2_HMACSHA256_SIZE]; @@ -228,6 +221,8 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[0].iov_base; struct cifs_ses *ses; struct shash_desc *shash; + struct crypto_shash *hash; + struct sdesc *sdesc = NULL; struct smb_rqst drqst; ses = smb2_find_smb_ses(server, shdr->SessionId); @@ -239,24 +234,32 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE); memset(shdr->Signature, 0x0, SMB2_SIGNATURE_SIZE); - rc = smb2_crypto_shash_allocate(server); - if (rc) { - cifs_server_dbg(VFS, "%s: sha256 alloc failed\n", __func__); - return rc; + if (allocate_crypto) { + rc = cifs_alloc_hash("hmac(sha256)", &hash, &sdesc); + if (rc) { + cifs_server_dbg(VFS, + "%s: sha256 alloc failed\n", __func__); + return rc; + } + shash = &sdesc->shash; + } else { + hash = server->secmech.hmacsha256; + shash = &server->secmech.sdeschmacsha256->shash; } - rc = crypto_shash_setkey(server->secmech.hmacsha256, - ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE); + rc = crypto_shash_setkey(hash, ses->auth_key.response, + SMB2_NTLMV2_SESSKEY_SIZE); if (rc) { - cifs_server_dbg(VFS, "%s: Could not update with response\n", __func__); - return rc; + cifs_server_dbg(VFS, + "%s: Could not update with response\n", + __func__); + goto out; } - shash = &server->secmech.sdeschmacsha256->shash; rc = crypto_shash_init(shash); if (rc) { cifs_server_dbg(VFS, "%s: Could not init sha256", __func__); - return rc; + goto out; } /* @@ -271,9 +274,10 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) rc = crypto_shash_update(shash, iov[0].iov_base, iov[0].iov_len); if (rc) { - cifs_server_dbg(VFS, "%s: Could not update with payload\n", - __func__); - return rc; + cifs_server_dbg(VFS, + "%s: Could not update with payload\n", + __func__); + goto out; } drqst.rq_iov++; drqst.rq_nvec--; @@ -283,6 +287,9 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) if (!rc) memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE); +out: + if (allocate_crypto) + cifs_free_hash(&hash, &sdesc); return rc; } @@ -504,14 +511,17 @@ generate_smb311signingkey(struct cifs_ses *ses) } int -smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) +smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, + bool allocate_crypto) { int rc; unsigned char smb3_signature[SMB2_CMACAES_SIZE]; unsigned char *sigptr = smb3_signature; struct kvec *iov = rqst->rq_iov; struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[0].iov_base; - struct shash_desc *shash = &server->secmech.sdesccmacaes->shash; + struct shash_desc *shash; + struct crypto_shash *hash; + struct sdesc *sdesc = NULL; struct smb_rqst drqst; u8 key[SMB3_SIGN_KEY_SIZE]; @@ -519,14 +529,24 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) if (rc) return 0; + if (allocate_crypto) { + rc = cifs_alloc_hash("cmac(aes)", &hash, &sdesc); + if (rc) + return rc; + + shash = &sdesc->shash; + } else { + hash = server->secmech.cmacaes; + shash = &server->secmech.sdesccmacaes->shash; + } + memset(smb3_signature, 0x0, SMB2_CMACAES_SIZE); memset(shdr->Signature, 0x0, SMB2_SIGNATURE_SIZE); - rc = crypto_shash_setkey(server->secmech.cmacaes, - key, SMB2_CMACAES_SIZE); + rc = crypto_shash_setkey(hash, key, SMB2_CMACAES_SIZE); if (rc) { cifs_server_dbg(VFS, "%s: Could not set key for cmac aes\n", __func__); - return rc; + goto out; } /* @@ -537,7 +557,7 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) rc = crypto_shash_init(shash); if (rc) { cifs_server_dbg(VFS, "%s: Could not init cmac aes\n", __func__); - return rc; + goto out; } /* @@ -554,7 +574,7 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) if (rc) { cifs_server_dbg(VFS, "%s: Could not update with payload\n", __func__); - return rc; + goto out; } drqst.rq_iov++; drqst.rq_nvec--; @@ -564,6 +584,9 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) if (!rc) memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE); +out: + if (allocate_crypto) + cifs_free_hash(&hash, &sdesc); return rc; } @@ -593,7 +616,7 @@ smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server) return 0; } - rc = server->ops->calc_signature(rqst, server); + rc = server->ops->calc_signature(rqst, server, false); return rc; } @@ -631,9 +654,7 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) memset(shdr->Signature, 0, SMB2_SIGNATURE_SIZE); - mutex_lock(&server->srv_mutex); - rc = server->ops->calc_signature(rqst, server); - mutex_unlock(&server->srv_mutex); + rc = server->ops->calc_signature(rqst, server, true); if (rc) return rc; diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index 8da43a500686..1a5834a5d597 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -284,13 +284,10 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) request->sge[i].length, DMA_TO_DEVICE); - if (request->has_payload) { - if (atomic_dec_and_test(&request->info->send_payload_pending)) - wake_up(&request->info->wait_send_payload_pending); - } else { - if (atomic_dec_and_test(&request->info->send_pending)) - wake_up(&request->info->wait_send_pending); - } + if (atomic_dec_and_test(&request->info->send_pending)) + wake_up(&request->info->wait_send_pending); + + wake_up(&request->info->wait_post_send); mempool_free(request, request->info->request_mempool); } @@ -383,27 +380,6 @@ static bool process_negotiation_response( return true; } -/* - * Check and schedule to send an immediate packet - * This is used to extend credtis to remote peer to keep the transport busy - */ -static void check_and_send_immediate(struct smbd_connection *info) -{ - if (info->transport_status != SMBD_CONNECTED) - return; - - info->send_immediate = true; - - /* - * Promptly send a packet if our peer is running low on receive - * credits - */ - if (atomic_read(&info->receive_credits) < - info->receive_credit_target - 1) - queue_delayed_work( - info->workqueue, &info->send_immediate_work, 0); -} - static void smbd_post_send_credits(struct work_struct *work) { int ret = 0; @@ -453,10 +429,16 @@ static void smbd_post_send_credits(struct work_struct *work) info->new_credits_offered += ret; spin_unlock(&info->lock_new_credits_offered); - atomic_add(ret, &info->receive_credits); - - /* Check if we can post new receive and grant credits to peer */ - check_and_send_immediate(info); + /* Promptly send an immediate packet as defined in [MS-SMBD] 3.1.1.1 */ + info->send_immediate = true; + if (atomic_read(&info->receive_credits) < + info->receive_credit_target - 1) { + if (info->keep_alive_requested == KEEP_ALIVE_PENDING || + info->send_immediate) { + log_keep_alive(INFO, "send an empty message\n"); + smbd_post_send_empty(info); + } + } } /* Called from softirq, when recv is done */ @@ -551,12 +533,6 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) info->keep_alive_requested = KEEP_ALIVE_PENDING; } - /* - * Check if we need to send something to remote peer to - * grant more credits or respond to KEEP_ALIVE packet - */ - check_and_send_immediate(info); - return; default: @@ -749,7 +725,6 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info) request->sge[0].addr, request->sge[0].length, request->sge[0].lkey); - request->has_payload = false; atomic_inc(&info->send_pending); rc = ib_post_send(info->id->qp, &send_wr, NULL); if (!rc) @@ -806,45 +781,96 @@ static int manage_keep_alive_before_sending(struct smbd_connection *info) return 0; } -/* - * Build and prepare the SMBD packet header - * This function waits for avaialbe send credits and build a SMBD packet - * header. The caller then optional append payload to the packet after - * the header - * intput values - * size: the size of the payload - * remaining_data_length: remaining data to send if this is part of a - * fragmented packet - * output values - * request_out: the request allocated from this function - * return values: 0 on success, otherwise actual error code returned - */ -static int smbd_create_header(struct smbd_connection *info, - int size, int remaining_data_length, - struct smbd_request **request_out) +/* Post the send request */ +static int smbd_post_send(struct smbd_connection *info, + struct smbd_request *request) +{ + struct ib_send_wr send_wr; + int rc, i; + + for (i = 0; i < request->num_sge; i++) { + log_rdma_send(INFO, + "rdma_request sge[%d] addr=%llu length=%u\n", + i, request->sge[i].addr, request->sge[i].length); + ib_dma_sync_single_for_device( + info->id->device, + request->sge[i].addr, + request->sge[i].length, + DMA_TO_DEVICE); + } + + request->cqe.done = send_done; + + send_wr.next = NULL; + send_wr.wr_cqe = &request->cqe; + send_wr.sg_list = request->sge; + send_wr.num_sge = request->num_sge; + send_wr.opcode = IB_WR_SEND; + send_wr.send_flags = IB_SEND_SIGNALED; + + rc = ib_post_send(info->id->qp, &send_wr, NULL); + if (rc) { + log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); + smbd_disconnect_rdma_connection(info); + rc = -EAGAIN; + } else + /* Reset timer for idle connection after packet is sent */ + mod_delayed_work(info->workqueue, &info->idle_timer_work, + info->keep_alive_interval*HZ); + + return rc; +} + +static int smbd_post_send_sgl(struct smbd_connection *info, + struct scatterlist *sgl, int data_length, int remaining_data_length) { + int num_sgs; + int i, rc; + int header_length; struct smbd_request *request; struct smbd_data_transfer *packet; - int header_length; - int rc; + int new_credits; + struct scatterlist *sg; +wait_credit: /* Wait for send credits. A SMBD packet needs one credit */ rc = wait_event_interruptible(info->wait_send_queue, atomic_read(&info->send_credits) > 0 || info->transport_status != SMBD_CONNECTED); if (rc) - return rc; + goto err_wait_credit; + + if (info->transport_status != SMBD_CONNECTED) { + log_outgoing(ERR, "disconnected not sending on wait_credit\n"); + rc = -EAGAIN; + goto err_wait_credit; + } + if (unlikely(atomic_dec_return(&info->send_credits) < 0)) { + atomic_inc(&info->send_credits); + goto wait_credit; + } + +wait_send_queue: + wait_event(info->wait_post_send, + atomic_read(&info->send_pending) < info->send_credit_target || + info->transport_status != SMBD_CONNECTED); if (info->transport_status != SMBD_CONNECTED) { - log_outgoing(ERR, "disconnected not sending\n"); - return -EAGAIN; + log_outgoing(ERR, "disconnected not sending on wait_send_queue\n"); + rc = -EAGAIN; + goto err_wait_send_queue; + } + + if (unlikely(atomic_inc_return(&info->send_pending) > + info->send_credit_target)) { + atomic_dec(&info->send_pending); + goto wait_send_queue; } - atomic_dec(&info->send_credits); request = mempool_alloc(info->request_mempool, GFP_KERNEL); if (!request) { rc = -ENOMEM; - goto err; + goto err_alloc; } request->info = info; @@ -852,8 +878,11 @@ static int smbd_create_header(struct smbd_connection *info, /* Fill in the packet header */ packet = smbd_request_payload(request); packet->credits_requested = cpu_to_le16(info->send_credit_target); - packet->credits_granted = - cpu_to_le16(manage_credits_prior_sending(info)); + + new_credits = manage_credits_prior_sending(info); + atomic_add(new_credits, &info->receive_credits); + packet->credits_granted = cpu_to_le16(new_credits); + info->send_immediate = false; packet->flags = 0; @@ -861,11 +890,11 @@ static int smbd_create_header(struct smbd_connection *info, packet->flags |= cpu_to_le16(SMB_DIRECT_RESPONSE_REQUESTED); packet->reserved = 0; - if (!size) + if (!data_length) packet->data_offset = 0; else packet->data_offset = cpu_to_le32(24); - packet->data_length = cpu_to_le32(size); + packet->data_length = cpu_to_le32(data_length); packet->remaining_data_length = cpu_to_le32(remaining_data_length); packet->padding = 0; @@ -880,7 +909,7 @@ static int smbd_create_header(struct smbd_connection *info, /* Map the packet to DMA */ header_length = sizeof(struct smbd_data_transfer); /* If this is a packet without payload, don't send padding */ - if (!size) + if (!data_length) header_length = offsetof(struct smbd_data_transfer, padding); request->num_sge = 1; @@ -889,102 +918,15 @@ static int smbd_create_header(struct smbd_connection *info, header_length, DMA_TO_DEVICE); if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) { - mempool_free(request, info->request_mempool); rc = -EIO; - goto err; + request->sge[0].addr = 0; + goto err_dma; } request->sge[0].length = header_length; request->sge[0].lkey = info->pd->local_dma_lkey; - *request_out = request; - return 0; - -err: - atomic_inc(&info->send_credits); - return rc; -} - -static void smbd_destroy_header(struct smbd_connection *info, - struct smbd_request *request) -{ - - ib_dma_unmap_single(info->id->device, - request->sge[0].addr, - request->sge[0].length, - DMA_TO_DEVICE); - mempool_free(request, info->request_mempool); - atomic_inc(&info->send_credits); -} - -/* Post the send request */ -static int smbd_post_send(struct smbd_connection *info, - struct smbd_request *request, bool has_payload) -{ - struct ib_send_wr send_wr; - int rc, i; - - for (i = 0; i < request->num_sge; i++) { - log_rdma_send(INFO, - "rdma_request sge[%d] addr=%llu length=%u\n", - i, request->sge[i].addr, request->sge[i].length); - ib_dma_sync_single_for_device( - info->id->device, - request->sge[i].addr, - request->sge[i].length, - DMA_TO_DEVICE); - } - - request->cqe.done = send_done; - - send_wr.next = NULL; - send_wr.wr_cqe = &request->cqe; - send_wr.sg_list = request->sge; - send_wr.num_sge = request->num_sge; - send_wr.opcode = IB_WR_SEND; - send_wr.send_flags = IB_SEND_SIGNALED; - - if (has_payload) { - request->has_payload = true; - atomic_inc(&info->send_payload_pending); - } else { - request->has_payload = false; - atomic_inc(&info->send_pending); - } - - rc = ib_post_send(info->id->qp, &send_wr, NULL); - if (rc) { - log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); - if (has_payload) { - if (atomic_dec_and_test(&info->send_payload_pending)) - wake_up(&info->wait_send_payload_pending); - } else { - if (atomic_dec_and_test(&info->send_pending)) - wake_up(&info->wait_send_pending); - } - smbd_disconnect_rdma_connection(info); - rc = -EAGAIN; - } else - /* Reset timer for idle connection after packet is sent */ - mod_delayed_work(info->workqueue, &info->idle_timer_work, - info->keep_alive_interval*HZ); - - return rc; -} - -static int smbd_post_send_sgl(struct smbd_connection *info, - struct scatterlist *sgl, int data_length, int remaining_data_length) -{ - int num_sgs; - int i, rc; - struct smbd_request *request; - struct scatterlist *sg; - - rc = smbd_create_header( - info, data_length, remaining_data_length, &request); - if (rc) - return rc; - + /* Fill in the packet data payload */ num_sgs = sgl ? sg_nents(sgl) : 0; for_each_sg(sgl, sg, num_sgs, i) { request->sge[i+1].addr = @@ -994,25 +936,41 @@ static int smbd_post_send_sgl(struct smbd_connection *info, info->id->device, request->sge[i+1].addr)) { rc = -EIO; request->sge[i+1].addr = 0; - goto dma_mapping_failure; + goto err_dma; } request->sge[i+1].length = sg->length; request->sge[i+1].lkey = info->pd->local_dma_lkey; request->num_sge++; } - rc = smbd_post_send(info, request, data_length); + rc = smbd_post_send(info, request); if (!rc) return 0; -dma_mapping_failure: - for (i = 1; i < request->num_sge; i++) +err_dma: + for (i = 0; i < request->num_sge; i++) if (request->sge[i].addr) ib_dma_unmap_single(info->id->device, request->sge[i].addr, request->sge[i].length, DMA_TO_DEVICE); - smbd_destroy_header(info, request); + mempool_free(request, info->request_mempool); + + /* roll back receive credits and credits to be offered */ + spin_lock(&info->lock_new_credits_offered); + info->new_credits_offered += new_credits; + spin_unlock(&info->lock_new_credits_offered); + atomic_sub(new_credits, &info->receive_credits); + +err_alloc: + if (atomic_dec_and_test(&info->send_pending)) + wake_up(&info->wait_send_pending); + +err_wait_send_queue: + /* roll back send credits and pending */ + atomic_inc(&info->send_credits); + +err_wait_credit: return rc; } @@ -1334,25 +1292,6 @@ static void destroy_receive_buffers(struct smbd_connection *info) mempool_free(response, info->response_mempool); } -/* - * Check and send an immediate or keep alive packet - * The condition to send those packets are defined in [MS-SMBD] 3.1.1.1 - * Connection.KeepaliveRequested and Connection.SendImmediate - * The idea is to extend credits to server as soon as it becomes available - */ -static void send_immediate_work(struct work_struct *work) -{ - struct smbd_connection *info = container_of( - work, struct smbd_connection, - send_immediate_work.work); - - if (info->keep_alive_requested == KEEP_ALIVE_PENDING || - info->send_immediate) { - log_keep_alive(INFO, "send an empty message\n"); - smbd_post_send_empty(info); - } -} - /* Implement idle connection timer [MS-SMBD] 3.1.6.2 */ static void idle_connection_timer(struct work_struct *work) { @@ -1407,14 +1346,10 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "cancelling idle timer\n"); cancel_delayed_work_sync(&info->idle_timer_work); - log_rdma_event(INFO, "cancelling send immediate work\n"); - cancel_delayed_work_sync(&info->send_immediate_work); log_rdma_event(INFO, "wait for all send posted to IB to finish\n"); wait_event(info->wait_send_pending, atomic_read(&info->send_pending) == 0); - wait_event(info->wait_send_payload_pending, - atomic_read(&info->send_payload_pending) == 0); /* It's not posssible for upper layer to get to reassembly */ log_rdma_event(INFO, "drain the reassembly queue\n"); @@ -1744,15 +1679,13 @@ static struct smbd_connection *_smbd_get_connection( init_waitqueue_head(&info->wait_send_queue); INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer); - INIT_DELAYED_WORK(&info->send_immediate_work, send_immediate_work); queue_delayed_work(info->workqueue, &info->idle_timer_work, info->keep_alive_interval*HZ); init_waitqueue_head(&info->wait_send_pending); atomic_set(&info->send_pending, 0); - init_waitqueue_head(&info->wait_send_payload_pending); - atomic_set(&info->send_payload_pending, 0); + init_waitqueue_head(&info->wait_post_send); INIT_WORK(&info->disconnect_work, smbd_disconnect_rdma_work); INIT_WORK(&info->post_send_credits_work, smbd_post_send_credits); @@ -2226,8 +2159,8 @@ done: * that means all the I/Os have been out and we are good to return */ - wait_event(info->wait_send_payload_pending, - atomic_read(&info->send_payload_pending) == 0); + wait_event(info->wait_send_pending, + atomic_read(&info->send_pending) == 0); return rc; } diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h index 8ede915f2b24..a87fca82a796 100644 --- a/fs/cifs/smbdirect.h +++ b/fs/cifs/smbdirect.h @@ -114,8 +114,7 @@ struct smbd_connection { /* Activity accoutning */ atomic_t send_pending; wait_queue_head_t wait_send_pending; - atomic_t send_payload_pending; - wait_queue_head_t wait_send_payload_pending; + wait_queue_head_t wait_post_send; /* Receive queue */ struct list_head receive_queue; @@ -154,7 +153,6 @@ struct smbd_connection { struct workqueue_struct *workqueue; struct delayed_work idle_timer_work; - struct delayed_work send_immediate_work; /* Memory pool for preallocating buffers */ /* request pool for RDMA send */ @@ -234,9 +232,6 @@ struct smbd_request { struct smbd_connection *info; struct ib_cqe cqe; - /* true if this request carries upper layer payload */ - bool has_payload; - /* the SGE entries for this packet */ struct ib_sge sge[SMBDIRECT_MAX_SGE]; int num_sge; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index f2dc35c22964..b8d78f393365 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2023,6 +2023,7 @@ lookup_again: goto lookup_again; } + spin_unlock(&ino->i_lock); first = true; status = nfs4_select_rw_stateid(ctx->state, iomode == IOMODE_RW ? FMODE_WRITE : FMODE_READ, @@ -2032,12 +2033,12 @@ lookup_again: trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_INVALID_OPEN); - spin_unlock(&ino->i_lock); nfs4_schedule_stateid_recovery(server, ctx->state); pnfs_clear_first_layoutget(lo); pnfs_put_layout_hdr(lo); goto lookup_again; } + spin_lock(&ino->i_lock); } else { nfs4_stateid_copy(&stateid, &lo->plh_stateid); } diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 25f135572fc8..e7ddbce48321 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -501,6 +501,7 @@ pnfs_alloc_ds_commits_list(struct list_head *list, rcu_read_lock(); pnfs_put_commit_array(array, cinfo->inode); } + rcu_read_unlock(); return ret; } |