diff options
Diffstat (limited to 'fs')
144 files changed, 3377 insertions, 2908 deletions
diff --git a/fs/9p/cache.c b/fs/9p/cache.c index 64c58eb26159..9eb34701a566 100644 --- a/fs/9p/cache.c +++ b/fs/9p/cache.c @@ -55,42 +55,27 @@ int v9fs_random_cachetag(struct v9fs_session_info *v9ses) return scnprintf(v9ses->cachetag, CACHETAG_LEN, "%lu", jiffies); } -static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax) -{ - struct v9fs_session_info *v9ses; - uint16_t klen = 0; - - v9ses = (struct v9fs_session_info *)cookie_netfs_data; - p9_debug(P9_DEBUG_FSC, "session %p buf %p size %u\n", - v9ses, buffer, bufmax); - - if (v9ses->cachetag) - klen = strlen(v9ses->cachetag); - - if (klen > bufmax) - return 0; - - memcpy(buffer, v9ses->cachetag, klen); - p9_debug(P9_DEBUG_FSC, "cache session tag %s\n", v9ses->cachetag); - return klen; -} - const struct fscache_cookie_def v9fs_cache_session_index_def = { .name = "9P.session", .type = FSCACHE_COOKIE_TYPE_INDEX, - .get_key = v9fs_cache_session_get_key, }; void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses) { /* If no cache session tag was specified, we generate a random one. */ - if (!v9ses->cachetag) - v9fs_random_cachetag(v9ses); + if (!v9ses->cachetag) { + if (v9fs_random_cachetag(v9ses) < 0) { + v9ses->fscache = NULL; + return; + } + } v9ses->fscache = fscache_acquire_cookie(v9fs_cache_netfs.primary_index, &v9fs_cache_session_index_def, - v9ses, true); + v9ses->cachetag, + strlen(v9ses->cachetag), + NULL, 0, + v9ses, 0, true); p9_debug(P9_DEBUG_FSC, "session %p get cookie %p\n", v9ses, v9ses->fscache); } @@ -99,45 +84,15 @@ void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses) { p9_debug(P9_DEBUG_FSC, "session %p put cookie %p\n", v9ses, v9ses->fscache); - fscache_relinquish_cookie(v9ses->fscache, 0); + fscache_relinquish_cookie(v9ses->fscache, NULL, false); v9ses->fscache = NULL; } - -static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax) -{ - const struct v9fs_inode *v9inode = cookie_netfs_data; - memcpy(buffer, &v9inode->qid.path, sizeof(v9inode->qid.path)); - p9_debug(P9_DEBUG_FSC, "inode %p get key %llu\n", - &v9inode->vfs_inode, v9inode->qid.path); - return sizeof(v9inode->qid.path); -} - -static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data, - uint64_t *size) -{ - const struct v9fs_inode *v9inode = cookie_netfs_data; - *size = i_size_read(&v9inode->vfs_inode); - - p9_debug(P9_DEBUG_FSC, "inode %p get attr %llu\n", - &v9inode->vfs_inode, *size); -} - -static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data, - void *buffer, uint16_t buflen) -{ - const struct v9fs_inode *v9inode = cookie_netfs_data; - memcpy(buffer, &v9inode->qid.version, sizeof(v9inode->qid.version)); - p9_debug(P9_DEBUG_FSC, "inode %p get aux %u\n", - &v9inode->vfs_inode, v9inode->qid.version); - return sizeof(v9inode->qid.version); -} - static enum fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data, const void *buffer, - uint16_t buflen) + uint16_t buflen, + loff_t object_size) { const struct v9fs_inode *v9inode = cookie_netfs_data; @@ -154,9 +109,6 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data, const struct fscache_cookie_def v9fs_cache_inode_index_def = { .name = "9p.inode", .type = FSCACHE_COOKIE_TYPE_DATAFILE, - .get_key = v9fs_cache_inode_get_key, - .get_attr = v9fs_cache_inode_get_attr, - .get_aux = v9fs_cache_inode_get_aux, .check_aux = v9fs_cache_inode_check_aux, }; @@ -175,7 +127,13 @@ void v9fs_cache_inode_get_cookie(struct inode *inode) v9ses = v9fs_inode2v9ses(inode); v9inode->fscache = fscache_acquire_cookie(v9ses->fscache, &v9fs_cache_inode_index_def, - v9inode, true); + &v9inode->qid.path, + sizeof(v9inode->qid.path), + &v9inode->qid.version, + sizeof(v9inode->qid.version), + v9inode, + i_size_read(&v9inode->vfs_inode), + true); p9_debug(P9_DEBUG_FSC, "inode %p get cookie %p\n", inode, v9inode->fscache); @@ -190,7 +148,8 @@ void v9fs_cache_inode_put_cookie(struct inode *inode) p9_debug(P9_DEBUG_FSC, "inode %p put cookie %p\n", inode, v9inode->fscache); - fscache_relinquish_cookie(v9inode->fscache, 0); + fscache_relinquish_cookie(v9inode->fscache, &v9inode->qid.version, + false); v9inode->fscache = NULL; } @@ -203,7 +162,7 @@ void v9fs_cache_inode_flush_cookie(struct inode *inode) p9_debug(P9_DEBUG_FSC, "inode %p flush cookie %p\n", inode, v9inode->fscache); - fscache_relinquish_cookie(v9inode->fscache, 1); + fscache_relinquish_cookie(v9inode->fscache, NULL, true); v9inode->fscache = NULL; } @@ -236,12 +195,18 @@ void v9fs_cache_inode_reset_cookie(struct inode *inode) old = v9inode->fscache; mutex_lock(&v9inode->fscache_lock); - fscache_relinquish_cookie(v9inode->fscache, 1); + fscache_relinquish_cookie(v9inode->fscache, NULL, true); v9ses = v9fs_inode2v9ses(inode); v9inode->fscache = fscache_acquire_cookie(v9ses->fscache, &v9fs_cache_inode_index_def, - v9inode, true); + &v9inode->qid.path, + sizeof(v9inode->qid.path), + &v9inode->qid.version, + sizeof(v9inode->qid.version), + v9inode, + i_size_read(&v9inode->vfs_inode), + true); p9_debug(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p\n", inode, old, v9inode->fscache); @@ -367,7 +332,8 @@ void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page) const struct v9fs_inode *v9inode = V9FS_I(inode); p9_debug(P9_DEBUG_FSC, "inode %p page %p\n", inode, page); - ret = fscache_write_page(v9inode->fscache, page, GFP_KERNEL); + ret = fscache_write_page(v9inode->fscache, page, + i_size_read(&v9inode->vfs_inode), GFP_KERNEL); p9_debug(P9_DEBUG_FSC, "ret = %d\n", ret); if (ret != 0) v9fs_uncache_page(inode, page); diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 8fb89ddc6cc7..e622f0f10502 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -292,6 +292,10 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) #ifdef CONFIG_9P_FSCACHE kfree(v9ses->cachetag); v9ses->cachetag = match_strdup(&args[0]); + if (!v9ses->cachetag) { + ret = -ENOMEM; + goto free_and_return; + } #endif break; case Opt_cache: @@ -471,6 +475,9 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, return fid; err_clnt: +#ifdef CONFIG_9P_FSCACHE + kfree(v9ses->cachetag); +#endif p9_client_destroy(v9ses->clnt); err_names: kfree(v9ses->uname); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index bdabb2765d1b..9ee534159cc6 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -579,6 +579,24 @@ static int v9fs_at_to_dotl_flags(int flags) } /** + * v9fs_dec_count - helper functon to drop i_nlink. + * + * If a directory had nlink <= 2 (including . and ..), then we should not drop + * the link count, which indicates the underlying exported fs doesn't maintain + * nlink accurately. e.g. + * - overlayfs sets nlink to 1 for merged dir + * - ext4 (with dir_nlink feature enabled) sets nlink to 1 if a dir has more + * than EXT4_LINK_MAX (65000) links. + * + * @inode: inode whose nlink is being dropped + */ +static void v9fs_dec_count(struct inode *inode) +{ + if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) + drop_nlink(inode); +} + +/** * v9fs_remove - helper function to remove files and directories * @dir: directory inode that is being deleted * @dentry: dentry that is being deleted @@ -621,9 +639,9 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags) */ if (flags & AT_REMOVEDIR) { clear_nlink(inode); - drop_nlink(dir); + v9fs_dec_count(dir); } else - drop_nlink(inode); + v9fs_dec_count(inode); v9fs_invalidate_inode_attr(inode); v9fs_invalidate_inode_attr(dir); @@ -1024,12 +1042,12 @@ clunk_newdir: if (S_ISDIR(new_inode->i_mode)) clear_nlink(new_inode); else - drop_nlink(new_inode); + v9fs_dec_count(new_inode); } if (S_ISDIR(old_inode->i_mode)) { if (!new_inode) inc_nlink(new_dir); - drop_nlink(old_dir); + v9fs_dec_count(old_dir); } v9fs_invalidate_inode_attr(old_inode); v9fs_invalidate_inode_attr(old_dir); diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index af03c2a901eb..48ce50484e80 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -94,7 +94,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, if (v9ses->cache) sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_SIZE; - sb->s_flags |= SB_ACTIVE | SB_DIRSYNC | SB_NOATIME; + sb->s_flags |= SB_ACTIVE | SB_DIRSYNC; if (!v9ses->cache) sb->s_flags |= SB_SYNCHRONOUS; diff --git a/fs/afs/cache.c b/fs/afs/cache.c index f62ff71d28c9..b1c31ec4523a 100644 --- a/fs/afs/cache.c +++ b/fs/afs/cache.c @@ -12,167 +12,39 @@ #include <linux/sched.h> #include "internal.h" -static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t buflen); -static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t buflen); - -static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t buflen); -static void afs_vnode_cache_get_attr(const void *cookie_netfs_data, - uint64_t *size); -static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data, - void *buffer, uint16_t buflen); static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data, const void *buffer, - uint16_t buflen); + uint16_t buflen, + loff_t object_size); struct fscache_netfs afs_cache_netfs = { .name = "afs", - .version = 1, + .version = 2, }; struct fscache_cookie_def afs_cell_cache_index_def = { .name = "AFS.cell", .type = FSCACHE_COOKIE_TYPE_INDEX, - .get_key = afs_cell_cache_get_key, }; struct fscache_cookie_def afs_volume_cache_index_def = { .name = "AFS.volume", .type = FSCACHE_COOKIE_TYPE_INDEX, - .get_key = afs_volume_cache_get_key, }; struct fscache_cookie_def afs_vnode_cache_index_def = { - .name = "AFS.vnode", - .type = FSCACHE_COOKIE_TYPE_DATAFILE, - .get_key = afs_vnode_cache_get_key, - .get_attr = afs_vnode_cache_get_attr, - .get_aux = afs_vnode_cache_get_aux, - .check_aux = afs_vnode_cache_check_aux, + .name = "AFS.vnode", + .type = FSCACHE_COOKIE_TYPE_DATAFILE, + .check_aux = afs_vnode_cache_check_aux, }; /* - * set the key for the index entry - */ -static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax) -{ - const struct afs_cell *cell = cookie_netfs_data; - uint16_t klen; - - _enter("%p,%p,%u", cell, buffer, bufmax); - - klen = strlen(cell->name); - if (klen > bufmax) - return 0; - - memcpy(buffer, cell->name, klen); - return klen; -} - -/*****************************************************************************/ -/* - * set the key for the volume index entry - */ -static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax) -{ - const struct afs_volume *volume = cookie_netfs_data; - struct { - u64 volid; - } __packed key; - - _enter("{%u},%p,%u", volume->type, buffer, bufmax); - - if (bufmax < sizeof(key)) - return 0; - - key.volid = volume->vid; - memcpy(buffer, &key, sizeof(key)); - return sizeof(key); -} - -/*****************************************************************************/ -/* - * set the key for the index entry - */ -static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax) -{ - const struct afs_vnode *vnode = cookie_netfs_data; - struct { - u32 vnode_id[3]; - } __packed key; - - _enter("{%x,%x,%llx},%p,%u", - vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, - buffer, bufmax); - - /* Allow for a 96-bit key */ - memset(&key, 0, sizeof(key)); - key.vnode_id[0] = vnode->fid.vnode; - key.vnode_id[1] = 0; - key.vnode_id[2] = 0; - - if (sizeof(key) > bufmax) - return 0; - - memcpy(buffer, &key, sizeof(key)); - return sizeof(key); -} - -/* - * provide updated file attributes - */ -static void afs_vnode_cache_get_attr(const void *cookie_netfs_data, - uint64_t *size) -{ - const struct afs_vnode *vnode = cookie_netfs_data; - - _enter("{%x,%x,%llx},", - vnode->fid.vnode, vnode->fid.unique, - vnode->status.data_version); - - *size = vnode->status.size; -} - -struct afs_vnode_cache_aux { - u64 data_version; - u32 fid_unique; -} __packed; - -/* - * provide new auxiliary cache data - */ -static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax) -{ - const struct afs_vnode *vnode = cookie_netfs_data; - struct afs_vnode_cache_aux aux; - - _enter("{%x,%x,%Lx},%p,%u", - vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, - buffer, bufmax); - - memset(&aux, 0, sizeof(aux)); - aux.data_version = vnode->status.data_version; - aux.fid_unique = vnode->fid.unique; - - if (bufmax < sizeof(aux)) - return 0; - - memcpy(buffer, &aux, sizeof(aux)); - return sizeof(aux); -} - -/* * check that the auxiliary data indicates that the entry is still valid */ static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data, const void *buffer, - uint16_t buflen) + uint16_t buflen, + loff_t object_size) { struct afs_vnode *vnode = cookie_netfs_data; struct afs_vnode_cache_aux aux; @@ -189,12 +61,6 @@ static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data, return FSCACHE_CHECKAUX_OBSOLETE; } - if (vnode->fid.unique != aux.fid_unique) { - _leave(" = OBSOLETE [uniq %x != %x]", - aux.fid_unique, vnode->fid.unique); - return FSCACHE_CHECKAUX_OBSOLETE; - } - if (vnode->status.data_version != aux.data_version) { _leave(" = OBSOLETE [vers %llx != %llx]", aux.data_version, vnode->status.data_version); diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 3d2c5e0e854e..4235a05afc76 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -522,7 +522,9 @@ static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell) #ifdef CONFIG_AFS_FSCACHE cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index, &afs_cell_cache_index_def, - cell, true); + cell->name, strlen(cell->name), + NULL, 0, + cell, 0, true); #endif ret = afs_proc_cell_setup(net, cell); if (ret < 0) @@ -547,7 +549,7 @@ static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell) spin_unlock(&net->proc_cells_lock); #ifdef CONFIG_AFS_FSCACHE - fscache_relinquish_cookie(cell->cache, 0); + fscache_relinquish_cookie(cell->cache, NULL, false); cell->cache = NULL; #endif diff --git a/fs/afs/file.c b/fs/afs/file.c index a39192ced99e..79e665a35fea 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -339,7 +339,8 @@ int afs_page_filler(void *data, struct page *page) /* send the page to the cache */ #ifdef CONFIG_AFS_FSCACHE if (PageFsCache(page) && - fscache_write_page(vnode->cache, page, GFP_KERNEL) != 0) { + fscache_write_page(vnode->cache, page, vnode->status.size, + GFP_KERNEL) != 0) { fscache_uncache_page(vnode->cache, page); BUG_ON(PageFsCache(page)); } @@ -403,7 +404,8 @@ static void afs_readpages_page_done(struct afs_call *call, struct afs_read *req) /* send the page to the cache */ #ifdef CONFIG_AFS_FSCACHE if (PageFsCache(page) && - fscache_write_page(vnode->cache, page, GFP_KERNEL) != 0) { + fscache_write_page(vnode->cache, page, vnode->status.size, + GFP_KERNEL) != 0) { fscache_uncache_page(vnode->cache, page); BUG_ON(PageFsCache(page)); } diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 6b39d0255b72..65c5b1edd338 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -243,6 +243,33 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root) } /* + * Get a cache cookie for an inode. + */ +static void afs_get_inode_cache(struct afs_vnode *vnode) +{ +#ifdef CONFIG_AFS_FSCACHE + struct { + u32 vnode_id; + u32 unique; + u32 vnode_id_ext[2]; /* Allow for a 96-bit key */ + } __packed key; + struct afs_vnode_cache_aux aux; + + key.vnode_id = vnode->fid.vnode; + key.unique = vnode->fid.unique; + key.vnode_id_ext[0] = 0; + key.vnode_id_ext[1] = 0; + aux.data_version = vnode->status.data_version; + + vnode->cache = fscache_acquire_cookie(vnode->volume->cache, + &afs_vnode_cache_index_def, + &key, sizeof(key), + &aux, sizeof(aux), + vnode, vnode->status.size, true); +#endif +} + +/* * inode retrieval */ struct inode *afs_iget(struct super_block *sb, struct key *key, @@ -307,11 +334,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, /* set up caching before mapping the status, as map-status reads the * first page of symlinks to see if they're really mountpoints */ inode->i_size = vnode->status.size; -#ifdef CONFIG_AFS_FSCACHE - vnode->cache = fscache_acquire_cookie(vnode->volume->cache, - &afs_vnode_cache_index_def, - vnode, true); -#endif + afs_get_inode_cache(vnode); ret = afs_inode_map_status(vnode, key); if (ret < 0) @@ -327,7 +350,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, /* failure */ bad_inode: #ifdef CONFIG_AFS_FSCACHE - fscache_relinquish_cookie(vnode->cache, 0); + fscache_relinquish_cookie(vnode->cache, NULL, ret == -ENOENT); vnode->cache = NULL; #endif iget_failed(inode); @@ -343,6 +366,10 @@ void afs_zap_data(struct afs_vnode *vnode) { _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); +#ifdef CONFIG_AFS_FSCACHE + fscache_invalidate(vnode->cache); +#endif + /* nuke all the non-dirty pages that aren't locked, mapped or being * written back in a regular file and completely discard the pages in a * directory or symlink */ @@ -507,8 +534,14 @@ void afs_evict_inode(struct inode *inode) } #ifdef CONFIG_AFS_FSCACHE - fscache_relinquish_cookie(vnode->cache, 0); - vnode->cache = NULL; + { + struct afs_vnode_cache_aux aux; + + aux.data_version = vnode->status.data_version; + fscache_relinquish_cookie(vnode->cache, &aux, + test_bit(AFS_VNODE_DELETED, &vnode->flags)); + vnode->cache = NULL; + } #endif afs_put_permits(vnode->permit_cache); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 72217170b155..a6a1d75eee41 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -559,6 +559,13 @@ struct afs_fs_cursor { #define AFS_FS_CURSOR_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */ }; +/* + * Cache auxiliary data. + */ +struct afs_vnode_cache_aux { + u64 data_version; +} __packed; + #include <trace/events/afs.h> /*****************************************************************************/ diff --git a/fs/afs/volume.c b/fs/afs/volume.c index b517a588781f..3037bd01f617 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -225,7 +225,9 @@ void afs_activate_volume(struct afs_volume *volume) #ifdef CONFIG_AFS_FSCACHE volume->cache = fscache_acquire_cookie(volume->cell->cache, &afs_volume_cache_index_def, - volume, true); + &volume->vid, sizeof(volume->vid), + NULL, 0, + volume, 0, true); #endif write_lock(&volume->cell->proc_lock); @@ -245,7 +247,7 @@ void afs_deactivate_volume(struct afs_volume *volume) write_unlock(&volume->cell->proc_lock); #ifdef CONFIG_AFS_FSCACHE - fscache_relinquish_cookie(volume->cache, + fscache_relinquish_cookie(volume->cache, NULL, test_bit(AFS_VOLUME_DELETED, &volume->flags)); volume->cache = NULL; #endif diff --git a/fs/block_dev.c b/fs/block_dev.c index fe09ef9c21f3..7a506c55a993 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1324,7 +1324,8 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty) * @bdev: struct bdev to adjust. * * This routine checks to see if the bdev size does not match the disk size - * and adjusts it if it differs. + * and adjusts it if it differs. When shrinking the bdev size, its all caches + * are freed. */ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) { @@ -1337,7 +1338,8 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) "%s: detected capacity change from %lld to %lld\n", disk->disk_name, bdev_size, disk_size); i_size_write(bdev->bd_inode, disk_size); - flush_disk(bdev, false); + if (bdev_size > disk_size) + flush_disk(bdev, false); } } EXPORT_SYMBOL(check_disk_size_change); diff --git a/fs/buffer.c b/fs/buffer.c index 9a73924db22f..ec5dd39071e6 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1511,7 +1511,7 @@ void block_invalidatepage(struct page *page, unsigned int offset, * The get_block cached value has been unconditionally invalidated, * so real IO is not possible anymore. */ - if (offset == 0) + if (length == PAGE_SIZE) try_to_release_page(page, 0); out: return; diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index e7f16a77a22a..222bc5d8b62c 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -32,7 +32,7 @@ static struct fscache_object *cachefiles_alloc_object( struct cachefiles_cache *cache; struct cachefiles_xattr *auxdata; unsigned keylen, auxlen; - void *buffer; + void *buffer, *p; char *key; cache = container_of(_cache, struct cachefiles_cache, cache); @@ -65,8 +65,12 @@ static struct fscache_object *cachefiles_alloc_object( if (!buffer) goto nomem_buffer; - keylen = cookie->def->get_key(cookie->netfs_data, buffer + 2, 512); - ASSERTCMP(keylen, <, 512); + keylen = cookie->key_len; + if (keylen <= sizeof(cookie->inline_key)) + p = cookie->inline_key; + else + p = cookie->key; + memcpy(buffer + 2, p, keylen); *(uint16_t *)buffer = keylen; ((char *)buffer)[keylen + 2] = 0; @@ -80,15 +84,17 @@ static struct fscache_object *cachefiles_alloc_object( /* get hold of the auxiliary data and prepend the object type */ auxdata = buffer; - auxlen = 0; - if (cookie->def->get_aux) { - auxlen = cookie->def->get_aux(cookie->netfs_data, - auxdata->data, 511); - ASSERTCMP(auxlen, <, 511); + auxlen = cookie->aux_len; + if (auxlen) { + if (auxlen <= sizeof(cookie->inline_aux)) + p = cookie->inline_aux; + else + p = cookie->aux; + memcpy(auxdata->data, p, auxlen); } auxdata->len = auxlen + 1; - auxdata->type = cookie->def->type; + auxdata->type = cookie->type; lookup_data->auxdata = auxdata; lookup_data->key = key; @@ -177,10 +183,12 @@ static void cachefiles_lookup_complete(struct fscache_object *_object) * increment the usage count on an inode object (may fail if unmounting) */ static -struct fscache_object *cachefiles_grab_object(struct fscache_object *_object) +struct fscache_object *cachefiles_grab_object(struct fscache_object *_object, + enum fscache_obj_ref_trace why) { struct cachefiles_object *object = container_of(_object, struct cachefiles_object, fscache); + int u; _enter("{OBJ%x,%d}", _object->debug_id, atomic_read(&object->usage)); @@ -188,7 +196,9 @@ struct fscache_object *cachefiles_grab_object(struct fscache_object *_object) ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000); #endif - atomic_inc(&object->usage); + u = atomic_inc_return(&object->usage); + trace_cachefiles_ref(object, _object->cookie, + (enum cachefiles_obj_ref_trace)why, u); return &object->fscache; } @@ -202,6 +212,7 @@ static void cachefiles_update_object(struct fscache_object *_object) struct cachefiles_cache *cache; struct fscache_cookie *cookie; const struct cred *saved_cred; + const void *aux; unsigned auxlen; _enter("{OBJ%x}", _object->debug_id); @@ -216,26 +227,29 @@ static void cachefiles_update_object(struct fscache_object *_object) } cookie = object->fscache.cookie; + auxlen = cookie->aux_len; - if (!cookie->def->get_aux) { + if (!auxlen) { fscache_unuse_cookie(_object); _leave(" [no aux]"); return; } - auxdata = kmalloc(2 + 512 + 3, cachefiles_gfp); + auxdata = kmalloc(2 + auxlen + 3, cachefiles_gfp); if (!auxdata) { fscache_unuse_cookie(_object); _leave(" [nomem]"); return; } - auxlen = cookie->def->get_aux(cookie->netfs_data, auxdata->data, 511); + aux = (auxlen <= sizeof(cookie->inline_aux)) ? + cookie->inline_aux : cookie->aux; + + memcpy(auxdata->data, aux, auxlen); fscache_unuse_cookie(_object); - ASSERTCMP(auxlen, <, 511); auxdata->len = auxlen + 1; - auxdata->type = cookie->def->type; + auxdata->type = cookie->type; cachefiles_begin_secure(cache, &saved_cred); cachefiles_update_object_xattr(object, auxdata); @@ -309,10 +323,12 @@ static void cachefiles_drop_object(struct fscache_object *_object) /* * dispose of a reference to an object */ -static void cachefiles_put_object(struct fscache_object *_object) +static void cachefiles_put_object(struct fscache_object *_object, + enum fscache_obj_ref_trace why) { struct cachefiles_object *object; struct fscache_cache *cache; + int u; ASSERT(_object); @@ -328,7 +344,11 @@ static void cachefiles_put_object(struct fscache_object *_object) ASSERTIFCMP(object->fscache.parent, object->fscache.parent->n_children, >, 0); - if (atomic_dec_and_test(&object->usage)) { + u = atomic_dec_return(&object->usage); + trace_cachefiles_ref(object, _object->cookie, + (enum cachefiles_obj_ref_trace)why, u); + ASSERTCMP(u, !=, -1); + if (u == 0) { _debug("- kill object OBJ%x", object->fscache.debug_id); ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)); @@ -421,7 +441,7 @@ static int cachefiles_attr_changed(struct fscache_object *_object) loff_t oi_size; int ret; - _object->cookie->def->get_attr(_object->cookie->netfs_data, &ni_size); + ni_size = _object->store_limit_l; _enter("{OBJ%x},[%llu]", _object->debug_id, (unsigned long long) ni_size); @@ -493,8 +513,7 @@ static void cachefiles_invalidate_object(struct fscache_operation *op) cache = container_of(object->fscache.cache, struct cachefiles_cache, cache); - op->object->cookie->def->get_attr(op->object->cookie->netfs_data, - &ni_size); + ni_size = op->object->store_limit_l; _enter("{OBJ%x},[%llu]", op->object->debug_id, (unsigned long long)ni_size); diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index bb3a02ca9da4..d2f6f996e65a 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -124,6 +124,8 @@ struct cachefiles_xattr { uint8_t data[]; }; +#include <trace/events/cachefiles.h> + /* * note change of state for daemon */ diff --git a/fs/cachefiles/main.c b/fs/cachefiles/main.c index 711f13d8c2de..f54d3f5b2e40 100644 --- a/fs/cachefiles/main.c +++ b/fs/cachefiles/main.c @@ -22,6 +22,7 @@ #include <linux/statfs.h> #include <linux/sysctl.h> #include <linux/miscdevice.h> +#define CREATE_TRACE_POINTS #include "internal.h" unsigned cachefiles_debug; diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 3978b324cbca..0daa1e3fe0df 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -30,11 +30,11 @@ */ static noinline void __cachefiles_printk_object(struct cachefiles_object *object, - const char *prefix, - u8 *keybuf) + const char *prefix) { struct fscache_cookie *cookie; - unsigned keylen, loop; + const u8 *k; + unsigned loop; pr_err("%sobject: OBJ%x\n", prefix, object->fscache.debug_id); pr_err("%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n", @@ -56,23 +56,16 @@ void __cachefiles_printk_object(struct cachefiles_object *object, object->fscache.cookie->parent, object->fscache.cookie->netfs_data, object->fscache.cookie->flags); - if (keybuf && cookie->def) - keylen = cookie->def->get_key(cookie->netfs_data, keybuf, - CACHEFILES_KEYBUF_SIZE); - else - keylen = 0; + pr_err("%skey=[%u] '", prefix, cookie->key_len); + k = (cookie->key_len <= sizeof(cookie->inline_key)) ? + cookie->inline_key : cookie->key; + for (loop = 0; loop < cookie->key_len; loop++) + pr_cont("%02x", k[loop]); + pr_cont("'\n"); } else { pr_err("%scookie=NULL\n", prefix); - keylen = 0; } spin_unlock(&object->fscache.lock); - - if (keylen) { - pr_err("%skey=[%u] '", prefix, keylen); - for (loop = 0; loop < keylen; loop++) - pr_cont("%02x", keybuf[loop]); - pr_cont("'\n"); - } } /* @@ -81,14 +74,10 @@ void __cachefiles_printk_object(struct cachefiles_object *object, static noinline void cachefiles_printk_object(struct cachefiles_object *object, struct cachefiles_object *xobject) { - u8 *keybuf; - - keybuf = kmalloc(CACHEFILES_KEYBUF_SIZE, GFP_NOIO); if (object) - __cachefiles_printk_object(object, "", keybuf); + __cachefiles_printk_object(object, ""); if (xobject) - __cachefiles_printk_object(xobject, "x", keybuf); - kfree(keybuf); + __cachefiles_printk_object(xobject, "x"); } /* @@ -120,6 +109,7 @@ static void cachefiles_mark_object_buried(struct cachefiles_cache *cache, } write_unlock(&cache->active_lock); + trace_cachefiles_mark_buried(NULL, dentry, why); _leave(" [no owner]"); return; @@ -130,6 +120,8 @@ found_dentry: object->fscache.state->name, dentry); + trace_cachefiles_mark_buried(object, dentry, why); + if (fscache_object_is_live(&object->fscache)) { pr_err("\n"); pr_err("Error: Can't preemptively bury live object\n"); @@ -158,13 +150,15 @@ static int cachefiles_mark_object_active(struct cachefiles_cache *cache, try_again: write_lock(&cache->active_lock); + dentry = object->dentry; + trace_cachefiles_mark_active(object, dentry); + if (test_and_set_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) { pr_err("Error: Object already active\n"); cachefiles_printk_object(object, NULL); BUG(); } - dentry = object->dentry; _p = &cache->active_nodes.rb_node; while (*_p) { _parent = *_p; @@ -191,6 +185,8 @@ try_again: /* an old object from a previous incarnation is hogging the slot - we * need to wait for it to be destroyed */ wait_for_old_object: + trace_cachefiles_wait_active(object, dentry, xobject); + if (fscache_object_is_live(&xobject->fscache)) { pr_err("\n"); pr_err("Error: Unexpected object collision\n"); @@ -248,12 +244,12 @@ wait_for_old_object: ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)); - cache->cache.ops->put_object(&xobject->fscache); + cache->cache.ops->put_object(&xobject->fscache, cachefiles_obj_put_wait_retry); goto try_again; requeue: clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); - cache->cache.ops->put_object(&xobject->fscache); + cache->cache.ops->put_object(&xobject->fscache, cachefiles_obj_put_wait_timeo); _leave(" = -ETIMEDOUT"); return -ETIMEDOUT; } @@ -265,6 +261,11 @@ void cachefiles_mark_object_inactive(struct cachefiles_cache *cache, struct cachefiles_object *object, blkcnt_t i_blocks) { + struct dentry *dentry = object->dentry; + struct inode *inode = d_backing_inode(dentry); + + trace_cachefiles_mark_inactive(object, dentry, inode); + write_lock(&cache->active_lock); rb_erase(&object->active_node, &cache->active_nodes); clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); @@ -288,6 +289,7 @@ void cachefiles_mark_object_inactive(struct cachefiles_cache *cache, * - unlocks the directory mutex */ static int cachefiles_bury_object(struct cachefiles_cache *cache, + struct cachefiles_object *object, struct dentry *dir, struct dentry *rep, bool preemptive, @@ -312,6 +314,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache, if (ret < 0) { cachefiles_io_error(cache, "Unlink security error"); } else { + trace_cachefiles_unlink(object, rep, why); ret = vfs_unlink(d_inode(dir), rep, NULL); if (preemptive) @@ -413,6 +416,7 @@ try_again: if (ret < 0) { cachefiles_io_error(cache, "Rename security error %d", ret); } else { + trace_cachefiles_rename(object, rep, grave, why); ret = vfs_rename(d_inode(dir), rep, d_inode(cache->graveyard), grave, NULL, 0); if (ret != 0 && ret != -ENOMEM) @@ -458,7 +462,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache, /* we need to check that our parent is _still_ our parent - it * may have been renamed */ if (dir == object->dentry->d_parent) { - ret = cachefiles_bury_object(cache, dir, + ret = cachefiles_bury_object(cache, object, dir, object->dentry, false, FSCACHE_OBJECT_WAS_RETIRED); } else { @@ -486,6 +490,7 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent, { struct cachefiles_cache *cache; struct dentry *dir, *next = NULL; + struct inode *inode; struct path path; unsigned long start; const char *name; @@ -529,13 +534,17 @@ lookup_again: start = jiffies; next = lookup_one_len(name, dir, nlen); cachefiles_hist(cachefiles_lookup_histogram, start); - if (IS_ERR(next)) + if (IS_ERR(next)) { + trace_cachefiles_lookup(object, next, NULL); goto lookup_error; + } - _debug("next -> %p %s", next, d_backing_inode(next) ? "positive" : "negative"); + inode = d_backing_inode(next); + trace_cachefiles_lookup(object, next, inode); + _debug("next -> %p %s", next, inode ? "positive" : "negative"); if (!key) - object->new = !d_backing_inode(next); + object->new = !inode; /* if this element of the path doesn't exist, then the lookup phase * failed, and we can release any readers in the certain knowledge that @@ -558,6 +567,8 @@ lookup_again: start = jiffies; ret = vfs_mkdir(d_inode(dir), next, 0); cachefiles_hist(cachefiles_mkdir_histogram, start); + if (!key) + trace_cachefiles_mkdir(object, next, ret); if (ret < 0) goto create_error; @@ -587,6 +598,7 @@ lookup_again: start = jiffies; ret = vfs_create(d_inode(dir), next, S_IFREG, true); cachefiles_hist(cachefiles_create_histogram, start); + trace_cachefiles_create(object, next, ret); if (ret < 0) goto create_error; @@ -629,7 +641,8 @@ lookup_again: * mutex) */ object->dentry = NULL; - ret = cachefiles_bury_object(cache, dir, next, true, + ret = cachefiles_bury_object(cache, object, dir, next, + true, FSCACHE_OBJECT_IS_STALE); dput(next); next = NULL; @@ -955,7 +968,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, /* actually remove the victim (drops the dir mutex) */ _debug("bury"); - ret = cachefiles_bury_object(cache, dir, victim, false, + ret = cachefiles_bury_object(cache, NULL, dir, victim, false, FSCACHE_OBJECT_WAS_CULLED); if (ret < 0) goto error; diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 883bc7bb12c5..5082c8a49686 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -952,6 +952,7 @@ error: * - cache withdrawal is prevented by the caller */ void cachefiles_uncache_page(struct fscache_object *_object, struct page *page) + __releases(&object->fscache.cookie->lock) { struct cachefiles_object *object; struct cachefiles_cache *cache; diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index d31c1a72d8a5..0a29a00aed2e 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c @@ -113,6 +113,7 @@ int cachefiles_set_object_xattr(struct cachefiles_object *object, /* attempt to install the cache metadata directly */ _debug("SET #%u", auxdata->len); + clear_bit(FSCACHE_COOKIE_AUX_UPDATED, &object->fscache.cookie->flags); ret = vfs_setxattr(dentry, cachefiles_xattr_cache, &auxdata->type, auxdata->len, XATTR_CREATE); @@ -141,6 +142,7 @@ int cachefiles_update_object_xattr(struct cachefiles_object *object, /* attempt to install the cache metadata directly */ _debug("SET #%u", auxdata->len); + clear_bit(FSCACHE_COOKIE_AUX_UPDATED, &object->fscache.cookie->flags); ret = vfs_setxattr(dentry, cachefiles_xattr_cache, &auxdata->type, auxdata->len, XATTR_REPLACE); @@ -180,7 +182,8 @@ int cachefiles_check_auxdata(struct cachefiles_object *object) goto error; xlen--; - validity = fscache_check_aux(&object->fscache, &auxbuf->data, xlen); + validity = fscache_check_aux(&object->fscache, &auxbuf->data, xlen, + i_size_read(d_backing_inode(dentry))); if (validity != FSCACHE_CHECKAUX_OKAY) goto error; @@ -249,7 +252,8 @@ int cachefiles_check_object_xattr(struct cachefiles_object *object, object->fscache.cookie->def->name, dlen); result = fscache_check_aux(&object->fscache, - &auxbuf->data, dlen); + &auxbuf->data, dlen, + i_size_read(d_backing_inode(dentry))); switch (result) { /* entry okay as is */ diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index a3ab265d3215..33a211b364ed 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -27,7 +27,6 @@ struct ceph_aux_inode { u64 version; struct timespec mtime; - loff_t size; }; struct fscache_netfs ceph_cache_netfs = { @@ -41,34 +40,15 @@ static LIST_HEAD(ceph_fscache_list); struct ceph_fscache_entry { struct list_head list; struct fscache_cookie *fscache; - struct ceph_fsid fsid; size_t uniq_len; + /* The following members must be last */ + struct ceph_fsid fsid; char uniquifier[0]; }; -static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t maxbuf) -{ - const struct ceph_fs_client* fsc = cookie_netfs_data; - const char *fscache_uniq = fsc->mount_options->fscache_uniq; - uint16_t fsid_len, uniq_len; - - fsid_len = sizeof(fsc->client->fsid); - uniq_len = fscache_uniq ? strlen(fscache_uniq) : 0; - if (fsid_len + uniq_len > maxbuf) - return 0; - - memcpy(buffer, &fsc->client->fsid, fsid_len); - if (uniq_len) - memcpy(buffer + fsid_len, fscache_uniq, uniq_len); - - return fsid_len + uniq_len; -} - static const struct fscache_cookie_def ceph_fscache_fsid_object_def = { .name = "CEPH.fsid", .type = FSCACHE_COOKIE_TYPE_INDEX, - .get_key = ceph_fscache_session_get_key, }; int ceph_fscache_register(void) @@ -110,16 +90,19 @@ int ceph_fscache_register_fs(struct ceph_fs_client* fsc) goto out_unlock; } + memcpy(&ent->fsid, fsid, sizeof(*fsid)); + if (uniq_len > 0) { + memcpy(&ent->uniquifier, fscache_uniq, uniq_len); + ent->uniq_len = uniq_len; + } + fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index, &ceph_fscache_fsid_object_def, - fsc, true); + &ent->fsid, sizeof(ent->fsid) + uniq_len, + NULL, 0, + fsc, 0, true); if (fsc->fscache) { - memcpy(&ent->fsid, fsid, sizeof(*fsid)); - if (uniq_len > 0) { - memcpy(&ent->uniquifier, fscache_uniq, uniq_len); - ent->uniq_len = uniq_len; - } ent->fscache = fsc->fscache; list_add_tail(&ent->list, &ceph_fscache_list); } else { @@ -133,59 +116,21 @@ out_unlock: return err; } -static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t maxbuf) -{ - const struct ceph_inode_info* ci = cookie_netfs_data; - uint16_t klen; - - /* use ceph virtual inode (id + snapshot) */ - klen = sizeof(ci->i_vino); - if (klen > maxbuf) - return 0; - - memcpy(buffer, &ci->i_vino, klen); - return klen; -} - -static uint16_t ceph_fscache_inode_get_aux(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax) -{ - struct ceph_aux_inode aux; - const struct ceph_inode_info* ci = cookie_netfs_data; - const struct inode* inode = &ci->vfs_inode; - - memset(&aux, 0, sizeof(aux)); - aux.version = ci->i_version; - aux.mtime = inode->i_mtime; - aux.size = i_size_read(inode); - - memcpy(buffer, &aux, sizeof(aux)); - - return sizeof(aux); -} - -static void ceph_fscache_inode_get_attr(const void *cookie_netfs_data, - uint64_t *size) -{ - const struct ceph_inode_info* ci = cookie_netfs_data; - *size = i_size_read(&ci->vfs_inode); -} - static enum fscache_checkaux ceph_fscache_inode_check_aux( - void *cookie_netfs_data, const void *data, uint16_t dlen) + void *cookie_netfs_data, const void *data, uint16_t dlen, + loff_t object_size) { struct ceph_aux_inode aux; struct ceph_inode_info* ci = cookie_netfs_data; struct inode* inode = &ci->vfs_inode; - if (dlen != sizeof(aux)) + if (dlen != sizeof(aux) || + i_size_read(inode) != object_size) return FSCACHE_CHECKAUX_OBSOLETE; memset(&aux, 0, sizeof(aux)); aux.version = ci->i_version; aux.mtime = inode->i_mtime; - aux.size = i_size_read(inode); if (memcmp(data, &aux, sizeof(aux)) != 0) return FSCACHE_CHECKAUX_OBSOLETE; @@ -197,9 +142,6 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux( static const struct fscache_cookie_def ceph_fscache_inode_object_def = { .name = "CEPH.inode", .type = FSCACHE_COOKIE_TYPE_DATAFILE, - .get_key = ceph_fscache_inode_get_key, - .get_attr = ceph_fscache_inode_get_attr, - .get_aux = ceph_fscache_inode_get_aux, .check_aux = ceph_fscache_inode_check_aux, }; @@ -207,6 +149,7 @@ void ceph_fscache_register_inode_cookie(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_aux_inode aux; /* No caching for filesystem */ if (!fsc->fscache) @@ -218,9 +161,14 @@ void ceph_fscache_register_inode_cookie(struct inode *inode) inode_lock_nested(inode, I_MUTEX_CHILD); if (!ci->fscache) { + memset(&aux, 0, sizeof(aux)); + aux.version = ci->i_version; + aux.mtime = inode->i_mtime; ci->fscache = fscache_acquire_cookie(fsc->fscache, - &ceph_fscache_inode_object_def, - ci, false); + &ceph_fscache_inode_object_def, + &ci->i_vino, sizeof(ci->i_vino), + &aux, sizeof(aux), + ci, i_size_read(inode), false); } inode_unlock(inode); } @@ -235,7 +183,7 @@ void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci) ci->fscache = NULL; fscache_uncache_all_inode_pages(cookie, &ci->vfs_inode); - fscache_relinquish_cookie(cookie, 0); + fscache_relinquish_cookie(cookie, &ci->i_vino, false); } static bool ceph_fscache_can_enable(void *data) @@ -254,11 +202,11 @@ void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp) if (inode_is_open_for_write(inode)) { dout("fscache_file_set_cookie %p %p disabling cache\n", inode, filp); - fscache_disable_cookie(ci->fscache, false); + fscache_disable_cookie(ci->fscache, &ci->i_vino, false); fscache_uncache_all_inode_pages(ci->fscache, inode); } else { - fscache_enable_cookie(ci->fscache, ceph_fscache_can_enable, - inode); + fscache_enable_cookie(ci->fscache, &ci->i_vino, i_size_read(inode), + ceph_fscache_can_enable, inode); if (fscache_cookie_enabled(ci->fscache)) { dout("fscache_file_set_cookie %p %p enabling cache\n", inode, filp); @@ -351,7 +299,8 @@ void ceph_readpage_to_fscache(struct inode *inode, struct page *page) if (!cache_valid(ci)) return; - ret = fscache_write_page(ci->fscache, page, GFP_KERNEL); + ret = fscache_write_page(ci->fscache, page, i_size_read(inode), + GFP_KERNEL); if (ret) fscache_uncache_page(ci->fscache, page); } @@ -385,7 +334,7 @@ void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc) WARN_ON_ONCE(!found); mutex_unlock(&ceph_fscache_lock); - __fscache_relinquish_cookie(fsc->fscache, 0); + __fscache_relinquish_cookie(fsc->fscache, NULL, false); } fsc->fscache = NULL; } @@ -402,7 +351,7 @@ void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci) * truncate while the caller holds CEPH_CAP_FILE_RD */ mutex_lock(&ci->i_truncate_mutex); if (!cache_valid(ci)) { - if (fscache_check_consistency(ci->fscache)) + if (fscache_check_consistency(ci->fscache, &ci->i_vino)) fscache_invalidate(ci->fscache); spin_lock(&ci->i_ceph_lock); ci->i_fscache_gen = ci->i_rdcache_gen; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index f1d9c6cc0491..2bdd561c4c68 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -2,7 +2,6 @@ #include <linux/ceph/ceph_debug.h> #include <linux/spinlock.h> -#include <linux/fs_struct.h> #include <linux/namei.h> #include <linux/slab.h> #include <linux/sched.h> diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c index 2c14020e5e1d..edf5f40898bf 100644 --- a/fs/cifs/cache.c +++ b/fs/cifs/cache.c @@ -46,67 +46,11 @@ void cifs_fscache_unregister(void) } /* - * Key layout of CIFS server cache index object - */ -struct cifs_server_key { - uint16_t family; /* address family */ - __be16 port; /* IP port */ - union { - struct in_addr ipv4_addr; - struct in6_addr ipv6_addr; - } addr[0]; -}; - -/* - * Server object keyed by {IPaddress,port,family} tuple - */ -static uint16_t cifs_server_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t maxbuf) -{ - const struct TCP_Server_Info *server = cookie_netfs_data; - const struct sockaddr *sa = (struct sockaddr *) &server->dstaddr; - const struct sockaddr_in *addr = (struct sockaddr_in *) sa; - const struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *) sa; - struct cifs_server_key *key = buffer; - uint16_t key_len = sizeof(struct cifs_server_key); - - memset(key, 0, key_len); - - /* - * Should not be a problem as sin_family/sin6_family overlays - * sa_family field - */ - switch (sa->sa_family) { - case AF_INET: - key->family = sa->sa_family; - key->port = addr->sin_port; - key->addr[0].ipv4_addr = addr->sin_addr; - key_len += sizeof(key->addr[0].ipv4_addr); - break; - - case AF_INET6: - key->family = sa->sa_family; - key->port = addr6->sin6_port; - key->addr[0].ipv6_addr = addr6->sin6_addr; - key_len += sizeof(key->addr[0].ipv6_addr); - break; - - default: - cifs_dbg(VFS, "Unknown network family '%d'\n", sa->sa_family); - key_len = 0; - break; - } - - return key_len; -} - -/* * Server object for FS-Cache */ const struct fscache_cookie_def cifs_fscache_server_index_def = { .name = "CIFS.server", .type = FSCACHE_COOKIE_TYPE_INDEX, - .get_key = cifs_server_get_key, }; /* @@ -116,7 +60,7 @@ struct cifs_fscache_super_auxdata { u64 resource_id; /* unique server resource id */ }; -static char *extract_sharename(const char *treename) +char *extract_sharename(const char *treename) { const char *src; char *delim, *dst; @@ -140,56 +84,11 @@ static char *extract_sharename(const char *treename) return dst; } -/* - * Superblock object currently keyed by share name - */ -static uint16_t cifs_super_get_key(const void *cookie_netfs_data, void *buffer, - uint16_t maxbuf) -{ - const struct cifs_tcon *tcon = cookie_netfs_data; - char *sharename; - uint16_t len; - - sharename = extract_sharename(tcon->treeName); - if (IS_ERR(sharename)) { - cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__); - sharename = NULL; - return 0; - } - - len = strlen(sharename); - if (len > maxbuf) - return 0; - - memcpy(buffer, sharename, len); - - kfree(sharename); - - return len; -} - -static uint16_t -cifs_fscache_super_get_aux(const void *cookie_netfs_data, void *buffer, - uint16_t maxbuf) -{ - struct cifs_fscache_super_auxdata auxdata; - const struct cifs_tcon *tcon = cookie_netfs_data; - - memset(&auxdata, 0, sizeof(auxdata)); - auxdata.resource_id = tcon->resource_id; - - if (maxbuf > sizeof(auxdata)) - maxbuf = sizeof(auxdata); - - memcpy(buffer, &auxdata, maxbuf); - - return maxbuf; -} - static enum fscache_checkaux cifs_fscache_super_check_aux(void *cookie_netfs_data, const void *data, - uint16_t datalen) + uint16_t datalen, + loff_t object_size) { struct cifs_fscache_super_auxdata auxdata; const struct cifs_tcon *tcon = cookie_netfs_data; @@ -212,68 +111,14 @@ fscache_checkaux cifs_fscache_super_check_aux(void *cookie_netfs_data, const struct fscache_cookie_def cifs_fscache_super_index_def = { .name = "CIFS.super", .type = FSCACHE_COOKIE_TYPE_INDEX, - .get_key = cifs_super_get_key, - .get_aux = cifs_fscache_super_get_aux, .check_aux = cifs_fscache_super_check_aux, }; -/* - * Auxiliary data attached to CIFS inode within the cache - */ -struct cifs_fscache_inode_auxdata { - struct timespec last_write_time; - struct timespec last_change_time; - u64 eof; -}; - -static uint16_t cifs_fscache_inode_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t maxbuf) -{ - const struct cifsInodeInfo *cifsi = cookie_netfs_data; - uint16_t keylen; - - /* use the UniqueId as the key */ - keylen = sizeof(cifsi->uniqueid); - if (keylen > maxbuf) - keylen = 0; - else - memcpy(buffer, &cifsi->uniqueid, keylen); - - return keylen; -} - -static void -cifs_fscache_inode_get_attr(const void *cookie_netfs_data, uint64_t *size) -{ - const struct cifsInodeInfo *cifsi = cookie_netfs_data; - - *size = cifsi->vfs_inode.i_size; -} - -static uint16_t -cifs_fscache_inode_get_aux(const void *cookie_netfs_data, void *buffer, - uint16_t maxbuf) -{ - struct cifs_fscache_inode_auxdata auxdata; - const struct cifsInodeInfo *cifsi = cookie_netfs_data; - - memset(&auxdata, 0, sizeof(auxdata)); - auxdata.eof = cifsi->server_eof; - auxdata.last_write_time = cifsi->vfs_inode.i_mtime; - auxdata.last_change_time = cifsi->vfs_inode.i_ctime; - - if (maxbuf > sizeof(auxdata)) - maxbuf = sizeof(auxdata); - - memcpy(buffer, &auxdata, maxbuf); - - return maxbuf; -} - static enum fscache_checkaux cifs_fscache_inode_check_aux(void *cookie_netfs_data, const void *data, - uint16_t datalen) + uint16_t datalen, + loff_t object_size) { struct cifs_fscache_inode_auxdata auxdata; struct cifsInodeInfo *cifsi = cookie_netfs_data; @@ -295,8 +140,5 @@ fscache_checkaux cifs_fscache_inode_check_aux(void *cookie_netfs_data, const struct fscache_cookie_def cifs_fscache_inode_object_def = { .name = "CIFS.uniqueid", .type = FSCACHE_COOKIE_TYPE_DATAFILE, - .get_key = cifs_fscache_inode_get_key, - .get_attr = cifs_fscache_inode_get_attr, - .get_aux = cifs_fscache_inode_get_aux, .check_aux = cifs_fscache_inode_check_aux, }; diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index 8d4b7bc8ae91..25d3f66b2d50 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c @@ -23,11 +23,63 @@ #include "cifs_debug.h" #include "cifs_fs_sb.h" +/* + * Key layout of CIFS server cache index object + */ +struct cifs_server_key { + struct { + uint16_t family; /* address family */ + __be16 port; /* IP port */ + } hdr; + union { + struct in_addr ipv4_addr; + struct in6_addr ipv6_addr; + }; +} __packed; + +/* + * Get a cookie for a server object keyed by {IPaddress,port,family} tuple + */ void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) { + const struct sockaddr *sa = (struct sockaddr *) &server->dstaddr; + const struct sockaddr_in *addr = (struct sockaddr_in *) sa; + const struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *) sa; + struct cifs_server_key key; + uint16_t key_len = sizeof(key.hdr); + + memset(&key, 0, sizeof(key)); + + /* + * Should not be a problem as sin_family/sin6_family overlays + * sa_family field + */ + key.hdr.family = sa->sa_family; + switch (sa->sa_family) { + case AF_INET: + key.hdr.port = addr->sin_port; + key.ipv4_addr = addr->sin_addr; + key_len += sizeof(key.ipv4_addr); + break; + + case AF_INET6: + key.hdr.port = addr6->sin6_port; + key.ipv6_addr = addr6->sin6_addr; + key_len += sizeof(key.ipv6_addr); + break; + + default: + cifs_dbg(VFS, "Unknown network family '%d'\n", sa->sa_family); + server->fscache = NULL; + return; + } + server->fscache = fscache_acquire_cookie(cifs_fscache_netfs.primary_index, - &cifs_fscache_server_index_def, server, true); + &cifs_fscache_server_index_def, + &key, key_len, + NULL, 0, + server, 0, true); cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, server, server->fscache); } @@ -36,17 +88,29 @@ void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server) { cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, server, server->fscache); - fscache_relinquish_cookie(server->fscache, 0); + fscache_relinquish_cookie(server->fscache, NULL, false); server->fscache = NULL; } void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) { struct TCP_Server_Info *server = tcon->ses->server; + char *sharename; + + sharename = extract_sharename(tcon->treeName); + if (IS_ERR(sharename)) { + cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__); + tcon->fscache = NULL; + return; + } tcon->fscache = fscache_acquire_cookie(server->fscache, - &cifs_fscache_super_index_def, tcon, true); + &cifs_fscache_super_index_def, + sharename, strlen(sharename), + &tcon->resource_id, sizeof(tcon->resource_id), + tcon, 0, true); + kfree(sharename); cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, server->fscache, tcon->fscache); } @@ -54,10 +118,28 @@ void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) { cifs_dbg(FYI, "%s: (0x%p)\n", __func__, tcon->fscache); - fscache_relinquish_cookie(tcon->fscache, 0); + fscache_relinquish_cookie(tcon->fscache, &tcon->resource_id, false); tcon->fscache = NULL; } +static void cifs_fscache_acquire_inode_cookie(struct cifsInodeInfo *cifsi, + struct cifs_tcon *tcon) +{ + struct cifs_fscache_inode_auxdata auxdata; + + memset(&auxdata, 0, sizeof(auxdata)); + auxdata.eof = cifsi->server_eof; + auxdata.last_write_time = cifsi->vfs_inode.i_mtime; + auxdata.last_change_time = cifsi->vfs_inode.i_ctime; + + cifsi->fscache = + fscache_acquire_cookie(tcon->fscache, + &cifs_fscache_inode_object_def, + &cifsi->uniqueid, sizeof(cifsi->uniqueid), + &auxdata, sizeof(auxdata), + cifsi, cifsi->vfs_inode.i_size, true); +} + static void cifs_fscache_enable_inode_cookie(struct inode *inode) { struct cifsInodeInfo *cifsi = CIFS_I(inode); @@ -67,21 +149,28 @@ static void cifs_fscache_enable_inode_cookie(struct inode *inode) if (cifsi->fscache) return; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) { - cifsi->fscache = fscache_acquire_cookie(tcon->fscache, - &cifs_fscache_inode_object_def, cifsi, true); - cifs_dbg(FYI, "%s: got FH cookie (0x%p/0x%p)\n", - __func__, tcon->fscache, cifsi->fscache); - } + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE)) + return; + + cifs_fscache_acquire_inode_cookie(cifsi, tcon); + + cifs_dbg(FYI, "%s: got FH cookie (0x%p/0x%p)\n", + __func__, tcon->fscache, cifsi->fscache); } void cifs_fscache_release_inode_cookie(struct inode *inode) { + struct cifs_fscache_inode_auxdata auxdata; struct cifsInodeInfo *cifsi = CIFS_I(inode); if (cifsi->fscache) { + memset(&auxdata, 0, sizeof(auxdata)); + auxdata.eof = cifsi->server_eof; + auxdata.last_write_time = cifsi->vfs_inode.i_mtime; + auxdata.last_change_time = cifsi->vfs_inode.i_ctime; + cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache); - fscache_relinquish_cookie(cifsi->fscache, 0); + fscache_relinquish_cookie(cifsi->fscache, &auxdata, false); cifsi->fscache = NULL; } } @@ -93,7 +182,7 @@ static void cifs_fscache_disable_inode_cookie(struct inode *inode) if (cifsi->fscache) { cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache); fscache_uncache_all_inode_pages(cifsi->fscache, inode); - fscache_relinquish_cookie(cifsi->fscache, 1); + fscache_relinquish_cookie(cifsi->fscache, NULL, true); cifsi->fscache = NULL; } } @@ -110,16 +199,14 @@ void cifs_fscache_reset_inode_cookie(struct inode *inode) { struct cifsInodeInfo *cifsi = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); struct fscache_cookie *old = cifsi->fscache; if (cifsi->fscache) { /* retire the current fscache cache and get a new one */ - fscache_relinquish_cookie(cifsi->fscache, 1); + fscache_relinquish_cookie(cifsi->fscache, NULL, true); - cifsi->fscache = fscache_acquire_cookie( - cifs_sb_master_tcon(cifs_sb)->fscache, - &cifs_fscache_inode_object_def, - cifsi, true); + cifs_fscache_acquire_inode_cookie(cifsi, tcon); cifs_dbg(FYI, "%s: new cookie 0x%p oldcookie 0x%p\n", __func__, cifsi->fscache, old); } @@ -214,13 +301,15 @@ int __cifs_readpages_from_fscache(struct inode *inode, void __cifs_readpage_to_fscache(struct inode *inode, struct page *page) { + struct cifsInodeInfo *cifsi = CIFS_I(inode); int ret; cifs_dbg(FYI, "%s: (fsc: %p, p: %p, i: %p)\n", - __func__, CIFS_I(inode)->fscache, page, inode); - ret = fscache_write_page(CIFS_I(inode)->fscache, page, GFP_KERNEL); + __func__, cifsi->fscache, page, inode); + ret = fscache_write_page(cifsi->fscache, page, + cifsi->vfs_inode.i_size, GFP_KERNEL); if (ret != 0) - fscache_uncache_page(CIFS_I(inode)->fscache, page); + fscache_uncache_page(cifsi->fscache, page); } void __cifs_fscache_readpages_cancel(struct inode *inode, struct list_head *pages) @@ -239,4 +328,3 @@ void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode) fscache_wait_on_page_write(cookie, page); fscache_uncache_page(cookie, page); } - diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h index 24794b6cd8ec..c7e3ac251e16 100644 --- a/fs/cifs/fscache.h +++ b/fs/cifs/fscache.h @@ -27,6 +27,18 @@ #ifdef CONFIG_CIFS_FSCACHE +/* + * Auxiliary data attached to CIFS inode within the cache + */ +struct cifs_fscache_inode_auxdata { + struct timespec last_write_time; + struct timespec last_change_time; + u64 eof; +}; + +/* + * cache.c + */ extern struct fscache_netfs cifs_fscache_netfs; extern const struct fscache_cookie_def cifs_fscache_server_index_def; extern const struct fscache_cookie_def cifs_fscache_super_index_def; @@ -34,6 +46,7 @@ extern const struct fscache_cookie_def cifs_fscache_inode_object_def; extern int cifs_fscache_register(void); extern void cifs_fscache_unregister(void); +extern char *extract_sharename(const char *); /* * fscache.c diff --git a/fs/direct-io.c b/fs/direct-io.c index 1357ef563893..874607bb6e02 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -315,8 +315,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags) dio_warn_stale_pagecache(dio->iocb->ki_filp); } - if (!(dio->flags & DIO_SKIP_DIO_COUNT)) - inode_dio_end(dio->inode); + inode_dio_end(dio->inode); if (flags & DIO_COMPLETE_ASYNC) { /* @@ -1178,9 +1177,9 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, unsigned blkbits = i_blkbits; unsigned blocksize_mask = (1 << blkbits) - 1; ssize_t retval = -EINVAL; - size_t count = iov_iter_count(iter); + const size_t count = iov_iter_count(iter); loff_t offset = iocb->ki_pos; - loff_t end = offset + count; + const loff_t end = offset + count; struct dio *dio; struct dio_submit sdio = { 0, }; struct buffer_head map_bh = { 0, }; @@ -1201,7 +1200,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, } /* watch out for a 0 len io from a tricksy fs */ - if (iov_iter_rw(iter) == READ && !iov_iter_count(iter)) + if (iov_iter_rw(iter) == READ && !count) return 0; dio = kmem_cache_alloc(dio_cache, GFP_KERNEL); @@ -1252,8 +1251,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, */ if (is_sync_kiocb(iocb)) dio->is_async = false; - else if (!(dio->flags & DIO_ASYNC_EXTEND) && - iov_iter_rw(iter) == WRITE && end > i_size_read(inode)) + else if (iov_iter_rw(iter) == WRITE && end > i_size_read(inode)) dio->is_async = false; else dio->is_async = true; @@ -1297,8 +1295,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, /* * Will be decremented at I/O completion time. */ - if (!(dio->flags & DIO_SKIP_DIO_COUNT)) - inode_dio_begin(inode); + inode_dio_begin(inode); retval = 0; sdio.blkbits = blkbits; @@ -1318,8 +1315,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, dio->should_dirty = (iter->type == ITER_IOVEC); sdio.iter = iter; - sdio.final_block_in_request = - (offset + iov_iter_count(iter)) >> blkbits; + sdio.final_block_in_request = end >> blkbits; /* * In case of non-aligned buffers, we may need 2 more diff --git a/fs/exec.c b/fs/exec.c index 7eb8d21bcab9..a919a827d181 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -895,13 +895,13 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size, if (!S_ISREG(file_inode(file)->i_mode) || max_size < 0) return -EINVAL; - ret = security_kernel_read_file(file, id); + ret = deny_write_access(file); if (ret) return ret; - ret = deny_write_access(file); + ret = security_kernel_read_file(file, id); if (ret) - return ret; + goto out; i_size = i_size_read(file_inode(file)); if (max_size > 0 && i_size > max_size) { diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 7666c065b96f..de1694512f1f 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -827,7 +827,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) unsigned long logic_sb_block; unsigned long offset = 0; unsigned long def_mount_opts; - long ret = -EINVAL; + long ret = -ENOMEM; int blocksize = BLOCK_SIZE; int db_count; int i, j; @@ -835,7 +835,6 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) int err; struct ext2_mount_options opts; - err = -ENOMEM; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) goto failed; @@ -851,6 +850,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sbi->s_daxdev = dax_dev; spin_lock_init(&sbi->s_lock); + ret = -EINVAL; /* * See what the current blocksize for the device is, and diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 18aa2ef963ad..129205028300 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5024,12 +5024,12 @@ static int other_inode_match(struct inode * inode, unsigned long ino, if ((inode->i_ino != ino) || (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | - I_DIRTY_SYNC | I_DIRTY_DATASYNC)) || + I_DIRTY_INODE)) || ((inode->i_state & I_DIRTY_TIME) == 0)) return 0; spin_lock(&inode->i_lock); if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | - I_DIRTY_SYNC | I_DIRTY_DATASYNC)) == 0) && + I_DIRTY_INODE)) == 0) && (inode->i_state & I_DIRTY_TIME)) { struct ext4_inode_info *ei = EXT4_I(inode); diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 512dca8abc7d..bf779461df13 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -68,6 +68,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, .old_blkaddr = index, .new_blkaddr = index, .encrypted_page = NULL, + .is_meta = is_meta, }; if (unlikely(!is_meta)) @@ -162,6 +163,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD, .encrypted_page = NULL, .in_list = false, + .is_meta = (type != META_POR), }; struct blk_plug plug; @@ -569,13 +571,8 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) struct node_info ni; int err = acquire_orphan_inode(sbi); - if (err) { - set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: orphan failed (ino=%x), run fsck to fix.", - __func__, ino); - return err; - } + if (err) + goto err_out; __add_ino_entry(sbi, ino, 0, ORPHAN_INO); @@ -589,6 +586,11 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) return PTR_ERR(inode); } + err = dquot_initialize(inode); + if (err) + goto err_out; + + dquot_initialize(inode); clear_nlink(inode); /* truncate all the data during iput */ @@ -598,14 +600,18 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) /* ENOMEM was fully retried in f2fs_evict_inode. */ if (ni.blk_addr != NULL_ADDR) { - set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: orphan failed (ino=%x) by kernel, retry mount.", - __func__, ino); - return -EIO; + err = -EIO; + goto err_out; } __remove_ino_entry(sbi, ino, ORPHAN_INO); return 0; + +err_out: + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: orphan failed (ino=%x), run fsck to fix.", + __func__, ino); + return err; } int recover_orphan_inodes(struct f2fs_sb_info *sbi) @@ -1136,6 +1142,8 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (cpc->reason & CP_TRIMMED) __set_ckpt_flags(ckpt, CP_TRIMMED_FLAG); + else + __clear_ckpt_flags(ckpt, CP_TRIMMED_FLAG); if (cpc->reason & CP_UMOUNT) __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); @@ -1162,6 +1170,39 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) spin_unlock_irqrestore(&sbi->cp_lock, flags); } +static void commit_checkpoint(struct f2fs_sb_info *sbi, + void *src, block_t blk_addr) +{ + struct writeback_control wbc = { + .for_reclaim = 0, + }; + + /* + * pagevec_lookup_tag and lock_page again will take + * some extra time. Therefore, update_meta_pages and + * sync_meta_pages are combined in this function. + */ + struct page *page = grab_meta_page(sbi, blk_addr); + int err; + + memcpy(page_address(page), src, PAGE_SIZE); + set_page_dirty(page); + + f2fs_wait_on_page_writeback(page, META, true); + f2fs_bug_on(sbi, PageWriteback(page)); + if (unlikely(!clear_page_dirty_for_io(page))) + f2fs_bug_on(sbi, 1); + + /* writeout cp pack 2 page */ + err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO); + f2fs_bug_on(sbi, err); + + f2fs_put_page(page, 0); + + /* submit checkpoint (with barrier if NOBARRIER is not set) */ + f2fs_submit_merged_write(sbi, META_FLUSH); +} + static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); @@ -1264,16 +1305,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) } } - /* need to wait for end_io results */ - wait_on_all_pages_writeback(sbi); - if (unlikely(f2fs_cp_error(sbi))) - return -EIO; - - /* flush all device cache */ - err = f2fs_flush_device_cache(sbi); - if (err) - return err; - /* write out checkpoint buffer at block 0 */ update_meta_page(sbi, ckpt, start_blk++); @@ -1301,26 +1332,26 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) start_blk += NR_CURSEG_NODE_TYPE; } - /* writeout checkpoint block */ - update_meta_page(sbi, ckpt, start_blk); + /* update user_block_counts */ + sbi->last_valid_block_count = sbi->total_valid_block_count; + percpu_counter_set(&sbi->alloc_valid_block_count, 0); + + /* Here, we have one bio having CP pack except cp pack 2 page */ + sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); - /* wait for previous submitted node/meta pages writeback */ + /* wait for previous submitted meta pages writeback */ wait_on_all_pages_writeback(sbi); if (unlikely(f2fs_cp_error(sbi))) return -EIO; - filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LLONG_MAX); - filemap_fdatawait_range(META_MAPPING(sbi), 0, LLONG_MAX); - - /* update user_block_counts */ - sbi->last_valid_block_count = sbi->total_valid_block_count; - percpu_counter_set(&sbi->alloc_valid_block_count, 0); - - /* Here, we only have one bio having CP pack */ - sync_meta_pages(sbi, META_FLUSH, LONG_MAX, FS_CP_META_IO); + /* flush all device cache */ + err = f2fs_flush_device_cache(sbi); + if (err) + return err; - /* wait for previous submitted meta pages writeback */ + /* barrier and flush checkpoint cp pack 2 page if it can */ + commit_checkpoint(sbi, ckpt, start_blk); wait_on_all_pages_writeback(sbi); release_ino_entry(sbi, false); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7578ed1a85e0..db50686f5096 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -175,15 +175,22 @@ static bool __same_bdev(struct f2fs_sb_info *sbi, */ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, struct writeback_control *wbc, - int npages, bool is_read) + int npages, bool is_read, + enum page_type type, enum temp_type temp) { struct bio *bio; bio = f2fs_bio_alloc(sbi, npages, true); f2fs_target_device(sbi, blk_addr, bio); - bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; - bio->bi_private = is_read ? NULL : sbi; + if (is_read) { + bio->bi_end_io = f2fs_read_end_io; + bio->bi_private = NULL; + } else { + bio->bi_end_io = f2fs_write_end_io; + bio->bi_private = sbi; + bio->bi_write_hint = io_type_to_rw_hint(sbi, type, temp); + } if (wbc) wbc_init_bio(wbc, bio); @@ -196,13 +203,12 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, if (!is_read_io(bio_op(bio))) { unsigned int start; - if (f2fs_sb_mounted_blkzoned(sbi->sb) && - current->plug && (type == DATA || type == NODE)) - blk_finish_plug(current->plug); - if (type != DATA && type != NODE) goto submit_io; + if (f2fs_sb_has_blkzoned(sbi->sb) && current->plug) + blk_finish_plug(current->plug); + start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS; start %= F2FS_IO_SIZE(sbi); @@ -377,12 +383,13 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page; + verify_block_addr(fio, fio->new_blkaddr); trace_f2fs_submit_page_bio(page, fio); f2fs_trace_ios(fio, 0); /* Allocate a new bio */ bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc, - 1, is_read_io(fio->op)); + 1, is_read_io(fio->op), fio->type, fio->temp); if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); @@ -422,8 +429,8 @@ next: } if (fio->old_blkaddr != NEW_ADDR) - verify_block_addr(sbi, fio->old_blkaddr); - verify_block_addr(sbi, fio->new_blkaddr); + verify_block_addr(fio, fio->old_blkaddr); + verify_block_addr(fio, fio->new_blkaddr); bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; @@ -445,7 +452,8 @@ alloc_new: goto out_fail; } io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc, - BIO_MAX_PAGES, false); + BIO_MAX_PAGES, false, + fio->type, fio->temp); io->fio = *fio; } @@ -832,13 +840,6 @@ alloc: return 0; } -static inline bool __force_buffered_io(struct inode *inode, int rw) -{ - return (f2fs_encrypted_file(inode) || - (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) || - F2FS_I_SB(inode)->s_ndevs); -} - int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); @@ -870,7 +871,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) if (direct_io) { map.m_seg_type = rw_hint_to_seg_type(iocb->ki_hint); - flag = __force_buffered_io(inode, WRITE) ? + flag = f2fs_force_buffered_io(inode, WRITE) ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO; goto map_blocks; @@ -1114,6 +1115,31 @@ out: return err; } +bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) +{ + struct f2fs_map_blocks map; + block_t last_lblk; + int err; + + if (pos + len > i_size_read(inode)) + return false; + + map.m_lblk = F2FS_BYTES_TO_BLK(pos); + map.m_next_pgofs = NULL; + map.m_next_extent = NULL; + map.m_seg_type = NO_CHECK_TYPE; + last_lblk = F2FS_BLK_ALIGN(pos + len); + + while (map.m_lblk < last_lblk) { + map.m_len = last_lblk - map.m_lblk; + err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT); + if (err || map.m_len == 0) + return false; + map.m_lblk += map.m_len; + } + return true; +} + static int __get_data_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create, int flag, pgoff_t *next_pgofs, int seg_type) @@ -2287,25 +2313,41 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); size_t count = iov_iter_count(iter); loff_t offset = iocb->ki_pos; int rw = iov_iter_rw(iter); int err; + enum rw_hint hint = iocb->ki_hint; + int whint_mode = F2FS_OPTION(sbi).whint_mode; err = check_direct_IO(inode, iter, offset); if (err) return err; - if (__force_buffered_io(inode, rw)) + if (f2fs_force_buffered_io(inode, rw)) return 0; trace_f2fs_direct_IO_enter(inode, offset, count, rw); - down_read(&F2FS_I(inode)->dio_rwsem[rw]); + if (rw == WRITE && whint_mode == WHINT_MODE_OFF) + iocb->ki_hint = WRITE_LIFE_NOT_SET; + + if (!down_read_trylock(&F2FS_I(inode)->dio_rwsem[rw])) { + if (iocb->ki_flags & IOCB_NOWAIT) { + iocb->ki_hint = hint; + err = -EAGAIN; + goto out; + } + down_read(&F2FS_I(inode)->dio_rwsem[rw]); + } + err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); up_read(&F2FS_I(inode)->dio_rwsem[rw]); if (rw == WRITE) { + if (whint_mode == WHINT_MODE_OFF) + iocb->ki_hint = hint; if (err > 0) { f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO, err); @@ -2315,6 +2357,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } } +out: trace_f2fs_direct_IO_exit(inode, offset, count, rw, err); return err; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index f00b5ed8c011..fe661274ff10 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -94,14 +94,12 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, struct f2fs_dir_entry *de; struct f2fs_dentry_ptr d; - dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page); + dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page); make_dentry_ptr_block(NULL, &d, dentry_blk); de = find_target_dentry(fname, namehash, max_slots, &d); if (de) *res_page = dentry_page; - else - kunmap(dentry_page); return de; } @@ -287,7 +285,6 @@ ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr, de = f2fs_find_entry(dir, qstr, page); if (de) { res = le32_to_cpu(de->ino); - f2fs_dentry_kunmap(dir, *page); f2fs_put_page(*page, 0); } @@ -302,7 +299,6 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, f2fs_wait_on_page_writeback(page, type, true); de->ino = cpu_to_le32(inode->i_ino); set_de_type(de, inode->i_mode); - f2fs_dentry_kunmap(dir, page); set_page_dirty(page); dir->i_mtime = dir->i_ctime = current_time(dir); @@ -350,13 +346,11 @@ static int make_empty_dir(struct inode *inode, if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); - dentry_blk = kmap_atomic(dentry_page); + dentry_blk = page_address(dentry_page); make_dentry_ptr_block(NULL, &d, dentry_blk); do_make_empty_dir(inode, parent, &d); - kunmap_atomic(dentry_blk); - set_page_dirty(dentry_page); f2fs_put_page(dentry_page, 1); return 0; @@ -367,6 +361,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, struct page *dpage) { struct page *page; + int dummy_encrypt = DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(dir)); int err; if (is_inode_flag_set(inode, FI_NEW_INODE)) { @@ -393,7 +388,8 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, if (err) goto put_error; - if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) { + if ((f2fs_encrypted_inode(dir) || dummy_encrypt) && + f2fs_may_encrypt(inode)) { err = fscrypt_inherit_context(dir, inode, page, false); if (err) goto put_error; @@ -402,8 +398,6 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, page = get_node_page(F2FS_I_SB(dir), inode->i_ino); if (IS_ERR(page)) return page; - - set_cold_node(inode, page); } if (new_name) { @@ -547,13 +541,12 @@ start: if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); - dentry_blk = kmap(dentry_page); + dentry_blk = page_address(dentry_page); bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, slots, NR_DENTRY_IN_BLOCK); if (bit_pos < NR_DENTRY_IN_BLOCK) goto add_dentry; - kunmap(dentry_page); f2fs_put_page(dentry_page, 1); } @@ -588,7 +581,6 @@ fail: if (inode) up_write(&F2FS_I(inode)->i_sem); - kunmap(dentry_page); f2fs_put_page(dentry_page, 1); return err; @@ -642,7 +634,6 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, F2FS_I(dir)->task = NULL; } if (de) { - f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); err = -EEXIST; } else if (IS_ERR(page)) { @@ -713,7 +704,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); - add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO); + if (F2FS_OPTION(F2FS_I_SB(dir)).fsync_mode == FSYNC_MODE_STRICT) + add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO); if (f2fs_has_inline_dentry(dir)) return f2fs_delete_inline_entry(dentry, page, dir, inode); @@ -730,7 +722,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, NR_DENTRY_IN_BLOCK, 0); - kunmap(page); /* kunmap - pair of f2fs_find_entry */ set_page_dirty(page); dir->i_ctime = dir->i_mtime = current_time(dir); @@ -775,7 +766,7 @@ bool f2fs_empty_dir(struct inode *dir) return false; } - dentry_blk = kmap_atomic(dentry_page); + dentry_blk = page_address(dentry_page); if (bidx == 0) bit_pos = 2; else @@ -783,7 +774,6 @@ bool f2fs_empty_dir(struct inode *dir) bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, NR_DENTRY_IN_BLOCK, bit_pos); - kunmap_atomic(dentry_blk); f2fs_put_page(dentry_page, 1); @@ -901,19 +891,17 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) } } - dentry_blk = kmap(dentry_page); + dentry_blk = page_address(dentry_page); make_dentry_ptr_block(inode, &d, dentry_blk); err = f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr); if (err) { - kunmap(dentry_page); f2fs_put_page(dentry_page, 1); break; } - kunmap(dentry_page); f2fs_put_page(dentry_page, 1); } out_free: diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index ff2352a0ed15..d5a861bf2b42 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -460,7 +460,7 @@ static struct extent_node *__insert_extent_tree(struct inode *inode, struct rb_node *insert_parent) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct rb_node **p = &et->root.rb_node; + struct rb_node **p; struct rb_node *parent = NULL; struct extent_node *en = NULL; @@ -706,6 +706,9 @@ void f2fs_drop_extent_tree(struct inode *inode) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et = F2FS_I(inode)->extent_tree; + if (!f2fs_may_extent_tree(inode)) + return; + set_inode_flag(inode, FI_NO_EXTENT); write_lock(&et->lock); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6300ac5bcbe4..1df7f10476d6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -98,9 +98,10 @@ extern char *fault_name[FAULT_MAX]; #define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000 #define F2FS_MOUNT_RESERVE_ROOT 0x01000000 -#define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option) -#define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option) -#define test_opt(sbi, option) ((sbi)->mount_opt.opt & F2FS_MOUNT_##option) +#define F2FS_OPTION(sbi) ((sbi)->mount_opt) +#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) +#define set_opt(sbi, option) (F2FS_OPTION(sbi).opt |= F2FS_MOUNT_##option) +#define test_opt(sbi, option) (F2FS_OPTION(sbi).opt & F2FS_MOUNT_##option) #define ver_after(a, b) (typecheck(unsigned long long, a) && \ typecheck(unsigned long long, b) && \ @@ -113,7 +114,26 @@ typedef u32 block_t; /* typedef u32 nid_t; struct f2fs_mount_info { - unsigned int opt; + unsigned int opt; + int write_io_size_bits; /* Write IO size bits */ + block_t root_reserved_blocks; /* root reserved blocks */ + kuid_t s_resuid; /* reserved blocks for uid */ + kgid_t s_resgid; /* reserved blocks for gid */ + int active_logs; /* # of active logs */ + int inline_xattr_size; /* inline xattr size */ +#ifdef CONFIG_F2FS_FAULT_INJECTION + struct f2fs_fault_info fault_info; /* For fault injection */ +#endif +#ifdef CONFIG_QUOTA + /* Names of quota files with journalled quota */ + char *s_qf_names[MAXQUOTAS]; + int s_jquota_fmt; /* Format of quota to use */ +#endif + /* For which write hints are passed down to block layer */ + int whint_mode; + int alloc_mode; /* segment allocation policy */ + int fsync_mode; /* fsync policy */ + bool test_dummy_encryption; /* test dummy encryption */ }; #define F2FS_FEATURE_ENCRYPT 0x0001 @@ -125,6 +145,8 @@ struct f2fs_mount_info { #define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR 0x0040 #define F2FS_FEATURE_QUOTA_INO 0x0080 #define F2FS_FEATURE_INODE_CRTIME 0x0100 +#define F2FS_FEATURE_LOST_FOUND 0x0200 +#define F2FS_FEATURE_VERITY 0x0400 /* reserved */ #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) @@ -450,7 +472,7 @@ static inline void make_dentry_ptr_block(struct inode *inode, d->inode = inode; d->max = NR_DENTRY_IN_BLOCK; d->nr_bitmap = SIZE_OF_DENTRY_BITMAP; - d->bitmap = &t->dentry_bitmap; + d->bitmap = t->dentry_bitmap; d->dentry = t->dentry; d->filename = t->filename; } @@ -576,6 +598,8 @@ enum { #define FADVISE_ENCRYPT_BIT 0x04 #define FADVISE_ENC_NAME_BIT 0x08 #define FADVISE_KEEP_SIZE_BIT 0x10 +#define FADVISE_HOT_BIT 0x20 +#define FADVISE_VERITY_BIT 0x40 /* reserved */ #define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) #define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) @@ -590,6 +614,9 @@ enum { #define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT) #define file_keep_isize(inode) is_file(inode, FADVISE_KEEP_SIZE_BIT) #define file_set_keep_isize(inode) set_file(inode, FADVISE_KEEP_SIZE_BIT) +#define file_is_hot(inode) is_file(inode, FADVISE_HOT_BIT) +#define file_set_hot(inode) set_file(inode, FADVISE_HOT_BIT) +#define file_clear_hot(inode) clear_file(inode, FADVISE_HOT_BIT) #define DEF_DIR_LEVEL 0 @@ -637,6 +664,7 @@ struct f2fs_inode_info { kprojid_t i_projid; /* id for project quota */ int i_inline_xattr_size; /* inline xattr size */ struct timespec i_crtime; /* inode creation time */ + struct timespec i_disk_time[4]; /* inode disk times */ }; static inline void get_extent_info(struct extent_info *ext, @@ -743,7 +771,7 @@ struct f2fs_nm_info { unsigned int nid_cnt[MAX_NID_STATE]; /* the number of free node id */ spinlock_t nid_list_lock; /* protect nid lists ops */ struct mutex build_lock; /* lock for build free nids */ - unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE]; + unsigned char **free_nid_bitmap; unsigned char *nat_block_bitmap; unsigned short *free_nid_count; /* free nid count of NAT block */ @@ -976,6 +1004,7 @@ struct f2fs_io_info { bool submitted; /* indicate IO submission */ int need_lock; /* indicate we need to lock cp_rwsem */ bool in_list; /* indicate fio is in io_list */ + bool is_meta; /* indicate borrow meta inode mapping or not */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ }; @@ -1037,10 +1066,34 @@ enum { MAX_TIME, }; +enum { + WHINT_MODE_OFF, /* not pass down write hints */ + WHINT_MODE_USER, /* try to pass down hints given by users */ + WHINT_MODE_FS, /* pass down hints with F2FS policy */ +}; + +enum { + ALLOC_MODE_DEFAULT, /* stay default */ + ALLOC_MODE_REUSE, /* reuse segments as much as possible */ +}; + +enum fsync_mode { + FSYNC_MODE_POSIX, /* fsync follows posix semantics */ + FSYNC_MODE_STRICT, /* fsync behaves in line with ext4 */ +}; + +#ifdef CONFIG_F2FS_FS_ENCRYPTION +#define DUMMY_ENCRYPTION_ENABLED(sbi) \ + (unlikely(F2FS_OPTION(sbi).test_dummy_encryption)) +#else +#define DUMMY_ENCRYPTION_ENABLED(sbi) (0) +#endif + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ struct f2fs_super_block *raw_super; /* raw super block pointer */ + struct rw_semaphore sb_lock; /* lock for raw super block */ int valid_super_block; /* valid super block no */ unsigned long s_flag; /* flags for sbi */ @@ -1060,7 +1113,6 @@ struct f2fs_sb_info { struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE]; /* bio ordering for NODE/DATA */ - int write_io_size_bits; /* Write IO size bits */ mempool_t *write_io_dummy; /* Dummy pages */ /* for checkpoint */ @@ -1110,9 +1162,7 @@ struct f2fs_sb_info { unsigned int total_node_count; /* total node block count */ unsigned int total_valid_node_count; /* valid node block count */ loff_t max_file_blocks; /* max block index of file */ - int active_logs; /* # of active logs */ int dir_level; /* directory level */ - int inline_xattr_size; /* inline xattr size */ unsigned int trigger_ssr_threshold; /* threshold to trigger ssr */ int readdir_ra; /* readahead inode in readdir */ @@ -1122,9 +1172,6 @@ struct f2fs_sb_info { block_t last_valid_block_count; /* for recovery */ block_t reserved_blocks; /* configurable reserved blocks */ block_t current_reserved_blocks; /* current reserved blocks */ - block_t root_reserved_blocks; /* root reserved blocks */ - kuid_t s_resuid; /* reserved blocks for uid */ - kgid_t s_resgid; /* reserved blocks for gid */ unsigned int nquota_files; /* # of quota sysfile */ @@ -1209,17 +1256,6 @@ struct f2fs_sb_info { /* Precomputed FS UUID checksum for seeding other checksums */ __u32 s_chksum_seed; - - /* For fault injection */ -#ifdef CONFIG_F2FS_FAULT_INJECTION - struct f2fs_fault_info fault_info; -#endif - -#ifdef CONFIG_QUOTA - /* Names of quota files with journalled quota */ - char *s_qf_names[MAXQUOTAS]; - int s_jquota_fmt; /* Format of quota to use */ -#endif }; #ifdef CONFIG_F2FS_FAULT_INJECTION @@ -1229,7 +1265,7 @@ struct f2fs_sb_info { __func__, __builtin_return_address(0)) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { - struct f2fs_fault_info *ffi = &sbi->fault_info; + struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info; if (!ffi->inject_rate) return false; @@ -1586,12 +1622,12 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi, return false; if (IS_NOQUOTA(inode)) return true; - if (capable(CAP_SYS_RESOURCE)) + if (uid_eq(F2FS_OPTION(sbi).s_resuid, current_fsuid())) return true; - if (uid_eq(sbi->s_resuid, current_fsuid())) + if (!gid_eq(F2FS_OPTION(sbi).s_resgid, GLOBAL_ROOT_GID) && + in_group_p(F2FS_OPTION(sbi).s_resgid)) return true; - if (!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) && - in_group_p(sbi->s_resgid)) + if (capable(CAP_SYS_RESOURCE)) return true; return false; } @@ -1627,7 +1663,7 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, sbi->current_reserved_blocks; if (!__allow_reserved_blocks(sbi, inode)) - avail_user_block_count -= sbi->root_reserved_blocks; + avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { diff = sbi->total_valid_block_count - avail_user_block_count; @@ -1762,6 +1798,12 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); int offset; + if (is_set_ckpt_flags(sbi, CP_LARGE_NAT_BITMAP_FLAG)) { + offset = (flag == SIT_BITMAP) ? + le32_to_cpu(ckpt->nat_ver_bitmap_bytesize) : 0; + return &ckpt->sit_nat_version_bitmap + offset; + } + if (__cp_payload(sbi) > 0) { if (flag == NAT_BITMAP) return &ckpt->sit_nat_version_bitmap; @@ -1828,7 +1870,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, sbi->current_reserved_blocks + 1; if (!__allow_reserved_blocks(sbi, inode)) - valid_block_count += sbi->root_reserved_blocks; + valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks; if (unlikely(valid_block_count > sbi->user_block_count)) { spin_unlock(&sbi->stat_lock); @@ -2399,12 +2441,6 @@ static inline int f2fs_has_inline_dentry(struct inode *inode) return is_inode_flag_set(inode, FI_INLINE_DENTRY); } -static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page) -{ - if (!f2fs_has_inline_dentry(dir)) - kunmap(page); -} - static inline int is_file(struct inode *inode, int type) { return F2FS_I(inode)->i_advise & type; @@ -2436,7 +2472,17 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) } if (!is_inode_flag_set(inode, FI_AUTO_RECOVER) || file_keep_isize(inode) || - i_size_read(inode) & PAGE_MASK) + i_size_read(inode) & ~PAGE_MASK) + return false; + + if (!timespec_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime)) + return false; + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime)) + return false; + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime)) + return false; + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 3, + &F2FS_I(inode)->i_crtime)) return false; down_read(&F2FS_I(inode)->i_sem); @@ -2446,9 +2492,9 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) return ret; } -static inline int f2fs_readonly(struct super_block *sb) +static inline bool f2fs_readonly(struct super_block *sb) { - return sb->s_flags & SB_RDONLY; + return sb_rdonly(sb); } static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi) @@ -2596,6 +2642,8 @@ void handle_failed_inode(struct inode *inode); /* * namei.c */ +int update_extension_list(struct f2fs_sb_info *sbi, const char *name, + bool hot, bool set); struct dentry *f2fs_get_parent(struct dentry *child); /* @@ -2768,6 +2816,8 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi); int __init create_segment_manager_caches(void); void destroy_segment_manager_caches(void); int rw_hint_to_seg_type(enum rw_hint hint); +enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, enum page_type type, + enum temp_type temp); /* * checkpoint.c @@ -2850,6 +2900,7 @@ int f2fs_release_page(struct page *page, gfp_t wait); int f2fs_migrate_page(struct address_space *mapping, struct page *newpage, struct page *page, enum migrate_mode mode); #endif +bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len); /* * gc.c @@ -3172,45 +3223,21 @@ static inline bool f2fs_bio_encrypted(struct bio *bio) return bio->bi_private != NULL; } -static inline int f2fs_sb_has_crypto(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_ENCRYPT); -} - -static inline int f2fs_sb_mounted_blkzoned(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_BLKZONED); -} - -static inline int f2fs_sb_has_extra_attr(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_EXTRA_ATTR); -} - -static inline int f2fs_sb_has_project_quota(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_PRJQUOTA); -} - -static inline int f2fs_sb_has_inode_chksum(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CHKSUM); -} - -static inline int f2fs_sb_has_flexible_inline_xattr(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_FLEXIBLE_INLINE_XATTR); -} - -static inline int f2fs_sb_has_quota_ino(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_QUOTA_INO); +#define F2FS_FEATURE_FUNCS(name, flagname) \ +static inline int f2fs_sb_has_##name(struct super_block *sb) \ +{ \ + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_##flagname); \ } -static inline int f2fs_sb_has_inode_crtime(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CRTIME); -} +F2FS_FEATURE_FUNCS(encrypt, ENCRYPT); +F2FS_FEATURE_FUNCS(blkzoned, BLKZONED); +F2FS_FEATURE_FUNCS(extra_attr, EXTRA_ATTR); +F2FS_FEATURE_FUNCS(project_quota, PRJQUOTA); +F2FS_FEATURE_FUNCS(inode_chksum, INODE_CHKSUM); +F2FS_FEATURE_FUNCS(flexible_inline_xattr, FLEXIBLE_INLINE_XATTR); +F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO); +F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME); +F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND); #ifdef CONFIG_BLK_DEV_ZONED static inline int get_blkz_type(struct f2fs_sb_info *sbi, @@ -3230,7 +3257,7 @@ static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi) { struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev); - return blk_queue_discard(q) || f2fs_sb_mounted_blkzoned(sbi->sb); + return blk_queue_discard(q) || f2fs_sb_has_blkzoned(sbi->sb); } static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt) @@ -3259,4 +3286,11 @@ static inline bool f2fs_may_encrypt(struct inode *inode) #endif } +static inline bool f2fs_force_buffered_io(struct inode *inode, int rw) +{ + return (f2fs_encrypted_file(inode) || + (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) || + F2FS_I_SB(inode)->s_ndevs); +} + #endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 672a542e5464..6b94f19b3fa8 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -163,9 +163,10 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) cp_reason = CP_NODE_NEED_CP; else if (test_opt(sbi, FASTBOOT)) cp_reason = CP_FASTBOOT_MODE; - else if (sbi->active_logs == 2) + else if (F2FS_OPTION(sbi).active_logs == 2) cp_reason = CP_SPEC_LOG_NUM; - else if (need_dentry_mark(sbi, inode->i_ino) && + else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT && + need_dentry_mark(sbi, inode->i_ino) && exist_written_data(sbi, F2FS_I(inode)->i_pino, TRANS_DIR_INO)) cp_reason = CP_RECOVER_DIR; @@ -479,6 +480,9 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) if (err) return err; + + filp->f_mode |= FMODE_NOWAIT; + return dquot_file_open(inode, filp); } @@ -569,7 +573,6 @@ truncate_out: int truncate_blocks(struct inode *inode, u64 from, bool lock) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - unsigned int blocksize = inode->i_sb->s_blocksize; struct dnode_of_data dn; pgoff_t free_from; int count = 0, err = 0; @@ -578,7 +581,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) trace_f2fs_truncate_blocks_enter(inode, from); - free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1); + free_from = (pgoff_t)F2FS_BLK_ALIGN(from); if (free_from >= sbi->max_file_blocks) goto free_partial; @@ -1348,8 +1351,12 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, } out: - if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) - f2fs_i_size_write(inode, new_size); + if (new_size > i_size_read(inode)) { + if (mode & FALLOC_FL_KEEP_SIZE) + file_set_keep_isize(inode); + else + f2fs_i_size_write(inode, new_size); + } out_sem: up_write(&F2FS_I(inode)->i_mmap_sem); @@ -1711,6 +1718,8 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) inode_lock(inode); + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + if (f2fs_is_volatile_file(inode)) goto err_out; @@ -1729,6 +1738,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false); } err_out: + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); inode_unlock(inode); mnt_drop_write_file(filp); return ret; @@ -1938,7 +1948,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - if (!f2fs_sb_has_crypto(inode->i_sb)) + if (!f2fs_sb_has_encrypt(inode->i_sb)) return -EOPNOTSUPP; f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); @@ -1948,7 +1958,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) { - if (!f2fs_sb_has_crypto(file_inode(filp)->i_sb)) + if (!f2fs_sb_has_encrypt(file_inode(filp)->i_sb)) return -EOPNOTSUPP; return fscrypt_ioctl_get_policy(filp, (void __user *)arg); } @@ -1959,16 +1969,18 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; - if (!f2fs_sb_has_crypto(inode->i_sb)) + if (!f2fs_sb_has_encrypt(inode->i_sb)) return -EOPNOTSUPP; - if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) - goto got_it; - err = mnt_want_write_file(filp); if (err) return err; + down_write(&sbi->sb_lock); + + if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) + goto got_it; + /* update superblock with uuid */ generate_random_uuid(sbi->raw_super->encrypt_pw_salt); @@ -1976,15 +1988,16 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) if (err) { /* undo new data */ memset(sbi->raw_super->encrypt_pw_salt, 0, 16); - mnt_drop_write_file(filp); - return err; + goto out_err; } - mnt_drop_write_file(filp); got_it: if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt, 16)) - return -EFAULT; - return 0; + err = -EFAULT; +out_err: + up_write(&sbi->sb_lock); + mnt_drop_write_file(filp); + return err; } static int f2fs_ioc_gc(struct file *filp, unsigned long arg) @@ -2045,8 +2058,10 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) return ret; end = range.start + range.len; - if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) - return -EINVAL; + if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) { + ret = -EINVAL; + goto out; + } do_more: if (!range.sync) { if (!mutex_trylock(&sbi->gc_mutex)) { @@ -2885,25 +2900,54 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; - inode_lock(inode); + if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) + return -EINVAL; + + if (!inode_trylock(inode)) { + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + inode_lock(inode); + } + ret = generic_write_checks(iocb, from); if (ret > 0) { + bool preallocated = false; + size_t target_size = 0; int err; if (iov_iter_fault_in_readable(from, iov_iter_count(from))) set_inode_flag(inode, FI_NO_PREALLOC); - err = f2fs_preallocate_blocks(iocb, from); - if (err) { - clear_inode_flag(inode, FI_NO_PREALLOC); - inode_unlock(inode); - return err; + if ((iocb->ki_flags & IOCB_NOWAIT) && + (iocb->ki_flags & IOCB_DIRECT)) { + if (!f2fs_overwrite_io(inode, iocb->ki_pos, + iov_iter_count(from)) || + f2fs_has_inline_data(inode) || + f2fs_force_buffered_io(inode, WRITE)) { + inode_unlock(inode); + return -EAGAIN; + } + + } else { + preallocated = true; + target_size = iocb->ki_pos + iov_iter_count(from); + + err = f2fs_preallocate_blocks(iocb, from); + if (err) { + clear_inode_flag(inode, FI_NO_PREALLOC); + inode_unlock(inode); + return err; + } } blk_start_plug(&plug); ret = __generic_file_write_iter(iocb, from); blk_finish_plug(&plug); clear_inode_flag(inode, FI_NO_PREALLOC); + /* if we couldn't write data, we should deallocate blocks. */ + if (preallocated && i_size_read(inode) < target_size) + f2fs_truncate(inode); + if (ret > 0) f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index aa720cc44509..bfb7a4a3a929 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -76,14 +76,15 @@ static int gc_thread_func(void *data) * invalidated soon after by user update or deletion. * So, I'd like to wait some time to collect dirty segments. */ - if (!mutex_trylock(&sbi->gc_mutex)) - goto next; - if (gc_th->gc_urgent) { wait_ms = gc_th->urgent_sleep_time; + mutex_lock(&sbi->gc_mutex); goto do_gc; } + if (!mutex_trylock(&sbi->gc_mutex)) + goto next; + if (!is_idle(sbi)) { increase_sleep_time(gc_th, &wait_ms); mutex_unlock(&sbi->gc_mutex); @@ -161,12 +162,17 @@ static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type) { int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY; - if (gc_th && gc_th->gc_idle) { + if (!gc_th) + return gc_mode; + + if (gc_th->gc_idle) { if (gc_th->gc_idle == 1) gc_mode = GC_CB; else if (gc_th->gc_idle == 2) gc_mode = GC_GREEDY; } + if (gc_th->gc_urgent) + gc_mode = GC_GREEDY; return gc_mode; } @@ -188,11 +194,14 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, } /* we need to check every dirty segments in the FG_GC case */ - if (gc_type != FG_GC && p->max_search > sbi->max_victim_search) + if (gc_type != FG_GC && + (sbi->gc_thread && !sbi->gc_thread->gc_urgent) && + p->max_search > sbi->max_victim_search) p->max_search = sbi->max_victim_search; - /* let's select beginning hot/small space first */ - if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) + /* let's select beginning hot/small space first in no_heap mode*/ + if (test_opt(sbi, NOHEAP) && + (type == CURSEG_HOT_DATA || IS_NODESEG(type))) p->offset = 0; else p->offset = SIT_I(sbi)->last_victim[p->gc_mode]; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 90e38d8ea688..3b77d6421218 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -369,7 +369,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, f2fs_wait_on_page_writeback(page, DATA, true); zero_user_segment(page, MAX_INLINE_DATA(dir), PAGE_SIZE); - dentry_blk = kmap_atomic(page); + dentry_blk = page_address(page); make_dentry_ptr_inline(dir, &src, inline_dentry); make_dentry_ptr_block(dir, &dst, dentry_blk); @@ -386,7 +386,6 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, memcpy(dst.dentry, src.dentry, SIZE_OF_DIR_ENTRY * src.max); memcpy(dst.filename, src.filename, src.max * F2FS_SLOT_LEN); - kunmap_atomic(dentry_blk); if (!PageUptodate(page)) SetPageUptodate(page); set_page_dirty(page); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 205add3d0f3a..e0d9e8f27ed2 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -284,6 +284,10 @@ static int do_read_inode(struct inode *inode) fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec); } + F2FS_I(inode)->i_disk_time[0] = inode->i_atime; + F2FS_I(inode)->i_disk_time[1] = inode->i_ctime; + F2FS_I(inode)->i_disk_time[2] = inode->i_mtime; + F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime; f2fs_put_page(node_page, 1); stat_inc_inline_xattr(inode); @@ -328,7 +332,7 @@ make_now: inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); + inode_nohighmem(inode); } else if (S_ISLNK(inode->i_mode)) { if (f2fs_encrypted_inode(inode)) inode->i_op = &f2fs_encrypted_symlink_inode_operations; @@ -439,12 +443,15 @@ void update_inode(struct inode *inode, struct page *node_page) } __set_inode_rdev(inode, ri); - set_cold_node(inode, node_page); /* deleted inode */ if (inode->i_nlink == 0) clear_inline_node(node_page); + F2FS_I(inode)->i_disk_time[0] = inode->i_atime; + F2FS_I(inode)->i_disk_time[1] = inode->i_ctime; + F2FS_I(inode)->i_disk_time[2] = inode->i_mtime; + F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime; } void update_inode_page(struct inode *inode) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index b68e7b03959f..d5098efe577c 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -78,7 +78,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) set_inode_flag(inode, FI_NEW_INODE); /* If the directory encrypted, then we should encrypt the inode. */ - if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) + if ((f2fs_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) && + f2fs_may_encrypt(inode)) f2fs_set_encrypted_inode(inode); if (f2fs_sb_has_extra_attr(sbi->sb)) { @@ -97,7 +98,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) { f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode)); if (f2fs_has_inline_xattr(inode)) - xattr_size = sbi->inline_xattr_size; + xattr_size = F2FS_OPTION(sbi).inline_xattr_size; /* Otherwise, will be 0 */ } else if (f2fs_has_inline_xattr(inode) || f2fs_has_inline_dentry(inode)) { @@ -142,7 +143,7 @@ fail_drop: return ERR_PTR(err); } -static int is_multimedia_file(const unsigned char *s, const char *sub) +static int is_extension_exist(const unsigned char *s, const char *sub) { size_t slen = strlen(s); size_t sublen = strlen(sub); @@ -168,19 +169,94 @@ static int is_multimedia_file(const unsigned char *s, const char *sub) /* * Set multimedia files as cold files for hot/cold data separation */ -static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, +static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode, const unsigned char *name) { - int i; - __u8 (*extlist)[8] = sbi->raw_super->extension_list; + __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; + int i, cold_count, hot_count; + + down_read(&sbi->sb_lock); + + cold_count = le32_to_cpu(sbi->raw_super->extension_count); + hot_count = sbi->raw_super->hot_ext_count; - int count = le32_to_cpu(sbi->raw_super->extension_count); - for (i = 0; i < count; i++) { - if (is_multimedia_file(name, extlist[i])) { + for (i = 0; i < cold_count + hot_count; i++) { + if (!is_extension_exist(name, extlist[i])) + continue; + if (i < cold_count) file_set_cold(inode); - break; - } + else + file_set_hot(inode); + break; } + + up_read(&sbi->sb_lock); +} + +int update_extension_list(struct f2fs_sb_info *sbi, const char *name, + bool hot, bool set) +{ + __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; + int cold_count = le32_to_cpu(sbi->raw_super->extension_count); + int hot_count = sbi->raw_super->hot_ext_count; + int total_count = cold_count + hot_count; + int start, count; + int i; + + if (set) { + if (total_count == F2FS_MAX_EXTENSION) + return -EINVAL; + } else { + if (!hot && !cold_count) + return -EINVAL; + if (hot && !hot_count) + return -EINVAL; + } + + if (hot) { + start = cold_count; + count = total_count; + } else { + start = 0; + count = cold_count; + } + + for (i = start; i < count; i++) { + if (strcmp(name, extlist[i])) + continue; + + if (set) + return -EINVAL; + + memcpy(extlist[i], extlist[i + 1], + F2FS_EXTENSION_LEN * (total_count - i - 1)); + memset(extlist[total_count - 1], 0, F2FS_EXTENSION_LEN); + if (hot) + sbi->raw_super->hot_ext_count = hot_count - 1; + else + sbi->raw_super->extension_count = + cpu_to_le32(cold_count - 1); + return 0; + } + + if (!set) + return -EINVAL; + + if (hot) { + strncpy(extlist[count], name, strlen(name)); + sbi->raw_super->hot_ext_count = hot_count + 1; + } else { + char buf[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN]; + + memcpy(buf, &extlist[cold_count], + F2FS_EXTENSION_LEN * hot_count); + memset(extlist[cold_count], 0, F2FS_EXTENSION_LEN); + strncpy(extlist[cold_count], name, strlen(name)); + memcpy(&extlist[cold_count + 1], buf, + F2FS_EXTENSION_LEN * hot_count); + sbi->raw_super->extension_count = cpu_to_le32(cold_count + 1); + } + return 0; } static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, @@ -203,7 +279,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, return PTR_ERR(inode); if (!test_opt(sbi, DISABLE_EXT_IDENTIFY)) - set_cold_files(sbi, inode, dentry->d_name.name); + set_file_temperature(sbi, inode, dentry->d_name.name); inode->i_op = &f2fs_file_inode_operations; inode->i_fop = &f2fs_file_operations; @@ -317,7 +393,6 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) de = f2fs_find_entry(dir, &dot, &page); if (de) { - f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); } else if (IS_ERR(page)) { err = PTR_ERR(page); @@ -329,14 +404,12 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) } de = f2fs_find_entry(dir, &dotdot, &page); - if (de) { - f2fs_dentry_kunmap(dir, page); + if (de) f2fs_put_page(page, 0); - } else if (IS_ERR(page)) { + else if (IS_ERR(page)) err = PTR_ERR(page); - } else { + else err = __f2fs_add_link(dir, &dotdot, NULL, pino, S_IFDIR); - } out: if (!err) clear_inode_flag(dir, FI_INLINE_DOTS); @@ -377,7 +450,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, } ino = le32_to_cpu(de->ino); - f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); inode = f2fs_iget(dir->i_sb, ino); @@ -452,7 +524,6 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) err = acquire_orphan_inode(sbi); if (err) { f2fs_unlock_op(sbi); - f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); goto fail; } @@ -579,7 +650,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); + inode_nohighmem(inode); set_inode_flag(inode, FI_INC_LINK); f2fs_lock_op(sbi); @@ -717,10 +788,12 @@ out: static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) { - if (unlikely(f2fs_cp_error(F2FS_I_SB(dir)))) + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + + if (unlikely(f2fs_cp_error(sbi))) return -EIO; - if (f2fs_encrypted_inode(dir)) { + if (f2fs_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) { int err = fscrypt_get_encryption_info(dir); if (err) return err; @@ -893,16 +966,15 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } if (old_dir_entry) { - if (old_dir != new_dir && !whiteout) { + if (old_dir != new_dir && !whiteout) f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); - } else { - f2fs_dentry_kunmap(old_inode, old_dir_page); + else f2fs_put_page(old_dir_page, 0); - } f2fs_i_links_write(old_dir, false); } - add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) + add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); f2fs_unlock_op(sbi); @@ -912,20 +984,15 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, put_out_dir: f2fs_unlock_op(sbi); - if (new_page) { - f2fs_dentry_kunmap(new_dir, new_page); + if (new_page) f2fs_put_page(new_page, 0); - } out_whiteout: if (whiteout) iput(whiteout); out_dir: - if (old_dir_entry) { - f2fs_dentry_kunmap(old_inode, old_dir_page); + if (old_dir_entry) f2fs_put_page(old_dir_page, 0); - } out_old: - f2fs_dentry_kunmap(old_dir, old_page); f2fs_put_page(old_page, 0); out: return err; @@ -1057,8 +1124,10 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, } f2fs_mark_inode_dirty_sync(new_dir, false); - add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO); - add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) { + add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO); + add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); + } f2fs_unlock_op(sbi); @@ -1067,19 +1136,15 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, return 0; out_new_dir: if (new_dir_entry) { - f2fs_dentry_kunmap(new_inode, new_dir_page); f2fs_put_page(new_dir_page, 0); } out_old_dir: if (old_dir_entry) { - f2fs_dentry_kunmap(old_inode, old_dir_page); f2fs_put_page(old_dir_page, 0); } out_new: - f2fs_dentry_kunmap(new_dir, new_page); f2fs_put_page(new_page, 0); out_old: - f2fs_dentry_kunmap(old_dir, old_page); f2fs_put_page(old_page, 0); out: return err; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 177c438e4a56..9a99243054ba 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -193,8 +193,8 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) __free_nat_entry(e); } -static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, - struct nat_entry *ne) +static struct nat_entry_set *__grab_nat_entry_set(struct f2fs_nm_info *nm_i, + struct nat_entry *ne) { nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); struct nat_entry_set *head; @@ -209,15 +209,36 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, head->entry_cnt = 0; f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head); } + return head; +} + +static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, + struct nat_entry *ne) +{ + struct nat_entry_set *head; + bool new_ne = nat_get_blkaddr(ne) == NEW_ADDR; + + if (!new_ne) + head = __grab_nat_entry_set(nm_i, ne); + + /* + * update entry_cnt in below condition: + * 1. update NEW_ADDR to valid block address; + * 2. update old block address to new one; + */ + if (!new_ne && (get_nat_flag(ne, IS_PREALLOC) || + !get_nat_flag(ne, IS_DIRTY))) + head->entry_cnt++; + + set_nat_flag(ne, IS_PREALLOC, new_ne); if (get_nat_flag(ne, IS_DIRTY)) goto refresh_list; nm_i->dirty_nat_cnt++; - head->entry_cnt++; set_nat_flag(ne, IS_DIRTY, true); refresh_list: - if (nat_get_blkaddr(ne) == NEW_ADDR) + if (new_ne) list_del_init(&ne->list); else list_move_tail(&ne->list, &head->entry_list); @@ -1076,7 +1097,7 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) f2fs_wait_on_page_writeback(page, NODE, true); fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); - set_cold_node(dn->inode, page); + set_cold_node(page, S_ISDIR(dn->inode->i_mode)); if (!PageUptodate(page)) SetPageUptodate(page); if (set_page_dirty(page)) @@ -2291,6 +2312,7 @@ retry: if (!PageUptodate(ipage)) SetPageUptodate(ipage); fill_node_footer(ipage, ino, ino, 0, true); + set_cold_node(page, false); src = F2FS_INODE(page); dst = F2FS_INODE(ipage); @@ -2580,8 +2602,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) if (!enabled_nat_bits(sbi, NULL)) return 0; - nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 + - F2FS_BLKSIZE - 1); + nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8); nm_i->nat_bits = f2fs_kzalloc(sbi, nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL); if (!nm_i->nat_bits) @@ -2707,12 +2728,20 @@ static int init_node_manager(struct f2fs_sb_info *sbi) static int init_free_nid_cache(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); + int i; - nm_i->free_nid_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks * - NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL); + nm_i->free_nid_bitmap = f2fs_kzalloc(sbi, nm_i->nat_blocks * + sizeof(unsigned char *), GFP_KERNEL); if (!nm_i->free_nid_bitmap) return -ENOMEM; + for (i = 0; i < nm_i->nat_blocks; i++) { + nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi, + NAT_ENTRY_BITMAP_SIZE_ALIGNED, GFP_KERNEL); + if (!nm_i->free_nid_bitmap) + return -ENOMEM; + } + nm_i->nat_block_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks / 8, GFP_KERNEL); if (!nm_i->nat_block_bitmap) @@ -2803,7 +2832,13 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) up_write(&nm_i->nat_tree_lock); kvfree(nm_i->nat_block_bitmap); - kvfree(nm_i->free_nid_bitmap); + if (nm_i->free_nid_bitmap) { + int i; + + for (i = 0; i < nm_i->nat_blocks; i++) + kvfree(nm_i->free_nid_bitmap[i]); + kfree(nm_i->free_nid_bitmap); + } kvfree(nm_i->free_nid_count); kfree(nm_i->nat_bitmap); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 081ef0d672bf..b95e49e4a928 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -44,6 +44,7 @@ enum { HAS_FSYNCED_INODE, /* is the inode fsynced before? */ HAS_LAST_FSYNC, /* has the latest node fsync mark? */ IS_DIRTY, /* this nat entry is dirty? */ + IS_PREALLOC, /* nat entry is preallocated */ }; /* @@ -422,12 +423,12 @@ static inline void clear_inline_node(struct page *page) ClearPageChecked(page); } -static inline void set_cold_node(struct inode *inode, struct page *page) +static inline void set_cold_node(struct page *page, bool is_dir) { struct f2fs_node *rn = F2FS_NODE(page); unsigned int flag = le32_to_cpu(rn->footer.flag); - if (S_ISDIR(inode->i_mode)) + if (is_dir) flag &= ~(0x1 << COLD_BIT_SHIFT); else flag |= (0x1 << COLD_BIT_SHIFT); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 337f3363f48f..1b23d3febe4c 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -144,7 +144,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage, retry: de = __f2fs_find_entry(dir, &fname, &page); if (de && inode->i_ino == le32_to_cpu(de->ino)) - goto out_unmap_put; + goto out_put; if (de) { einode = f2fs_iget_retry(inode->i_sb, le32_to_cpu(de->ino)); @@ -153,19 +153,19 @@ retry: err = PTR_ERR(einode); if (err == -ENOENT) err = -EEXIST; - goto out_unmap_put; + goto out_put; } err = dquot_initialize(einode); if (err) { iput(einode); - goto out_unmap_put; + goto out_put; } err = acquire_orphan_inode(F2FS_I_SB(inode)); if (err) { iput(einode); - goto out_unmap_put; + goto out_put; } f2fs_delete_entry(de, page, dir, einode); iput(einode); @@ -180,8 +180,7 @@ retry: goto retry; goto out; -out_unmap_put: - f2fs_dentry_kunmap(dir, page); +out_put: f2fs_put_page(page, 0); out: if (file_enc_name(inode)) @@ -243,6 +242,9 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, struct curseg_info *curseg; struct page *page = NULL; block_t blkaddr; + unsigned int loop_cnt = 0; + unsigned int free_blocks = sbi->user_block_count - + valid_user_blocks(sbi); int err = 0; /* get node pages in the current segment */ @@ -295,6 +297,17 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, if (IS_INODE(page) && is_dent_dnode(page)) entry->last_dentry = blkaddr; next: + /* sanity check in order to detect looped node chain */ + if (++loop_cnt >= free_blocks || + blkaddr == next_blkaddr_of_node(page)) { + f2fs_msg(sbi->sb, KERN_NOTICE, + "%s: detect looped node chain, " + "blkaddr:%u, next:%u", + __func__, blkaddr, next_blkaddr_of_node(page)); + err = -EINVAL; + break; + } + /* check next segment */ blkaddr = next_blkaddr_of_node(page); f2fs_put_page(page, 1); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b16a8e6625aa..5854cc4e1d67 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1411,12 +1411,11 @@ static int issue_discard_thread(void *data) if (kthread_should_stop()) return 0; - if (dcc->discard_wake) { + if (dcc->discard_wake) dcc->discard_wake = 0; - if (sbi->gc_thread && sbi->gc_thread->gc_urgent) - init_discard_policy(&dpolicy, - DPOLICY_FORCE, 1); - } + + if (sbi->gc_thread && sbi->gc_thread->gc_urgent) + init_discard_policy(&dpolicy, DPOLICY_FORCE, 1); sb_start_intwrite(sbi->sb); @@ -1485,7 +1484,7 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { #ifdef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_mounted_blkzoned(sbi->sb) && + if (f2fs_sb_has_blkzoned(sbi->sb) && bdev_zoned_model(bdev) != BLK_ZONED_NONE) return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen); #endif @@ -1683,7 +1682,7 @@ find_next: sbi->blocks_per_seg, cur_pos); len = next_pos - cur_pos; - if (f2fs_sb_mounted_blkzoned(sbi->sb) || + if (f2fs_sb_has_blkzoned(sbi->sb) || (force && len < cpc->trim_minlen)) goto skip; @@ -1727,7 +1726,7 @@ void init_discard_policy(struct discard_policy *dpolicy, } else if (discard_type == DPOLICY_FORCE) { dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; - dpolicy->io_aware = true; + dpolicy->io_aware = false; } else if (discard_type == DPOLICY_FSTRIM) { dpolicy->io_aware = false; } else if (discard_type == DPOLICY_UMOUNT) { @@ -1863,7 +1862,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) sbi->discard_blks--; /* don't overwrite by SSR to keep node chain */ - if (se->type == CURSEG_WARM_NODE) { + if (IS_NODESEG(se->type)) { if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) se->ckpt_valid_blocks++; } @@ -2164,11 +2163,17 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) if (sbi->segs_per_sec != 1) return CURSEG_I(sbi, type)->segno; - if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) + if (test_opt(sbi, NOHEAP) && + (type == CURSEG_HOT_DATA || IS_NODESEG(type))) return 0; if (SIT_I(sbi)->last_victim[ALLOC_NEXT]) return SIT_I(sbi)->last_victim[ALLOC_NEXT]; + + /* find segments from 0 to reuse freed segments */ + if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE) + return 0; + return CURSEG_I(sbi, type)->segno; } @@ -2455,6 +2460,101 @@ int rw_hint_to_seg_type(enum rw_hint hint) } } +/* This returns write hints for each segment type. This hints will be + * passed down to block layer. There are mapping tables which depend on + * the mount option 'whint_mode'. + * + * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET. + * + * 2) whint_mode=user-based. F2FS tries to pass down hints given by users. + * + * User F2FS Block + * ---- ---- ----- + * META WRITE_LIFE_NOT_SET + * HOT_NODE " + * WARM_NODE " + * COLD_NODE " + * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME + * extension list " " + * + * -- buffered io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET + * WRITE_LIFE_NONE " " + * WRITE_LIFE_MEDIUM " " + * WRITE_LIFE_LONG " " + * + * -- direct io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET + * WRITE_LIFE_NONE " WRITE_LIFE_NONE + * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM + * WRITE_LIFE_LONG " WRITE_LIFE_LONG + * + * 3) whint_mode=fs-based. F2FS passes down hints with its policy. + * + * User F2FS Block + * ---- ---- ----- + * META WRITE_LIFE_MEDIUM; + * HOT_NODE WRITE_LIFE_NOT_SET + * WARM_NODE " + * COLD_NODE WRITE_LIFE_NONE + * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME + * extension list " " + * + * -- buffered io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG + * WRITE_LIFE_NONE " " + * WRITE_LIFE_MEDIUM " " + * WRITE_LIFE_LONG " " + * + * -- direct io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET + * WRITE_LIFE_NONE " WRITE_LIFE_NONE + * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM + * WRITE_LIFE_LONG " WRITE_LIFE_LONG + */ + +enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, + enum page_type type, enum temp_type temp) +{ + if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) { + if (type == DATA) { + if (temp == WARM) + return WRITE_LIFE_NOT_SET; + else if (temp == HOT) + return WRITE_LIFE_SHORT; + else if (temp == COLD) + return WRITE_LIFE_EXTREME; + } else { + return WRITE_LIFE_NOT_SET; + } + } else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) { + if (type == DATA) { + if (temp == WARM) + return WRITE_LIFE_LONG; + else if (temp == HOT) + return WRITE_LIFE_SHORT; + else if (temp == COLD) + return WRITE_LIFE_EXTREME; + } else if (type == NODE) { + if (temp == WARM || temp == HOT) + return WRITE_LIFE_NOT_SET; + else if (temp == COLD) + return WRITE_LIFE_NONE; + } else if (type == META) { + return WRITE_LIFE_MEDIUM; + } + } + return WRITE_LIFE_NOT_SET; +} + static int __get_segment_type_2(struct f2fs_io_info *fio) { if (fio->type == DATA) @@ -2487,7 +2587,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (is_cold_data(fio->page) || file_is_cold(inode)) return CURSEG_COLD_DATA; - if (is_inode_flag_set(inode, FI_HOT_DATA)) + if (file_is_hot(inode) || + is_inode_flag_set(inode, FI_HOT_DATA)) return CURSEG_HOT_DATA; return rw_hint_to_seg_type(inode->i_write_hint); } else { @@ -2502,7 +2603,7 @@ static int __get_segment_type(struct f2fs_io_info *fio) { int type = 0; - switch (fio->sbi->active_logs) { + switch (F2FS_OPTION(fio->sbi).active_logs) { case 2: type = __get_segment_type_2(fio); break; @@ -2642,6 +2743,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page, struct f2fs_io_info fio = { .sbi = sbi, .type = META, + .temp = HOT, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_META | REQ_PRIO, .old_blkaddr = page->index, @@ -2688,8 +2790,15 @@ void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) int rewrite_data_page(struct f2fs_io_info *fio) { int err; + struct f2fs_sb_info *sbi = fio->sbi; fio->new_blkaddr = fio->old_blkaddr; + /* i/o temperature is needed for passing down write hints */ + __get_segment_type(fio); + + f2fs_bug_on(sbi, !IS_DATASEG(get_seg_entry(sbi, + GET_SEGNO(sbi, fio->new_blkaddr))->type)); + stat_inc_inplace_blocks(fio->sbi); err = f2fs_submit_page_bio(fio); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index f11c4bc82c78..3325d0769723 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -53,13 +53,19 @@ ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ (sbi)->segs_per_sec)) \ -#define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr) -#define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr) +#define MAIN_BLKADDR(sbi) \ + (SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \ + le32_to_cpu(F2FS_RAW_SUPER(sbi)->main_blkaddr)) +#define SEG0_BLKADDR(sbi) \ + (SM_I(sbi) ? SM_I(sbi)->seg0_blkaddr : \ + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment0_blkaddr)) #define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments) #define MAIN_SECS(sbi) ((sbi)->total_sections) -#define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count) +#define TOTAL_SEGS(sbi) \ + (SM_I(sbi) ? SM_I(sbi)->segment_count : \ + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count)) #define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg) #define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi)) @@ -596,6 +602,8 @@ static inline int utilization(struct f2fs_sb_info *sbi) #define DEF_MIN_FSYNC_BLOCKS 8 #define DEF_MIN_HOT_BLOCKS 16 +#define SMALL_VOLUME_SEGMENTS (16 * 512) /* 16GB */ + enum { F2FS_IPU_FORCE, F2FS_IPU_SSR, @@ -630,10 +638,17 @@ static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1); } -static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) +static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr) { - BUG_ON(blk_addr < SEG0_BLKADDR(sbi) - || blk_addr >= MAX_BLKADDR(sbi)); + struct f2fs_sb_info *sbi = fio->sbi; + + if (PAGE_TYPE_OF_BIO(fio->type) == META && + (!is_read_io(fio->op) || fio->is_meta)) + BUG_ON(blk_addr < SEG0_BLKADDR(sbi) || + blk_addr >= MAIN_BLKADDR(sbi)); + else + BUG_ON(blk_addr < MAIN_BLKADDR(sbi) || + blk_addr >= MAX_BLKADDR(sbi)); } /* diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8173ae688814..42d564c5ccd0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -60,7 +60,7 @@ char *fault_name[FAULT_MAX] = { static void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate) { - struct f2fs_fault_info *ffi = &sbi->fault_info; + struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info; if (rate) { atomic_set(&ffi->inject_ops, 0); @@ -129,6 +129,10 @@ enum { Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, + Opt_whint, + Opt_alloc, + Opt_fsync, + Opt_test_dummy_encryption, Opt_err, }; @@ -182,6 +186,10 @@ static match_table_t f2fs_tokens = { {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, + {Opt_whint, "whint_mode=%s"}, + {Opt_alloc, "alloc_mode=%s"}, + {Opt_fsync, "fsync_mode=%s"}, + {Opt_test_dummy_encryption, "test_dummy_encryption"}, {Opt_err, NULL}, }; @@ -202,21 +210,24 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi) block_t limit = (sbi->user_block_count << 1) / 1000; /* limit is 0.2% */ - if (test_opt(sbi, RESERVE_ROOT) && sbi->root_reserved_blocks > limit) { - sbi->root_reserved_blocks = limit; + if (test_opt(sbi, RESERVE_ROOT) && + F2FS_OPTION(sbi).root_reserved_blocks > limit) { + F2FS_OPTION(sbi).root_reserved_blocks = limit; f2fs_msg(sbi->sb, KERN_INFO, "Reduce reserved blocks for root = %u", - sbi->root_reserved_blocks); + F2FS_OPTION(sbi).root_reserved_blocks); } if (!test_opt(sbi, RESERVE_ROOT) && - (!uid_eq(sbi->s_resuid, + (!uid_eq(F2FS_OPTION(sbi).s_resuid, make_kuid(&init_user_ns, F2FS_DEF_RESUID)) || - !gid_eq(sbi->s_resgid, + !gid_eq(F2FS_OPTION(sbi).s_resgid, make_kgid(&init_user_ns, F2FS_DEF_RESGID)))) f2fs_msg(sbi->sb, KERN_INFO, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root", - from_kuid_munged(&init_user_ns, sbi->s_resuid), - from_kgid_munged(&init_user_ns, sbi->s_resgid)); + from_kuid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resuid), + from_kgid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resgid)); } static void init_once(void *foo) @@ -236,7 +247,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, char *qname; int ret = -EINVAL; - if (sb_any_quota_loaded(sb) && !sbi->s_qf_names[qtype]) { + if (sb_any_quota_loaded(sb) && !F2FS_OPTION(sbi).s_qf_names[qtype]) { f2fs_msg(sb, KERN_ERR, "Cannot change journaled " "quota options when quota turned on"); @@ -254,8 +265,8 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, "Not enough memory for storing quotafile name"); return -EINVAL; } - if (sbi->s_qf_names[qtype]) { - if (strcmp(sbi->s_qf_names[qtype], qname) == 0) + if (F2FS_OPTION(sbi).s_qf_names[qtype]) { + if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0) ret = 0; else f2fs_msg(sb, KERN_ERR, @@ -268,7 +279,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, "quotafile must be on filesystem root"); goto errout; } - sbi->s_qf_names[qtype] = qname; + F2FS_OPTION(sbi).s_qf_names[qtype] = qname; set_opt(sbi, QUOTA); return 0; errout: @@ -280,13 +291,13 @@ static int f2fs_clear_qf_name(struct super_block *sb, int qtype) { struct f2fs_sb_info *sbi = F2FS_SB(sb); - if (sb_any_quota_loaded(sb) && sbi->s_qf_names[qtype]) { + if (sb_any_quota_loaded(sb) && F2FS_OPTION(sbi).s_qf_names[qtype]) { f2fs_msg(sb, KERN_ERR, "Cannot change journaled quota options" " when quota turned on"); return -EINVAL; } - kfree(sbi->s_qf_names[qtype]); - sbi->s_qf_names[qtype] = NULL; + kfree(F2FS_OPTION(sbi).s_qf_names[qtype]); + F2FS_OPTION(sbi).s_qf_names[qtype] = NULL; return 0; } @@ -302,15 +313,19 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) "Cannot enable project quota enforcement."); return -1; } - if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA] || - sbi->s_qf_names[PRJQUOTA]) { - if (test_opt(sbi, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) + if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] || + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) { + if (test_opt(sbi, USRQUOTA) && + F2FS_OPTION(sbi).s_qf_names[USRQUOTA]) clear_opt(sbi, USRQUOTA); - if (test_opt(sbi, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) + if (test_opt(sbi, GRPQUOTA) && + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA]) clear_opt(sbi, GRPQUOTA); - if (test_opt(sbi, PRJQUOTA) && sbi->s_qf_names[PRJQUOTA]) + if (test_opt(sbi, PRJQUOTA) && + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) clear_opt(sbi, PRJQUOTA); if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) || @@ -320,19 +335,19 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) return -1; } - if (!sbi->s_jquota_fmt) { + if (!F2FS_OPTION(sbi).s_jquota_fmt) { f2fs_msg(sbi->sb, KERN_ERR, "journaled quota format " "not specified"); return -1; } } - if (f2fs_sb_has_quota_ino(sbi->sb) && sbi->s_jquota_fmt) { + if (f2fs_sb_has_quota_ino(sbi->sb) && F2FS_OPTION(sbi).s_jquota_fmt) { f2fs_msg(sbi->sb, KERN_INFO, "QUOTA feature is enabled, so ignore jquota_fmt"); - sbi->s_jquota_fmt = 0; + F2FS_OPTION(sbi).s_jquota_fmt = 0; } - if (f2fs_sb_has_quota_ino(sbi->sb) && sb_rdonly(sbi->sb)) { + if (f2fs_sb_has_quota_ino(sbi->sb) && f2fs_readonly(sbi->sb)) { f2fs_msg(sbi->sb, KERN_INFO, "Filesystem with quota feature cannot be mounted RDWR " "without CONFIG_QUOTA"); @@ -403,14 +418,14 @@ static int parse_options(struct super_block *sb, char *options) q = bdev_get_queue(sb->s_bdev); if (blk_queue_discard(q)) { set_opt(sbi, DISCARD); - } else if (!f2fs_sb_mounted_blkzoned(sb)) { + } else if (!f2fs_sb_has_blkzoned(sb)) { f2fs_msg(sb, KERN_WARNING, "mounting with \"discard\" option, but " "the device does not support discard"); } break; case Opt_nodiscard: - if (f2fs_sb_mounted_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sb)) { f2fs_msg(sb, KERN_WARNING, "discard is required for zoned block devices"); return -EINVAL; @@ -440,7 +455,7 @@ static int parse_options(struct super_block *sb, char *options) if (args->from && match_int(args, &arg)) return -EINVAL; set_opt(sbi, INLINE_XATTR_SIZE); - sbi->inline_xattr_size = arg; + F2FS_OPTION(sbi).inline_xattr_size = arg; break; #else case Opt_user_xattr: @@ -480,7 +495,7 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE) return -EINVAL; - sbi->active_logs = arg; + F2FS_OPTION(sbi).active_logs = arg; break; case Opt_disable_ext_identify: set_opt(sbi, DISABLE_EXT_IDENTIFY); @@ -524,9 +539,9 @@ static int parse_options(struct super_block *sb, char *options) if (test_opt(sbi, RESERVE_ROOT)) { f2fs_msg(sb, KERN_INFO, "Preserve previous reserve_root=%u", - sbi->root_reserved_blocks); + F2FS_OPTION(sbi).root_reserved_blocks); } else { - sbi->root_reserved_blocks = arg; + F2FS_OPTION(sbi).root_reserved_blocks = arg; set_opt(sbi, RESERVE_ROOT); } break; @@ -539,7 +554,7 @@ static int parse_options(struct super_block *sb, char *options) "Invalid uid value %d", arg); return -EINVAL; } - sbi->s_resuid = uid; + F2FS_OPTION(sbi).s_resuid = uid; break; case Opt_resgid: if (args->from && match_int(args, &arg)) @@ -550,7 +565,7 @@ static int parse_options(struct super_block *sb, char *options) "Invalid gid value %d", arg); return -EINVAL; } - sbi->s_resgid = gid; + F2FS_OPTION(sbi).s_resgid = gid; break; case Opt_mode: name = match_strdup(&args[0]); @@ -559,7 +574,7 @@ static int parse_options(struct super_block *sb, char *options) return -ENOMEM; if (strlen(name) == 8 && !strncmp(name, "adaptive", 8)) { - if (f2fs_sb_mounted_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sb)) { f2fs_msg(sb, KERN_WARNING, "adaptive mode is not allowed with " "zoned block device feature"); @@ -585,7 +600,7 @@ static int parse_options(struct super_block *sb, char *options) 1 << arg, BIO_MAX_PAGES); return -EINVAL; } - sbi->write_io_size_bits = arg; + F2FS_OPTION(sbi).write_io_size_bits = arg; break; case Opt_fault_injection: if (args->from && match_int(args, &arg)) @@ -646,13 +661,13 @@ static int parse_options(struct super_block *sb, char *options) return ret; break; case Opt_jqfmt_vfsold: - sbi->s_jquota_fmt = QFMT_VFS_OLD; + F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_OLD; break; case Opt_jqfmt_vfsv0: - sbi->s_jquota_fmt = QFMT_VFS_V0; + F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V0; break; case Opt_jqfmt_vfsv1: - sbi->s_jquota_fmt = QFMT_VFS_V1; + F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V1; break; case Opt_noquota: clear_opt(sbi, QUOTA); @@ -679,6 +694,73 @@ static int parse_options(struct super_block *sb, char *options) "quota operations not supported"); break; #endif + case Opt_whint: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + if (strlen(name) == 10 && + !strncmp(name, "user-based", 10)) { + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_USER; + } else if (strlen(name) == 3 && + !strncmp(name, "off", 3)) { + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; + } else if (strlen(name) == 8 && + !strncmp(name, "fs-based", 8)) { + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS; + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; + case Opt_alloc: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + + if (strlen(name) == 7 && + !strncmp(name, "default", 7)) { + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; + } else if (strlen(name) == 5 && + !strncmp(name, "reuse", 5)) { + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; + case Opt_fsync: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + if (strlen(name) == 5 && + !strncmp(name, "posix", 5)) { + F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; + } else if (strlen(name) == 6 && + !strncmp(name, "strict", 6)) { + F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_STRICT; + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; + case Opt_test_dummy_encryption: +#ifdef CONFIG_F2FS_FS_ENCRYPTION + if (!f2fs_sb_has_encrypt(sb)) { + f2fs_msg(sb, KERN_ERR, "Encrypt feature is off"); + return -EINVAL; + } + + F2FS_OPTION(sbi).test_dummy_encryption = true; + f2fs_msg(sb, KERN_INFO, + "Test dummy encryption mode enabled"); +#else + f2fs_msg(sb, KERN_INFO, + "Test dummy encryption mount option ignored"); +#endif + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -699,14 +781,22 @@ static int parse_options(struct super_block *sb, char *options) } if (test_opt(sbi, INLINE_XATTR_SIZE)) { + if (!f2fs_sb_has_extra_attr(sb) || + !f2fs_sb_has_flexible_inline_xattr(sb)) { + f2fs_msg(sb, KERN_ERR, + "extra_attr or flexible_inline_xattr " + "feature is off"); + return -EINVAL; + } if (!test_opt(sbi, INLINE_XATTR)) { f2fs_msg(sb, KERN_ERR, "inline_xattr_size option should be " "set with inline_xattr option"); return -EINVAL; } - if (!sbi->inline_xattr_size || - sbi->inline_xattr_size >= DEF_ADDRS_PER_INODE - + if (!F2FS_OPTION(sbi).inline_xattr_size || + F2FS_OPTION(sbi).inline_xattr_size >= + DEF_ADDRS_PER_INODE - F2FS_TOTAL_EXTRA_ATTR_SIZE - DEF_INLINE_RESERVED_SIZE - DEF_MIN_INLINE_SIZE) { @@ -715,6 +805,12 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; } } + + /* Not pass down write hints if the number of active logs is lesser + * than NR_CURSEG_TYPE. + */ + if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE) + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; return 0; } @@ -731,7 +827,6 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Initialize f2fs-specific inode info */ atomic_set(&fi->dirty_pages, 0); fi->i_current_depth = 1; - fi->i_advise = 0; init_rwsem(&fi->i_sem); INIT_LIST_HEAD(&fi->dirty_list); INIT_LIST_HEAD(&fi->gdirty_list); @@ -743,10 +838,6 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) init_rwsem(&fi->i_mmap_sem); init_rwsem(&fi->i_xattr_sem); -#ifdef CONFIG_QUOTA - memset(&fi->i_dquot, 0, sizeof(fi->i_dquot)); - fi->i_reserved_quota = 0; -#endif /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; @@ -956,7 +1047,7 @@ static void f2fs_put_super(struct super_block *sb) mempool_destroy(sbi->write_io_dummy); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) - kfree(sbi->s_qf_names[i]); + kfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif destroy_percpu_info(sbi); for (i = 0; i < NR_PAGE_TYPE; i++) @@ -1070,8 +1161,9 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = total_count - start_count; buf->f_bfree = user_block_count - valid_user_blocks(sbi) - sbi->current_reserved_blocks; - if (buf->f_bfree > sbi->root_reserved_blocks) - buf->f_bavail = buf->f_bfree - sbi->root_reserved_blocks; + if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks) + buf->f_bavail = buf->f_bfree - + F2FS_OPTION(sbi).root_reserved_blocks; else buf->f_bavail = 0; @@ -1106,10 +1198,10 @@ static inline void f2fs_show_quota_options(struct seq_file *seq, #ifdef CONFIG_QUOTA struct f2fs_sb_info *sbi = F2FS_SB(sb); - if (sbi->s_jquota_fmt) { + if (F2FS_OPTION(sbi).s_jquota_fmt) { char *fmtname = ""; - switch (sbi->s_jquota_fmt) { + switch (F2FS_OPTION(sbi).s_jquota_fmt) { case QFMT_VFS_OLD: fmtname = "vfsold"; break; @@ -1123,14 +1215,17 @@ static inline void f2fs_show_quota_options(struct seq_file *seq, seq_printf(seq, ",jqfmt=%s", fmtname); } - if (sbi->s_qf_names[USRQUOTA]) - seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]); + if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA]) + seq_show_option(seq, "usrjquota", + F2FS_OPTION(sbi).s_qf_names[USRQUOTA]); - if (sbi->s_qf_names[GRPQUOTA]) - seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]); + if (F2FS_OPTION(sbi).s_qf_names[GRPQUOTA]) + seq_show_option(seq, "grpjquota", + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA]); - if (sbi->s_qf_names[PRJQUOTA]) - seq_show_option(seq, "prjjquota", sbi->s_qf_names[PRJQUOTA]); + if (F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) + seq_show_option(seq, "prjjquota", + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]); #endif } @@ -1165,7 +1260,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",noinline_xattr"); if (test_opt(sbi, INLINE_XATTR_SIZE)) seq_printf(seq, ",inline_xattr_size=%u", - sbi->inline_xattr_size); + F2FS_OPTION(sbi).inline_xattr_size); #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL if (test_opt(sbi, POSIX_ACL)) @@ -1201,18 +1296,20 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, "adaptive"); else if (test_opt(sbi, LFS)) seq_puts(seq, "lfs"); - seq_printf(seq, ",active_logs=%u", sbi->active_logs); + seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs); if (test_opt(sbi, RESERVE_ROOT)) seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u", - sbi->root_reserved_blocks, - from_kuid_munged(&init_user_ns, sbi->s_resuid), - from_kgid_munged(&init_user_ns, sbi->s_resgid)); + F2FS_OPTION(sbi).root_reserved_blocks, + from_kuid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resuid), + from_kgid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resgid)); if (F2FS_IO_SIZE_BITS(sbi)) seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi)); #ifdef CONFIG_F2FS_FAULT_INJECTION if (test_opt(sbi, FAULT_INJECTION)) seq_printf(seq, ",fault_injection=%u", - sbi->fault_info.inject_rate); + F2FS_OPTION(sbi).fault_info.inject_rate); #endif #ifdef CONFIG_QUOTA if (test_opt(sbi, QUOTA)) @@ -1225,15 +1322,37 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",prjquota"); #endif f2fs_show_quota_options(seq, sbi->sb); + if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) + seq_printf(seq, ",whint_mode=%s", "user-based"); + else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) + seq_printf(seq, ",whint_mode=%s", "fs-based"); +#ifdef CONFIG_F2FS_FS_ENCRYPTION + if (F2FS_OPTION(sbi).test_dummy_encryption) + seq_puts(seq, ",test_dummy_encryption"); +#endif + + if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_DEFAULT) + seq_printf(seq, ",alloc_mode=%s", "default"); + else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE) + seq_printf(seq, ",alloc_mode=%s", "reuse"); + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX) + seq_printf(seq, ",fsync_mode=%s", "posix"); + else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) + seq_printf(seq, ",fsync_mode=%s", "strict"); return 0; } static void default_options(struct f2fs_sb_info *sbi) { /* init some FS parameters */ - sbi->active_logs = NR_CURSEG_TYPE; - sbi->inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; + F2FS_OPTION(sbi).active_logs = NR_CURSEG_TYPE; + F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; + F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; + F2FS_OPTION(sbi).test_dummy_encryption = false; + sbi->readdir_ra = 1; set_opt(sbi, BG_GC); set_opt(sbi, INLINE_XATTR); @@ -1243,7 +1362,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, NOHEAP); sbi->sb->s_flags |= SB_LAZYTIME; set_opt(sbi, FLUSH_MERGE); - if (f2fs_sb_mounted_blkzoned(sbi->sb)) { + if (f2fs_sb_has_blkzoned(sbi->sb)) { set_opt_mode(sbi, F2FS_MOUNT_LFS); set_opt(sbi, DISCARD); } else { @@ -1270,16 +1389,11 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) struct f2fs_sb_info *sbi = F2FS_SB(sb); struct f2fs_mount_info org_mount_opt; unsigned long old_sb_flags; - int err, active_logs; + int err; bool need_restart_gc = false; bool need_stop_gc = false; bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); -#ifdef CONFIG_F2FS_FAULT_INJECTION - struct f2fs_fault_info ffi = sbi->fault_info; -#endif #ifdef CONFIG_QUOTA - int s_jquota_fmt; - char *s_qf_names[MAXQUOTAS]; int i, j; #endif @@ -1289,21 +1403,21 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) */ org_mount_opt = sbi->mount_opt; old_sb_flags = sb->s_flags; - active_logs = sbi->active_logs; #ifdef CONFIG_QUOTA - s_jquota_fmt = sbi->s_jquota_fmt; + org_mount_opt.s_jquota_fmt = F2FS_OPTION(sbi).s_jquota_fmt; for (i = 0; i < MAXQUOTAS; i++) { - if (sbi->s_qf_names[i]) { - s_qf_names[i] = kstrdup(sbi->s_qf_names[i], - GFP_KERNEL); - if (!s_qf_names[i]) { + if (F2FS_OPTION(sbi).s_qf_names[i]) { + org_mount_opt.s_qf_names[i] = + kstrdup(F2FS_OPTION(sbi).s_qf_names[i], + GFP_KERNEL); + if (!org_mount_opt.s_qf_names[i]) { for (j = 0; j < i; j++) - kfree(s_qf_names[j]); + kfree(org_mount_opt.s_qf_names[j]); return -ENOMEM; } } else { - s_qf_names[i] = NULL; + org_mount_opt.s_qf_names[i] = NULL; } } #endif @@ -1373,7 +1487,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) need_stop_gc = true; } - if (*flags & SB_RDONLY) { + if (*flags & SB_RDONLY || + F2FS_OPTION(sbi).whint_mode != org_mount_opt.whint_mode) { writeback_inodes_sb(sb, WB_REASON_SYNC); sync_inodes_sb(sb); @@ -1399,7 +1514,7 @@ skip: #ifdef CONFIG_QUOTA /* Release old quota file names */ for (i = 0; i < MAXQUOTAS; i++) - kfree(s_qf_names[i]); + kfree(org_mount_opt.s_qf_names[i]); #endif /* Update the POSIXACL Flag */ sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | @@ -1417,18 +1532,14 @@ restore_gc: } restore_opts: #ifdef CONFIG_QUOTA - sbi->s_jquota_fmt = s_jquota_fmt; + F2FS_OPTION(sbi).s_jquota_fmt = org_mount_opt.s_jquota_fmt; for (i = 0; i < MAXQUOTAS; i++) { - kfree(sbi->s_qf_names[i]); - sbi->s_qf_names[i] = s_qf_names[i]; + kfree(F2FS_OPTION(sbi).s_qf_names[i]); + F2FS_OPTION(sbi).s_qf_names[i] = org_mount_opt.s_qf_names[i]; } #endif sbi->mount_opt = org_mount_opt; - sbi->active_logs = active_logs; sb->s_flags = old_sb_flags; -#ifdef CONFIG_F2FS_FAULT_INJECTION - sbi->fault_info = ffi; -#endif return err; } @@ -1456,7 +1567,7 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data, while (toread > 0) { tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread); repeat: - page = read_mapping_page(mapping, blkidx, NULL); + page = read_cache_page_gfp(mapping, blkidx, GFP_NOFS); if (IS_ERR(page)) { if (PTR_ERR(page) == -ENOMEM) { congestion_wait(BLK_RW_ASYNC, HZ/50); @@ -1550,8 +1661,8 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode) static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type) { - return dquot_quota_on_mount(sbi->sb, sbi->s_qf_names[type], - sbi->s_jquota_fmt, type); + return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type], + F2FS_OPTION(sbi).s_jquota_fmt, type); } int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) @@ -1570,7 +1681,7 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) } for (i = 0; i < MAXQUOTAS; i++) { - if (sbi->s_qf_names[i]) { + if (F2FS_OPTION(sbi).s_qf_names[i]) { err = f2fs_quota_on_mount(sbi, i); if (!err) { enabled = 1; @@ -1797,11 +1908,28 @@ static int f2fs_get_context(struct inode *inode, void *ctx, size_t len) static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len, void *fs_data) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + /* + * Encrypting the root directory is not allowed because fsck + * expects lost+found directory to exist and remain unencrypted + * if LOST_FOUND feature is enabled. + * + */ + if (f2fs_sb_has_lost_found(sbi->sb) && + inode->i_ino == F2FS_ROOT_INO(sbi)) + return -EPERM; + return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len, fs_data, XATTR_CREATE); } +static bool f2fs_dummy_context(struct inode *inode) +{ + return DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(inode)); +} + static unsigned f2fs_max_namelen(struct inode *inode) { return S_ISLNK(inode->i_mode) ? @@ -1812,6 +1940,7 @@ static const struct fscrypt_operations f2fs_cryptops = { .key_prefix = "f2fs:", .get_context = f2fs_get_context, .set_context = f2fs_set_context, + .dummy_context = f2fs_dummy_context, .empty_dir = f2fs_empty_dir, .max_namelen = f2fs_max_namelen, }; @@ -1894,7 +2023,6 @@ static int __f2fs_commit_super(struct buffer_head *bh, lock_buffer(bh); if (super) memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super)); - set_buffer_uptodate(bh); set_buffer_dirty(bh); unlock_buffer(bh); @@ -2181,6 +2309,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->dirty_device = 0; spin_lock_init(&sbi->dev_lock); + + init_rwsem(&sbi->sb_lock); } static int init_percpu_info(struct f2fs_sb_info *sbi) @@ -2206,7 +2336,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) unsigned int n = 0; int err = -EIO; - if (!f2fs_sb_mounted_blkzoned(sbi->sb)) + if (!f2fs_sb_has_blkzoned(sbi->sb)) return 0; if (sbi->blocks_per_blkz && sbi->blocks_per_blkz != @@ -2334,7 +2464,7 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) } /* write back-up superblock first */ - bh = sb_getblk(sbi->sb, sbi->valid_super_block ? 0: 1); + bh = sb_bread(sbi->sb, sbi->valid_super_block ? 0 : 1); if (!bh) return -EIO; err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi)); @@ -2345,7 +2475,7 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) return err; /* write current valid superblock */ - bh = sb_getblk(sbi->sb, sbi->valid_super_block); + bh = sb_bread(sbi->sb, sbi->valid_super_block); if (!bh) return -EIO; err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi)); @@ -2413,7 +2543,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) #ifdef CONFIG_BLK_DEV_ZONED if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM && - !f2fs_sb_mounted_blkzoned(sbi->sb)) { + !f2fs_sb_has_blkzoned(sbi->sb)) { f2fs_msg(sbi->sb, KERN_ERR, "Zoned block device feature not enabled\n"); return -EINVAL; @@ -2447,6 +2577,18 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) return 0; } +static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) +{ + struct f2fs_sm_info *sm_i = SM_I(sbi); + + /* adjust parameters according to the volume size */ + if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) { + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; + sm_i->dcc_info->discard_granularity = 1; + sm_i->ipu_policy = 1 << F2FS_IPU_FORCE; + } +} + static int f2fs_fill_super(struct super_block *sb, void *data, int silent) { struct f2fs_sb_info *sbi; @@ -2494,8 +2636,8 @@ try_onemore: sb->s_fs_info = sbi; sbi->raw_super = raw_super; - sbi->s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); - sbi->s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); + F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); + F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); /* precompute checksum seed for metadata */ if (f2fs_sb_has_inode_chksum(sb)) @@ -2508,7 +2650,7 @@ try_onemore: * devices, but mandatory for host-managed zoned block devices. */ #ifndef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_mounted_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sb)) { f2fs_msg(sb, KERN_ERR, "Zoned block device support is not enabled\n"); err = -EOPNOTSUPP; @@ -2724,7 +2866,7 @@ try_onemore: * Turn on quotas which were not enabled for read-only mounts if * filesystem has quota feature, so that they are updated correctly. */ - if (f2fs_sb_has_quota_ino(sb) && !sb_rdonly(sb)) { + if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) { err = f2fs_enable_quotas(sb); if (err) { f2fs_msg(sb, KERN_ERR, @@ -2799,6 +2941,8 @@ skip_recovery: f2fs_join_shrinker(sbi); + f2fs_tuning_parameters(sbi); + f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx", cur_cp_version(F2FS_CKPT(sbi))); f2fs_update_time(sbi, CP_TIME); @@ -2807,7 +2951,7 @@ skip_recovery: free_meta: #ifdef CONFIG_QUOTA - if (f2fs_sb_has_quota_ino(sb) && !sb_rdonly(sb)) + if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) f2fs_quota_off_umount(sbi->sb); #endif f2fs_sync_inode_meta(sbi); @@ -2851,7 +2995,7 @@ free_bio_info: free_options: #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) - kfree(sbi->s_qf_names[i]); + kfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif kfree(options); free_sb_buf: diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index d978c7b6ea04..f33a56d6e6dd 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -58,7 +58,7 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) #ifdef CONFIG_F2FS_FAULT_INJECTION else if (struct_type == FAULT_INFO_RATE || struct_type == FAULT_INFO_TYPE) - return (unsigned char *)&sbi->fault_info; + return (unsigned char *)&F2FS_OPTION(sbi).fault_info; #endif return NULL; } @@ -92,10 +92,10 @@ static ssize_t features_show(struct f2fs_attr *a, if (!sb->s_bdev->bd_part) return snprintf(buf, PAGE_SIZE, "0\n"); - if (f2fs_sb_has_crypto(sb)) + if (f2fs_sb_has_encrypt(sb)) len += snprintf(buf, PAGE_SIZE - len, "%s", "encryption"); - if (f2fs_sb_mounted_blkzoned(sb)) + if (f2fs_sb_has_blkzoned(sb)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "blkzoned"); if (f2fs_sb_has_extra_attr(sb)) @@ -116,6 +116,9 @@ static ssize_t features_show(struct f2fs_attr *a, if (f2fs_sb_has_inode_crtime(sb)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "inode_crtime"); + if (f2fs_sb_has_lost_found(sb)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "lost_found"); len += snprintf(buf + len, PAGE_SIZE - len, "\n"); return len; } @@ -136,6 +139,27 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, if (!ptr) return -EINVAL; + if (!strcmp(a->attr.name, "extension_list")) { + __u8 (*extlist)[F2FS_EXTENSION_LEN] = + sbi->raw_super->extension_list; + int cold_count = le32_to_cpu(sbi->raw_super->extension_count); + int hot_count = sbi->raw_super->hot_ext_count; + int len = 0, i; + + len += snprintf(buf + len, PAGE_SIZE - len, + "cold file extenstion:\n"); + for (i = 0; i < cold_count; i++) + len += snprintf(buf + len, PAGE_SIZE - len, "%s\n", + extlist[i]); + + len += snprintf(buf + len, PAGE_SIZE - len, + "hot file extenstion:\n"); + for (i = cold_count; i < cold_count + hot_count; i++) + len += snprintf(buf + len, PAGE_SIZE - len, "%s\n", + extlist[i]); + return len; + } + ui = (unsigned int *)(ptr + a->offset); return snprintf(buf, PAGE_SIZE, "%u\n", *ui); @@ -154,6 +178,41 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, if (!ptr) return -EINVAL; + if (!strcmp(a->attr.name, "extension_list")) { + const char *name = strim((char *)buf); + bool set = true, hot; + + if (!strncmp(name, "[h]", 3)) + hot = true; + else if (!strncmp(name, "[c]", 3)) + hot = false; + else + return -EINVAL; + + name += 3; + + if (*name == '!') { + name++; + set = false; + } + + if (strlen(name) >= F2FS_EXTENSION_LEN) + return -EINVAL; + + down_write(&sbi->sb_lock); + + ret = update_extension_list(sbi, name, hot, set); + if (ret) + goto out; + + ret = f2fs_commit_super(sbi, false); + if (ret) + update_extension_list(sbi, name, hot, !set); +out: + up_write(&sbi->sb_lock); + return ret ? ret : count; + } + ui = (unsigned int *)(ptr + a->offset); ret = kstrtoul(skip_spaces(buf), 0, &t); @@ -166,7 +225,7 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, if (a->struct_type == RESERVED_BLOCKS) { spin_lock(&sbi->stat_lock); if (t > (unsigned long)(sbi->user_block_count - - sbi->root_reserved_blocks)) { + F2FS_OPTION(sbi).root_reserved_blocks)) { spin_unlock(&sbi->stat_lock); return -EINVAL; } @@ -236,6 +295,7 @@ enum feat_id { FEAT_FLEXIBLE_INLINE_XATTR, FEAT_QUOTA_INO, FEAT_INODE_CRTIME, + FEAT_LOST_FOUND, }; static ssize_t f2fs_feature_show(struct f2fs_attr *a, @@ -251,6 +311,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a, case FEAT_FLEXIBLE_INLINE_XATTR: case FEAT_QUOTA_INO: case FEAT_INODE_CRTIME: + case FEAT_LOST_FOUND: return snprintf(buf, PAGE_SIZE, "supported\n"); } return 0; @@ -307,6 +368,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); +F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list); #ifdef CONFIG_F2FS_FAULT_INJECTION F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); @@ -329,6 +391,7 @@ F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM); F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR); F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO); F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME); +F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -357,6 +420,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(iostat_enable), ATTR_LIST(readdir_ra), ATTR_LIST(gc_pin_file_thresh), + ATTR_LIST(extension_list), #ifdef CONFIG_F2FS_FAULT_INJECTION ATTR_LIST(inject_rate), ATTR_LIST(inject_type), @@ -383,6 +447,7 @@ static struct attribute *f2fs_feat_attrs[] = { ATTR_LIST(flexible_inline_xattr), ATTR_LIST(quota_ino), ATTR_LIST(inode_crtime), + ATTR_LIST(lost_found), NULL, }; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index d4d04fee568a..1280f915079b 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1343,7 +1343,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) dirty = inode->i_state & I_DIRTY; if (inode->i_state & I_DIRTY_TIME) { - if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) || + if ((dirty & I_DIRTY_INODE) || wbc->sync_mode == WB_SYNC_ALL || unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) || unlikely(time_after(jiffies, @@ -2112,7 +2112,6 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode) */ void __mark_inode_dirty(struct inode *inode, int flags) { -#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) struct super_block *sb = inode->i_sb; int dirtytime; @@ -2122,7 +2121,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) * Don't do this for I_DIRTY_PAGES - that doesn't actually * dirty the inode itself */ - if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_TIME)) { + if (flags & (I_DIRTY_INODE | I_DIRTY_TIME)) { trace_writeback_dirty_inode_start(inode, flags); if (sb->s_op->dirty_inode) @@ -2197,7 +2196,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) if (dirtytime) inode->dirtied_time_when = jiffies; - if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES)) + if (inode->i_state & I_DIRTY) dirty_list = &wb->b_dirty; else dirty_list = &wb->b_dirty_time; @@ -2221,8 +2220,6 @@ void __mark_inode_dirty(struct inode *inode, int flags) } out_unlock_inode: spin_unlock(&inode->i_lock); - -#undef I_DIRTY_INODE } EXPORT_SYMBOL(__mark_inode_dirty); diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index 56cce7fdd39e..c184c5a356ff 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -125,7 +125,7 @@ struct fscache_cache *fscache_select_cache_for_object( } /* the parent is unbacked */ - if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) { + if (cookie->type != FSCACHE_COOKIE_TYPE_INDEX) { /* cookie not an index and is unbacked */ spin_unlock(&cookie->lock); _leave(" = NULL [cookie ub,ni]"); diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index d705125665f0..7dc55b93a830 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -21,12 +21,54 @@ struct kmem_cache *fscache_cookie_jar; static atomic_t fscache_object_debug_id = ATOMIC_INIT(0); -static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie); +#define fscache_cookie_hash_shift 15 +static struct hlist_bl_head fscache_cookie_hash[1 << fscache_cookie_hash_shift]; + +static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie, + loff_t object_size); static int fscache_alloc_object(struct fscache_cache *cache, struct fscache_cookie *cookie); static int fscache_attach_object(struct fscache_cookie *cookie, struct fscache_object *object); +static void fscache_print_cookie(struct fscache_cookie *cookie, char prefix) +{ + struct hlist_node *object; + const u8 *k; + unsigned loop; + + pr_err("%c-cookie c=%p [p=%p fl=%lx nc=%u na=%u]\n", + prefix, cookie, cookie->parent, cookie->flags, + atomic_read(&cookie->n_children), + atomic_read(&cookie->n_active)); + pr_err("%c-cookie d=%p n=%p\n", + prefix, cookie->def, cookie->netfs_data); + + object = READ_ONCE(cookie->backing_objects.first); + if (object) + pr_err("%c-cookie o=%p\n", + prefix, hlist_entry(object, struct fscache_object, cookie_link)); + + pr_err("%c-key=[%u] '", prefix, cookie->key_len); + k = (cookie->key_len <= sizeof(cookie->inline_key)) ? + cookie->inline_key : cookie->key; + for (loop = 0; loop < cookie->key_len; loop++) + pr_cont("%02x", k[loop]); + pr_cont("'\n"); +} + +void fscache_free_cookie(struct fscache_cookie *cookie) +{ + if (cookie) { + BUG_ON(!hlist_empty(&cookie->backing_objects)); + if (cookie->aux_len > sizeof(cookie->inline_aux)) + kfree(cookie->aux); + if (cookie->key_len > sizeof(cookie->inline_key)) + kfree(cookie->key); + kmem_cache_free(fscache_cookie_jar, cookie); + } +} + /* * initialise an cookie jar slab element prior to any use */ @@ -41,6 +83,170 @@ void fscache_cookie_init_once(void *_cookie) } /* + * Set the index key in a cookie. The cookie struct has space for a 12-byte + * key plus length and hash, but if that's not big enough, it's instead a + * pointer to a buffer containing 3 bytes of hash, 1 byte of length and then + * the key data. + */ +static int fscache_set_key(struct fscache_cookie *cookie, + const void *index_key, size_t index_key_len) +{ + unsigned long long h; + u32 *buf; + int i; + + cookie->key_len = index_key_len; + + if (index_key_len > sizeof(cookie->inline_key)) { + buf = kzalloc(index_key_len, GFP_KERNEL); + if (!buf) + return -ENOMEM; + cookie->key = buf; + } else { + buf = (u32 *)cookie->inline_key; + buf[0] = 0; + buf[1] = 0; + buf[2] = 0; + } + + memcpy(buf, index_key, index_key_len); + + /* Calculate a hash and combine this with the length in the first word + * or first half word + */ + h = (unsigned long)cookie->parent; + h += index_key_len + cookie->type; + for (i = 0; i < (index_key_len + sizeof(u32) - 1) / sizeof(u32); i++) + h += buf[i]; + + cookie->key_hash = h ^ (h >> 32); + return 0; +} + +static long fscache_compare_cookie(const struct fscache_cookie *a, + const struct fscache_cookie *b) +{ + const void *ka, *kb; + + if (a->key_hash != b->key_hash) + return (long)a->key_hash - (long)b->key_hash; + if (a->parent != b->parent) + return (long)a->parent - (long)b->parent; + if (a->key_len != b->key_len) + return (long)a->key_len - (long)b->key_len; + if (a->type != b->type) + return (long)a->type - (long)b->type; + + if (a->key_len <= sizeof(a->inline_key)) { + ka = &a->inline_key; + kb = &b->inline_key; + } else { + ka = a->key; + kb = b->key; + } + return memcmp(ka, kb, a->key_len); +} + +/* + * Allocate a cookie. + */ +struct fscache_cookie *fscache_alloc_cookie( + struct fscache_cookie *parent, + const struct fscache_cookie_def *def, + const void *index_key, size_t index_key_len, + const void *aux_data, size_t aux_data_len, + void *netfs_data, + loff_t object_size) +{ + struct fscache_cookie *cookie; + + /* allocate and initialise a cookie */ + cookie = kmem_cache_alloc(fscache_cookie_jar, GFP_KERNEL); + if (!cookie) + return NULL; + + cookie->key_len = index_key_len; + cookie->aux_len = aux_data_len; + + if (fscache_set_key(cookie, index_key, index_key_len) < 0) + goto nomem; + + if (cookie->aux_len <= sizeof(cookie->inline_aux)) { + memcpy(cookie->inline_aux, aux_data, cookie->aux_len); + } else { + cookie->aux = kmemdup(aux_data, cookie->aux_len, GFP_KERNEL); + if (!cookie->aux) + goto nomem; + } + + atomic_set(&cookie->usage, 1); + atomic_set(&cookie->n_children, 0); + + /* We keep the active count elevated until relinquishment to prevent an + * attempt to wake up every time the object operations queue quiesces. + */ + atomic_set(&cookie->n_active, 1); + + cookie->def = def; + cookie->parent = parent; + cookie->netfs_data = netfs_data; + cookie->flags = (1 << FSCACHE_COOKIE_NO_DATA_YET); + cookie->type = def->type; + + /* radix tree insertion won't use the preallocation pool unless it's + * told it may not wait */ + INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); + return cookie; + +nomem: + fscache_free_cookie(cookie); + return NULL; +} + +/* + * Attempt to insert the new cookie into the hash. If there's a collision, we + * return the old cookie if it's not in use and an error otherwise. + */ +struct fscache_cookie *fscache_hash_cookie(struct fscache_cookie *candidate) +{ + struct fscache_cookie *cursor; + struct hlist_bl_head *h; + struct hlist_bl_node *p; + unsigned int bucket; + + bucket = candidate->key_hash & (ARRAY_SIZE(fscache_cookie_hash) - 1); + h = &fscache_cookie_hash[bucket]; + + hlist_bl_lock(h); + hlist_bl_for_each_entry(cursor, p, h, hash_link) { + if (fscache_compare_cookie(candidate, cursor) == 0) + goto collision; + } + + __set_bit(FSCACHE_COOKIE_ACQUIRED, &candidate->flags); + fscache_cookie_get(candidate->parent, fscache_cookie_get_acquire_parent); + atomic_inc(&candidate->parent->n_children); + hlist_bl_add_head(&candidate->hash_link, h); + hlist_bl_unlock(h); + return candidate; + +collision: + if (test_and_set_bit(FSCACHE_COOKIE_ACQUIRED, &cursor->flags)) { + trace_fscache_cookie(cursor, fscache_cookie_collision, + atomic_read(&cursor->usage)); + pr_err("Duplicate cookie detected\n"); + fscache_print_cookie(cursor, 'O'); + fscache_print_cookie(candidate, 'N'); + hlist_bl_unlock(h); + return NULL; + } + + fscache_cookie_get(cursor, fscache_cookie_get_reacquire); + hlist_bl_unlock(h); + return cursor; +} + +/* * request a cookie to represent an object (index, datafile, xattr, etc) * - parent specifies the parent object * - the top level index cookie for each netfs is stored in the fscache_netfs @@ -58,10 +264,13 @@ void fscache_cookie_init_once(void *_cookie) struct fscache_cookie *__fscache_acquire_cookie( struct fscache_cookie *parent, const struct fscache_cookie_def *def, + const void *index_key, size_t index_key_len, + const void *aux_data, size_t aux_data_len, void *netfs_data, + loff_t object_size, bool enable) { - struct fscache_cookie *cookie; + struct fscache_cookie *candidate, *cookie; BUG_ON(!def); @@ -69,6 +278,13 @@ struct fscache_cookie *__fscache_acquire_cookie( parent ? (char *) parent->def->name : "<no-parent>", def->name, netfs_data, enable); + if (!index_key || !index_key_len || index_key_len > 255 || aux_data_len > 255) + return NULL; + if (!aux_data || !aux_data_len) { + aux_data = NULL; + aux_data_len = 0; + } + fscache_stat(&fscache_n_acquires); /* if there's no parent cookie, then we don't create one here either */ @@ -79,41 +295,31 @@ struct fscache_cookie *__fscache_acquire_cookie( } /* validate the definition */ - BUG_ON(!def->get_key); BUG_ON(!def->name[0]); BUG_ON(def->type == FSCACHE_COOKIE_TYPE_INDEX && - parent->def->type != FSCACHE_COOKIE_TYPE_INDEX); + parent->type != FSCACHE_COOKIE_TYPE_INDEX); - /* allocate and initialise a cookie */ - cookie = kmem_cache_alloc(fscache_cookie_jar, GFP_KERNEL); - if (!cookie) { + candidate = fscache_alloc_cookie(parent, def, + index_key, index_key_len, + aux_data, aux_data_len, + netfs_data, object_size); + if (!candidate) { fscache_stat(&fscache_n_acquires_oom); _leave(" [ENOMEM]"); return NULL; } - atomic_set(&cookie->usage, 1); - atomic_set(&cookie->n_children, 0); - - /* We keep the active count elevated until relinquishment to prevent an - * attempt to wake up every time the object operations queue quiesces. - */ - atomic_set(&cookie->n_active, 1); - - atomic_inc(&parent->usage); - atomic_inc(&parent->n_children); + cookie = fscache_hash_cookie(candidate); + if (!cookie) { + trace_fscache_cookie(candidate, fscache_cookie_discard, 1); + goto out; + } - cookie->def = def; - cookie->parent = parent; - cookie->netfs_data = netfs_data; - cookie->flags = (1 << FSCACHE_COOKIE_NO_DATA_YET); + if (cookie == candidate) + candidate = NULL; - /* radix tree insertion won't use the preallocation pool unless it's - * told it may not wait */ - INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); - - switch (cookie->def->type) { + switch (cookie->type) { case FSCACHE_COOKIE_TYPE_INDEX: fscache_stat(&fscache_n_cookie_index); break; @@ -125,16 +331,19 @@ struct fscache_cookie *__fscache_acquire_cookie( break; } + trace_fscache_acquire(cookie); + if (enable) { /* if the object is an index then we need do nothing more here * - we create indices on disk when we need them as an index * may exist in multiple caches */ - if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) { - if (fscache_acquire_non_index_cookie(cookie) == 0) { + if (cookie->type != FSCACHE_COOKIE_TYPE_INDEX) { + if (fscache_acquire_non_index_cookie(cookie, object_size) == 0) { set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags); } else { atomic_dec(&parent->n_children); - __fscache_cookie_put(cookie); + fscache_cookie_put(cookie, + fscache_cookie_put_acquire_nobufs); fscache_stat(&fscache_n_acquires_nobufs); _leave(" = NULL"); return NULL; @@ -145,7 +354,9 @@ struct fscache_cookie *__fscache_acquire_cookie( } fscache_stat(&fscache_n_acquires_ok); - _leave(" = %p", cookie); + +out: + fscache_free_cookie(candidate); return cookie; } EXPORT_SYMBOL(__fscache_acquire_cookie); @@ -154,24 +365,30 @@ EXPORT_SYMBOL(__fscache_acquire_cookie); * Enable a cookie to permit it to accept new operations. */ void __fscache_enable_cookie(struct fscache_cookie *cookie, + const void *aux_data, + loff_t object_size, bool (*can_enable)(void *data), void *data) { _enter("%p", cookie); + trace_fscache_enable(cookie); + wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK, TASK_UNINTERRUPTIBLE); + fscache_update_aux(cookie, aux_data); + if (test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags)) goto out_unlock; if (can_enable && !can_enable(data)) { /* The netfs decided it didn't want to enable after all */ - } else if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) { + } else if (cookie->type != FSCACHE_COOKIE_TYPE_INDEX) { /* Wait for outstanding disablement to complete */ __fscache_wait_on_invalidate(cookie); - if (fscache_acquire_non_index_cookie(cookie) == 0) + if (fscache_acquire_non_index_cookie(cookie, object_size) == 0) set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags); } else { set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags); @@ -188,11 +405,11 @@ EXPORT_SYMBOL(__fscache_enable_cookie); * - this must make sure the index chain is instantiated and instantiate the * object representation too */ -static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie) +static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie, + loff_t object_size) { struct fscache_object *object; struct fscache_cache *cache; - uint64_t i_size; int ret; _enter(""); @@ -231,9 +448,6 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie) return ret; } - /* pass on how big the object we're caching is supposed to be */ - cookie->def->get_attr(cookie->netfs_data, &i_size); - spin_lock(&cookie->lock); if (hlist_empty(&cookie->backing_objects)) { spin_unlock(&cookie->lock); @@ -243,7 +457,7 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie) object = hlist_entry(cookie->backing_objects.first, struct fscache_object, cookie_link); - fscache_set_store_limit(object, i_size); + fscache_set_store_limit(object, object_size); /* initiate the process of looking up all the objects in the chain * (done by fscache_initialise_object()) */ @@ -318,7 +532,7 @@ static int fscache_alloc_object(struct fscache_cache *cache, * attached to the cookie */ if (fscache_attach_object(cookie, object) < 0) { fscache_stat(&fscache_n_cop_put_object); - cache->ops->put_object(object); + cache->ops->put_object(object, fscache_obj_put_attach_fail); fscache_stat_d(&fscache_n_cop_put_object); } @@ -338,7 +552,7 @@ object_already_extant: error_put: fscache_stat(&fscache_n_cop_put_object); - cache->ops->put_object(object); + cache->ops->put_object(object, fscache_obj_put_alloc_fail); fscache_stat_d(&fscache_n_cop_put_object); error: _leave(" = %d", ret); @@ -398,7 +612,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie, /* attach to the cookie */ object->cookie = cookie; - atomic_inc(&cookie->usage); + fscache_cookie_get(cookie, fscache_cookie_get_attach_object); hlist_add_head(&object->cookie_link, &cookie->backing_objects); fscache_objlist_add(object); @@ -426,10 +640,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie) * there, and if it's doing that, it may as well just retire the * cookie. */ - ASSERTCMP(cookie->def->type, ==, FSCACHE_COOKIE_TYPE_DATAFILE); - - /* We will be updating the cookie too. */ - BUG_ON(!cookie->def->get_aux); + ASSERTCMP(cookie->type, ==, FSCACHE_COOKIE_TYPE_DATAFILE); /* If there's an object, we tell the object state machine to handle the * invalidation on our behalf, otherwise there's nothing to do. @@ -473,7 +684,7 @@ EXPORT_SYMBOL(__fscache_wait_on_invalidate); /* * update the index entries backing a cookie */ -void __fscache_update_cookie(struct fscache_cookie *cookie) +void __fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data) { struct fscache_object *object; @@ -487,10 +698,10 @@ void __fscache_update_cookie(struct fscache_cookie *cookie) _enter("{%s}", cookie->def->name); - BUG_ON(!cookie->def->get_aux); - spin_lock(&cookie->lock); + fscache_update_aux(cookie, aux_data); + if (fscache_cookie_enabled(cookie)) { /* update the index entry on disk in each cache backing this * cookie. @@ -509,13 +720,17 @@ EXPORT_SYMBOL(__fscache_update_cookie); /* * Disable a cookie to stop it from accepting new requests from the netfs. */ -void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate) +void __fscache_disable_cookie(struct fscache_cookie *cookie, + const void *aux_data, + bool invalidate) { struct fscache_object *object; bool awaken = false; _enter("%p,%u", cookie, invalidate); + trace_fscache_disable(cookie); + ASSERTCMP(atomic_read(&cookie->n_active), >, 0); if (atomic_read(&cookie->n_children) != 0) { @@ -526,6 +741,9 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate) wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK, TASK_UNINTERRUPTIBLE); + + fscache_update_aux(cookie, aux_data); + if (!test_and_clear_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags)) goto out_unlock_enable; @@ -563,7 +781,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate) } /* Make sure any pending writes are cancelled. */ - if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) + if (cookie->type != FSCACHE_COOKIE_TYPE_INDEX) fscache_invalidate_writes(cookie); /* Reset the cookie state if it wasn't relinquished */ @@ -585,7 +803,9 @@ EXPORT_SYMBOL(__fscache_disable_cookie); * - all dependents of this cookie must have already been unregistered * (indices/files/pages) */ -void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire) +void __fscache_relinquish_cookie(struct fscache_cookie *cookie, + const void *aux_data, + bool retire) { fscache_stat(&fscache_n_relinquishes); if (retire) @@ -601,10 +821,13 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire) cookie, cookie->def->name, cookie->netfs_data, atomic_read(&cookie->n_active), retire); + trace_fscache_relinquish(cookie, retire); + /* No further netfs-accessing operations on this cookie permitted */ - set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags); + if (test_and_set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags)) + BUG(); - __fscache_disable_cookie(cookie, retire); + __fscache_disable_cookie(cookie, aux_data, retire); /* Clear pointers back to the netfs */ cookie->netfs_data = NULL; @@ -619,35 +842,54 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire) /* Dispose of the netfs's link to the cookie */ ASSERTCMP(atomic_read(&cookie->usage), >, 0); - fscache_cookie_put(cookie); + fscache_cookie_put(cookie, fscache_cookie_put_relinquish); _leave(""); } EXPORT_SYMBOL(__fscache_relinquish_cookie); /* - * destroy a cookie + * Remove a cookie from the hash table. */ -void __fscache_cookie_put(struct fscache_cookie *cookie) +static void fscache_unhash_cookie(struct fscache_cookie *cookie) +{ + struct hlist_bl_head *h; + unsigned int bucket; + + bucket = cookie->key_hash & (ARRAY_SIZE(fscache_cookie_hash) - 1); + h = &fscache_cookie_hash[bucket]; + + hlist_bl_lock(h); + hlist_bl_del(&cookie->hash_link); + hlist_bl_unlock(h); +} + +/* + * Drop a reference to a cookie. + */ +void fscache_cookie_put(struct fscache_cookie *cookie, + enum fscache_cookie_trace where) { struct fscache_cookie *parent; + int usage; _enter("%p", cookie); - for (;;) { - _debug("FREE COOKIE %p", cookie); - parent = cookie->parent; - BUG_ON(!hlist_empty(&cookie->backing_objects)); - kmem_cache_free(fscache_cookie_jar, cookie); + do { + usage = atomic_dec_return(&cookie->usage); + trace_fscache_cookie(cookie, where, usage); - if (!parent) - break; + if (usage > 0) + return; + BUG_ON(usage < 0); + + parent = cookie->parent; + fscache_unhash_cookie(cookie); + fscache_free_cookie(cookie); cookie = parent; - BUG_ON(atomic_read(&cookie->usage) <= 0); - if (!atomic_dec_and_test(&cookie->usage)) - break; - } + where = fscache_cookie_put_parent; + } while (cookie); _leave(""); } @@ -657,7 +899,8 @@ void __fscache_cookie_put(struct fscache_cookie *cookie) * * NOTE: it only serves no-index type */ -int __fscache_check_consistency(struct fscache_cookie *cookie) +int __fscache_check_consistency(struct fscache_cookie *cookie, + const void *aux_data) { struct fscache_operation *op; struct fscache_object *object; @@ -666,7 +909,7 @@ int __fscache_check_consistency(struct fscache_cookie *cookie) _enter("%p,", cookie); - ASSERTCMP(cookie->def->type, ==, FSCACHE_COOKIE_TYPE_DATAFILE); + ASSERTCMP(cookie->type, ==, FSCACHE_COOKIE_TYPE_DATAFILE); if (fscache_wait_for_deferred_lookup(cookie) < 0) return -ERESTARTSYS; @@ -678,13 +921,16 @@ int __fscache_check_consistency(struct fscache_cookie *cookie) if (!op) return -ENOMEM; - fscache_operation_init(op, NULL, NULL, NULL); + fscache_operation_init(cookie, op, NULL, NULL, NULL); op->flags = FSCACHE_OP_MYTHREAD | (1 << FSCACHE_OP_WAITING) | (1 << FSCACHE_OP_UNUSE_COOKIE); + trace_fscache_page_op(cookie, NULL, op, fscache_page_op_check_consistency); spin_lock(&cookie->lock); + fscache_update_aux(cookie, aux_data); + if (!fscache_cookie_enabled(cookie) || hlist_empty(&cookie->backing_objects)) goto inconsistent; diff --git a/fs/fscache/fsdef.c b/fs/fscache/fsdef.c index 5a117df2a9ef..aa46e48d8c75 100644 --- a/fs/fscache/fsdef.c +++ b/fs/fscache/fsdef.c @@ -13,16 +13,11 @@ #include <linux/module.h> #include "internal.h" -static uint16_t fscache_fsdef_netfs_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax); - -static uint16_t fscache_fsdef_netfs_get_aux(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax); - static enum fscache_checkaux fscache_fsdef_netfs_check_aux(void *cookie_netfs_data, const void *data, - uint16_t datalen); + uint16_t datalen, + loff_t object_size); /* * The root index is owned by FS-Cache itself. @@ -60,6 +55,7 @@ struct fscache_cookie fscache_fsdef_index = { .backing_objects = HLIST_HEAD_INIT, .def = &fscache_fsdef_index_def, .flags = 1 << FSCACHE_COOKIE_ENABLED, + .type = FSCACHE_COOKIE_TYPE_INDEX, }; EXPORT_SYMBOL(fscache_fsdef_index); @@ -71,59 +67,18 @@ EXPORT_SYMBOL(fscache_fsdef_index); struct fscache_cookie_def fscache_fsdef_netfs_def = { .name = "FSDEF.netfs", .type = FSCACHE_COOKIE_TYPE_INDEX, - .get_key = fscache_fsdef_netfs_get_key, - .get_aux = fscache_fsdef_netfs_get_aux, .check_aux = fscache_fsdef_netfs_check_aux, }; /* - * get the key data for an FSDEF index record - this is the name of the netfs - * for which this entry is created - */ -static uint16_t fscache_fsdef_netfs_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax) -{ - const struct fscache_netfs *netfs = cookie_netfs_data; - unsigned klen; - - _enter("{%s.%u},", netfs->name, netfs->version); - - klen = strlen(netfs->name); - if (klen > bufmax) - return 0; - - memcpy(buffer, netfs->name, klen); - return klen; -} - -/* - * get the auxiliary data for an FSDEF index record - this is the index - * structure version number of the netfs for which this version is created - */ -static uint16_t fscache_fsdef_netfs_get_aux(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax) -{ - const struct fscache_netfs *netfs = cookie_netfs_data; - unsigned dlen; - - _enter("{%s.%u},", netfs->name, netfs->version); - - dlen = sizeof(uint32_t); - if (dlen > bufmax) - return 0; - - memcpy(buffer, &netfs->version, dlen); - return dlen; -} - -/* * check that the index structure version number stored in the auxiliary data * matches the one the netfs gave us */ static enum fscache_checkaux fscache_fsdef_netfs_check_aux( void *cookie_netfs_data, const void *data, - uint16_t datalen) + uint16_t datalen, + loff_t object_size) { struct fscache_netfs *netfs = cookie_netfs_data; uint32_t version; diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 0ff4b49a0037..500650f938fe 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -29,6 +29,7 @@ #define pr_fmt(fmt) "FS-Cache: " fmt #include <linux/fscache-cache.h> +#include <trace/events/fscache.h> #include <linux/sched.h> #define FSCACHE_MIN_THREADS 4 @@ -48,8 +49,16 @@ extern struct fscache_cache *fscache_select_cache_for_object( */ extern struct kmem_cache *fscache_cookie_jar; +extern void fscache_free_cookie(struct fscache_cookie *); extern void fscache_cookie_init_once(void *); -extern void __fscache_cookie_put(struct fscache_cookie *); +extern struct fscache_cookie *fscache_alloc_cookie(struct fscache_cookie *, + const struct fscache_cookie_def *, + const void *, size_t, + const void *, size_t, + void *, loff_t); +extern struct fscache_cookie *fscache_hash_cookie(struct fscache_cookie *); +extern void fscache_cookie_put(struct fscache_cookie *, + enum fscache_cookie_trace); /* * fsdef.c @@ -311,14 +320,12 @@ static inline void fscache_raise_event(struct fscache_object *object, fscache_enqueue_object(object); } -/* - * drop a reference to a cookie - */ -static inline void fscache_cookie_put(struct fscache_cookie *cookie) +static inline void fscache_cookie_get(struct fscache_cookie *cookie, + enum fscache_cookie_trace where) { - BUG_ON(atomic_read(&cookie->usage) <= 0); - if (atomic_dec_and_test(&cookie->usage)) - __fscache_cookie_put(cookie); + int usage = atomic_inc_return(&cookie->usage); + + trace_fscache_cookie(cookie, where, usage); } /* @@ -342,6 +349,27 @@ void fscache_put_context(struct fscache_cookie *cookie, void *context) cookie->def->put_context(cookie->netfs_data, context); } +/* + * Update the auxiliary data on a cookie. + */ +static inline +void fscache_update_aux(struct fscache_cookie *cookie, const void *aux_data) +{ + void *p; + + if (!aux_data) + return; + if (cookie->aux_len <= sizeof(cookie->inline_aux)) + p = cookie->inline_aux; + else + p = cookie->aux; + + if (memcmp(p, aux_data, cookie->aux_len) != 0) { + memcpy(p, aux_data, cookie->aux_len); + set_bit(FSCACHE_COOKIE_AUX_UPDATED, &cookie->flags); + } +} + /*****************************************************************************/ /* * debug tracing diff --git a/fs/fscache/main.c b/fs/fscache/main.c index 249968dcbf5c..7dce110bf17d 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c @@ -16,6 +16,7 @@ #include <linux/completion.h> #include <linux/slab.h> #include <linux/seq_file.h> +#define CREATE_TRACE_POINTS #include "internal.h" MODULE_DESCRIPTION("FS Cache Manager"); diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c index a8aa00be4444..c2f605483cc5 100644 --- a/fs/fscache/netfs.c +++ b/fs/fscache/netfs.c @@ -14,69 +14,51 @@ #include <linux/slab.h> #include "internal.h" -static LIST_HEAD(fscache_netfs_list); - /* * register a network filesystem for caching */ int __fscache_register_netfs(struct fscache_netfs *netfs) { - struct fscache_netfs *ptr; - struct fscache_cookie *cookie; - int ret; + struct fscache_cookie *candidate, *cookie; _enter("{%s}", netfs->name); - INIT_LIST_HEAD(&netfs->link); - /* allocate a cookie for the primary index */ - cookie = kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL); - - if (!cookie) { + candidate = fscache_alloc_cookie(&fscache_fsdef_index, + &fscache_fsdef_netfs_def, + netfs->name, strlen(netfs->name), + &netfs->version, sizeof(netfs->version), + netfs, 0); + if (!candidate) { _leave(" = -ENOMEM"); return -ENOMEM; } - /* initialise the primary index cookie */ - atomic_set(&cookie->usage, 1); - atomic_set(&cookie->n_children, 0); - atomic_set(&cookie->n_active, 1); - - cookie->def = &fscache_fsdef_netfs_def; - cookie->parent = &fscache_fsdef_index; - cookie->netfs_data = netfs; - cookie->flags = 1 << FSCACHE_COOKIE_ENABLED; - - spin_lock_init(&cookie->lock); - spin_lock_init(&cookie->stores_lock); - INIT_HLIST_HEAD(&cookie->backing_objects); + candidate->flags = 1 << FSCACHE_COOKIE_ENABLED; /* check the netfs type is not already present */ - down_write(&fscache_addremove_sem); - - ret = -EEXIST; - list_for_each_entry(ptr, &fscache_netfs_list, link) { - if (strcmp(ptr->name, netfs->name) == 0) - goto already_registered; + cookie = fscache_hash_cookie(candidate); + if (!cookie) + goto already_registered; + if (cookie != candidate) { + trace_fscache_cookie(candidate, fscache_cookie_discard, 1); + fscache_free_cookie(candidate); } - atomic_inc(&cookie->parent->usage); + fscache_cookie_get(cookie->parent, fscache_cookie_get_register_netfs); atomic_inc(&cookie->parent->n_children); netfs->primary_index = cookie; - list_add(&netfs->link, &fscache_netfs_list); - ret = 0; pr_notice("Netfs '%s' registered for caching\n", netfs->name); + trace_fscache_netfs(netfs); + _leave(" = 0"); + return 0; already_registered: - up_write(&fscache_addremove_sem); - - if (ret < 0) - kmem_cache_free(fscache_cookie_jar, cookie); - - _leave(" = %d", ret); - return ret; + fscache_cookie_put(candidate, fscache_cookie_put_dup_netfs); + _leave(" = -EEXIST"); + return -EEXIST; } EXPORT_SYMBOL(__fscache_register_netfs); @@ -88,15 +70,8 @@ void __fscache_unregister_netfs(struct fscache_netfs *netfs) { _enter("{%s.%u}", netfs->name, netfs->version); - down_write(&fscache_addremove_sem); - - list_del(&netfs->link); - fscache_relinquish_cookie(netfs->primary_index, 0); - - up_write(&fscache_addremove_sem); - - pr_notice("Netfs '%s' unregistered from caching\n", - netfs->name); + fscache_relinquish_cookie(netfs->primary_index, NULL, false); + pr_notice("Netfs '%s' unregistered from caching\n", netfs->name); _leave(""); } diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index 0438d4cd91ef..43e6e28c164f 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -36,8 +36,6 @@ struct fscache_objlist_data { #define FSCACHE_OBJLIST_CONFIG_NOEVENTS 0x00000800 /* show objects without no events */ #define FSCACHE_OBJLIST_CONFIG_WORK 0x00001000 /* show objects with work */ #define FSCACHE_OBJLIST_CONFIG_NOWORK 0x00002000 /* show objects without work */ - - u8 buf[512]; /* key and aux data buffer */ }; /* @@ -170,7 +168,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v) struct fscache_cookie *cookie; unsigned long config = data->config; char _type[3], *type; - u8 *buf = data->buf, *p; + u8 *p; if ((unsigned long) v == 1) { seq_puts(m, "OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS" @@ -254,7 +252,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v) if (fscache_use_cookie(obj)) { uint16_t keylen = 0, auxlen = 0; - switch (cookie->def->type) { + switch (cookie->type) { case 0: type = "IX"; break; @@ -263,7 +261,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v) break; default: snprintf(_type, sizeof(_type), "%02u", - cookie->def->type); + cookie->type); type = _type; break; } @@ -274,30 +272,30 @@ static int fscache_objlist_show(struct seq_file *m, void *v) cookie->flags, cookie->netfs_data); - if (cookie->def->get_key && - config & FSCACHE_OBJLIST_CONFIG_KEY) - keylen = cookie->def->get_key(cookie->netfs_data, - buf, 400); + if (config & FSCACHE_OBJLIST_CONFIG_KEY) + keylen = cookie->key_len; - if (cookie->def->get_aux && - config & FSCACHE_OBJLIST_CONFIG_AUX) - auxlen = cookie->def->get_aux(cookie->netfs_data, - buf + keylen, 512 - keylen); - fscache_unuse_cookie(obj); + if (config & FSCACHE_OBJLIST_CONFIG_AUX) + auxlen = cookie->aux_len; if (keylen > 0 || auxlen > 0) { seq_puts(m, " "); - for (p = buf; keylen > 0; keylen--) + p = keylen <= sizeof(cookie->inline_key) ? + cookie->inline_key : cookie->key; + for (; keylen > 0; keylen--) seq_printf(m, "%02x", *p++); if (auxlen > 0) { if (config & FSCACHE_OBJLIST_CONFIG_KEY) seq_puts(m, ", "); + p = auxlen <= sizeof(cookie->inline_aux) ? + cookie->inline_aux : cookie->aux; for (; auxlen > 0; auxlen--) seq_printf(m, "%02x", *p++); } } seq_puts(m, "\n"); + fscache_unuse_cookie(obj); } else { seq_puts(m, "<no_netfs>\n"); } diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 7a182c87f378..1085ca12e25c 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -138,10 +138,13 @@ static const struct fscache_transition fscache_osm_run_oob[] = { { 0, NULL } }; -static int fscache_get_object(struct fscache_object *); -static void fscache_put_object(struct fscache_object *); +static int fscache_get_object(struct fscache_object *, + enum fscache_obj_ref_trace); +static void fscache_put_object(struct fscache_object *, + enum fscache_obj_ref_trace); static bool fscache_enqueue_dependents(struct fscache_object *, int); static void fscache_dequeue_object(struct fscache_object *); +static void fscache_update_aux_data(struct fscache_object *); /* * we need to notify the parent when an op completes that we had outstanding @@ -170,6 +173,7 @@ static void fscache_object_sm_dispatcher(struct fscache_object *object) const struct fscache_transition *t; const struct fscache_state *state, *new_state; unsigned long events, event_mask; + bool oob; int event = -1; ASSERT(object != NULL); @@ -188,6 +192,7 @@ restart_masked: if (events & object->oob_event_mask) { _debug("{OBJ%x} oob %lx", object->debug_id, events & object->oob_event_mask); + oob = true; for (t = object->oob_table; t->events; t++) { if (events & t->events) { state = t->transit_to; @@ -199,6 +204,7 @@ restart_masked: } } } + oob = false; /* Wait states are just transition tables */ if (!state->work) { @@ -207,6 +213,8 @@ restart_masked: if (events & t->events) { new_state = t->transit_to; event = fls(events & t->events) - 1; + trace_fscache_osm(object, state, + true, false, event); clear_bit(event, &object->events); _debug("{OBJ%x} ev %d: %s -> %s", object->debug_id, event, @@ -226,6 +234,7 @@ restart_masked: execute_work_state: _debug("{OBJ%x} exec %s", object->debug_id, state->name); + trace_fscache_osm(object, state, false, oob, event); new_state = state->work(object, event); event = -1; if (new_state == NO_TRANSIT) { @@ -279,7 +288,7 @@ static void fscache_object_work_func(struct work_struct *work) start = jiffies; fscache_object_sm_dispatcher(object); fscache_hist(fscache_objs_histogram, start); - fscache_put_object(object); + fscache_put_object(object, fscache_obj_put_work); } /** @@ -397,7 +406,7 @@ static const struct fscache_state *fscache_initialise_object(struct fscache_obje fscache_stat(&fscache_n_cop_grab_object); success = false; if (fscache_object_is_live(parent) && - object->cache->ops->grab_object(object)) { + object->cache->ops->grab_object(object, fscache_obj_get_add_to_deps)) { list_add(&object->dep_link, &parent->dependents); success = true; } @@ -703,6 +712,11 @@ static const struct fscache_state *fscache_drop_object(struct fscache_object *ob ASSERT(cookie != NULL); ASSERT(!hlist_unhashed(&object->cookie_link)); + if (test_bit(FSCACHE_COOKIE_AUX_UPDATED, &cookie->flags)) { + _debug("final update"); + fscache_update_aux_data(object); + } + /* Make sure the cookie no longer points here and that the netfs isn't * waiting for us. */ @@ -745,7 +759,7 @@ static const struct fscache_state *fscache_drop_object(struct fscache_object *ob } /* this just shifts the object release to the work processor */ - fscache_put_object(object); + fscache_put_object(object, fscache_obj_put_drop_obj); fscache_stat(&fscache_n_object_dead); _leave(""); @@ -755,12 +769,13 @@ static const struct fscache_state *fscache_drop_object(struct fscache_object *ob /* * get a ref on an object */ -static int fscache_get_object(struct fscache_object *object) +static int fscache_get_object(struct fscache_object *object, + enum fscache_obj_ref_trace why) { int ret; fscache_stat(&fscache_n_cop_grab_object); - ret = object->cache->ops->grab_object(object) ? 0 : -EAGAIN; + ret = object->cache->ops->grab_object(object, why) ? 0 : -EAGAIN; fscache_stat_d(&fscache_n_cop_grab_object); return ret; } @@ -768,10 +783,11 @@ static int fscache_get_object(struct fscache_object *object) /* * Discard a ref on an object */ -static void fscache_put_object(struct fscache_object *object) +static void fscache_put_object(struct fscache_object *object, + enum fscache_obj_ref_trace why) { fscache_stat(&fscache_n_cop_put_object); - object->cache->ops->put_object(object); + object->cache->ops->put_object(object, why); fscache_stat_d(&fscache_n_cop_put_object); } @@ -786,7 +802,7 @@ void fscache_object_destroy(struct fscache_object *object) fscache_objlist_remove(object); /* We can get rid of the cookie now */ - fscache_cookie_put(object->cookie); + fscache_cookie_put(object->cookie, fscache_cookie_put_object); object->cookie = NULL; } EXPORT_SYMBOL(fscache_object_destroy); @@ -798,7 +814,7 @@ void fscache_enqueue_object(struct fscache_object *object) { _enter("{OBJ%x}", object->debug_id); - if (fscache_get_object(object) >= 0) { + if (fscache_get_object(object, fscache_obj_get_queue) >= 0) { wait_queue_head_t *cong_wq = &get_cpu_var(fscache_object_cong_wait); @@ -806,7 +822,7 @@ void fscache_enqueue_object(struct fscache_object *object) if (fscache_object_congested()) wake_up(cong_wq); } else - fscache_put_object(object); + fscache_put_object(object, fscache_obj_put_queue); put_cpu_var(fscache_object_cong_wait); } @@ -866,7 +882,7 @@ static bool fscache_enqueue_dependents(struct fscache_object *object, int event) list_del_init(&dep->dep_link); fscache_raise_event(dep, event); - fscache_put_object(dep); + fscache_put_object(dep, fscache_obj_put_enq_dep); if (!list_empty(&object->dependents) && need_resched()) { ret = false; @@ -906,7 +922,8 @@ static void fscache_dequeue_object(struct fscache_object *object) * and creation). */ enum fscache_checkaux fscache_check_aux(struct fscache_object *object, - const void *data, uint16_t datalen) + const void *data, uint16_t datalen, + loff_t object_size) { enum fscache_checkaux result; @@ -916,7 +933,7 @@ enum fscache_checkaux fscache_check_aux(struct fscache_object *object, } result = object->cookie->def->check_aux(object->cookie->netfs_data, - data, datalen); + data, datalen, object_size); switch (result) { /* entry okay as is */ case FSCACHE_CHECKAUX_OKAY: @@ -972,11 +989,12 @@ static const struct fscache_state *_fscache_invalidate_object(struct fscache_obj if (!op) goto nomem; - fscache_operation_init(op, object->cache->ops->invalidate_object, + fscache_operation_init(cookie, op, object->cache->ops->invalidate_object, NULL, NULL); op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE) | (1 << FSCACHE_OP_UNUSE_COOKIE); + trace_fscache_page_op(cookie, NULL, op, fscache_page_op_invalidate); spin_lock(&cookie->lock); if (fscache_submit_exclusive_op(object, op) < 0) @@ -1026,6 +1044,17 @@ static const struct fscache_state *fscache_invalidate_object(struct fscache_obje } /* + * Update auxiliary data. + */ +static void fscache_update_aux_data(struct fscache_object *object) +{ + fscache_stat(&fscache_n_updates_run); + fscache_stat(&fscache_n_cop_update_object); + object->cache->ops->update_object(object); + fscache_stat_d(&fscache_n_cop_update_object); +} + +/* * Asynchronously update an object. */ static const struct fscache_state *fscache_update_object(struct fscache_object *object, @@ -1033,10 +1062,7 @@ static const struct fscache_state *fscache_update_object(struct fscache_object * { _enter("{OBJ%x},%d", object->debug_id, event); - fscache_stat(&fscache_n_updates_run); - fscache_stat(&fscache_n_cop_update_object); - object->cache->ops->update_object(object); - fscache_stat_d(&fscache_n_cop_update_object); + fscache_update_aux_data(object); _leave(""); return transit_to(WAIT_FOR_CMD); diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index de67745e1cd7..e30c5975ea58 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -32,7 +32,8 @@ static void fscache_operation_dummy_cancel(struct fscache_operation *op) * Do basic initialisation of an operation. The caller must still set flags, * object and processor if needed. */ -void fscache_operation_init(struct fscache_operation *op, +void fscache_operation_init(struct fscache_cookie *cookie, + struct fscache_operation *op, fscache_operation_processor_t processor, fscache_operation_cancel_t cancel, fscache_operation_release_t release) @@ -46,6 +47,7 @@ void fscache_operation_init(struct fscache_operation *op, op->release = release; INIT_LIST_HEAD(&op->pend_link); fscache_stat(&fscache_n_op_initialised); + trace_fscache_op(cookie, op, fscache_op_init); } EXPORT_SYMBOL(fscache_operation_init); @@ -59,6 +61,8 @@ EXPORT_SYMBOL(fscache_operation_init); */ void fscache_enqueue_operation(struct fscache_operation *op) { + struct fscache_cookie *cookie = op->object->cookie; + _enter("{OBJ%x OP%x,%u}", op->object->debug_id, op->debug_id, atomic_read(&op->usage)); @@ -71,12 +75,14 @@ void fscache_enqueue_operation(struct fscache_operation *op) fscache_stat(&fscache_n_op_enqueue); switch (op->flags & FSCACHE_OP_TYPE) { case FSCACHE_OP_ASYNC: + trace_fscache_op(cookie, op, fscache_op_enqueue_async); _debug("queue async"); atomic_inc(&op->usage); if (!queue_work(fscache_op_wq, &op->work)) fscache_put_operation(op); break; case FSCACHE_OP_MYTHREAD: + trace_fscache_op(cookie, op, fscache_op_enqueue_mythread); _debug("queue for caller's attention"); break; default: @@ -101,6 +107,8 @@ static void fscache_run_op(struct fscache_object *object, wake_up_bit(&op->flags, FSCACHE_OP_WAITING); if (op->processor) fscache_enqueue_operation(op); + else + trace_fscache_op(object->cookie, op, fscache_op_run); fscache_stat(&fscache_n_op_run); } @@ -155,6 +163,8 @@ int fscache_submit_exclusive_op(struct fscache_object *object, _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); + trace_fscache_op(object->cookie, op, fscache_op_submit_ex); + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED); ASSERTCMP(atomic_read(&op->usage), >, 0); @@ -240,6 +250,8 @@ int fscache_submit_op(struct fscache_object *object, _enter("{OBJ%x OP%x},{%u}", object->debug_id, op->debug_id, atomic_read(&op->usage)); + trace_fscache_op(object->cookie, op, fscache_op_submit); + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED); ASSERTCMP(atomic_read(&op->usage), >, 0); @@ -357,6 +369,8 @@ int fscache_cancel_op(struct fscache_operation *op, _enter("OBJ%x OP%x}", op->object->debug_id, op->debug_id); + trace_fscache_op(object->cookie, op, fscache_op_cancel); + ASSERTCMP(op->state, >=, FSCACHE_OP_ST_PENDING); ASSERTCMP(op->state, !=, FSCACHE_OP_ST_CANCELLED); ASSERTCMP(atomic_read(&op->usage), >, 0); @@ -419,6 +433,8 @@ void fscache_cancel_all_ops(struct fscache_object *object) fscache_stat(&fscache_n_op_cancelled); list_del_init(&op->pend_link); + trace_fscache_op(object->cookie, op, fscache_op_cancel_all); + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING); op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; @@ -454,9 +470,11 @@ void fscache_op_complete(struct fscache_operation *op, bool cancelled) spin_lock(&object->lock); if (!cancelled) { + trace_fscache_op(object->cookie, op, fscache_op_completed); op->state = FSCACHE_OP_ST_COMPLETE; } else { op->cancel(op); + trace_fscache_op(object->cookie, op, fscache_op_cancelled); op->state = FSCACHE_OP_ST_CANCELLED; } @@ -488,6 +506,8 @@ void fscache_put_operation(struct fscache_operation *op) if (!atomic_dec_and_test(&op->usage)) return; + trace_fscache_op(op->object ? op->object->cookie : NULL, op, fscache_op_put); + _debug("PUT OP"); ASSERTIFCMP(op->state != FSCACHE_OP_ST_INITIALISED && op->state != FSCACHE_OP_ST_COMPLETE, @@ -563,6 +583,8 @@ void fscache_operation_gc(struct work_struct *work) spin_unlock(&cache->op_gc_list_lock); object = op->object; + trace_fscache_op(object->cookie, op, fscache_op_gc); + spin_lock(&object->lock); _debug("GC DEFERRED REL OBJ%x OP%x", @@ -601,6 +623,8 @@ void fscache_op_work_func(struct work_struct *work) _enter("{OBJ%x OP%x,%d}", op->object->debug_id, op->debug_id, atomic_read(&op->usage)); + trace_fscache_op(op->object->cookie, op, fscache_op_work); + ASSERT(op->processor != NULL); start = jiffies; op->processor(op); diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 961029e04027..111349f67d98 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -27,6 +27,7 @@ bool __fscache_check_page_write(struct fscache_cookie *cookie, struct page *page rcu_read_lock(); val = radix_tree_lookup(&cookie->stores, page->index); rcu_read_unlock(); + trace_fscache_check_page(cookie, page, val, 0); return val != NULL; } @@ -39,6 +40,8 @@ void __fscache_wait_on_page_write(struct fscache_cookie *cookie, struct page *pa { wait_queue_head_t *wq = bit_waitqueue(&cookie->flags, 0); + trace_fscache_page(cookie, page, fscache_page_write_wait); + wait_event(*wq, !__fscache_check_page_write(cookie, page)); } EXPORT_SYMBOL(__fscache_wait_on_page_write); @@ -69,6 +72,8 @@ bool __fscache_maybe_release_page(struct fscache_cookie *cookie, _enter("%p,%p,%x", cookie, page, gfp); + trace_fscache_page(cookie, page, fscache_page_maybe_release); + try_again: rcu_read_lock(); val = radix_tree_lookup(&cookie->stores, page->index); @@ -101,6 +106,7 @@ try_again: } xpage = radix_tree_delete(&cookie->stores, page->index); + trace_fscache_page(cookie, page, fscache_page_radix_delete); spin_unlock(&cookie->stores_lock); if (xpage) { @@ -112,6 +118,7 @@ try_again: } wake_up_bit(&cookie->flags, 0); + trace_fscache_wake_cookie(cookie); if (xpage) put_page(xpage); __fscache_uncache_page(cookie, page); @@ -144,7 +151,7 @@ static void fscache_end_page_write(struct fscache_object *object, struct page *page) { struct fscache_cookie *cookie; - struct page *xpage = NULL; + struct page *xpage = NULL, *val; spin_lock(&object->lock); cookie = object->cookie; @@ -154,13 +161,24 @@ static void fscache_end_page_write(struct fscache_object *object, spin_lock(&cookie->stores_lock); radix_tree_tag_clear(&cookie->stores, page->index, FSCACHE_COOKIE_STORING_TAG); + trace_fscache_page(cookie, page, fscache_page_radix_clear_store); if (!radix_tree_tag_get(&cookie->stores, page->index, FSCACHE_COOKIE_PENDING_TAG)) { fscache_stat(&fscache_n_store_radix_deletes); xpage = radix_tree_delete(&cookie->stores, page->index); + trace_fscache_page(cookie, page, fscache_page_radix_delete); + trace_fscache_page(cookie, page, fscache_page_write_end); + + val = radix_tree_lookup(&cookie->stores, page->index); + trace_fscache_check_page(cookie, page, val, 1); + } else { + trace_fscache_page(cookie, page, fscache_page_write_end_pend); } spin_unlock(&cookie->stores_lock); wake_up_bit(&cookie->flags, 0); + trace_fscache_wake_cookie(cookie); + } else { + trace_fscache_page(cookie, page, fscache_page_write_end_noc); } spin_unlock(&object->lock); if (xpage) @@ -185,9 +203,11 @@ static void fscache_attr_changed_op(struct fscache_operation *op) fscache_stat_d(&fscache_n_cop_attr_changed); if (ret < 0) fscache_abort_object(object); + fscache_op_complete(op, ret < 0); + } else { + fscache_op_complete(op, true); } - fscache_op_complete(op, true); _leave(""); } @@ -213,7 +233,8 @@ int __fscache_attr_changed(struct fscache_cookie *cookie) return -ENOMEM; } - fscache_operation_init(op, fscache_attr_changed_op, NULL, NULL); + fscache_operation_init(cookie, op, fscache_attr_changed_op, NULL, NULL); + trace_fscache_page_op(cookie, NULL, op, fscache_page_op_attr_changed); op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE) | (1 << FSCACHE_OP_UNUSE_COOKIE); @@ -297,7 +318,7 @@ static struct fscache_retrieval *fscache_alloc_retrieval( return NULL; } - fscache_operation_init(&op->op, NULL, + fscache_operation_init(cookie, &op->op, NULL, fscache_do_cancel_retrieval, fscache_release_retrieval_op); op->op.flags = FSCACHE_OP_MYTHREAD | @@ -368,6 +389,7 @@ int fscache_wait_for_operation_activation(struct fscache_object *object, fscache_stat(stat_op_waits); if (wait_on_bit(&op->flags, FSCACHE_OP_WAITING, TASK_INTERRUPTIBLE) != 0) { + trace_fscache_op(object->cookie, op, fscache_op_signal); ret = fscache_cancel_op(op, false); if (ret == 0) return -ERESTARTSYS; @@ -389,6 +411,7 @@ check_if_dead: if (unlikely(fscache_object_is_dying(object) || fscache_cache_is_broken(object))) { enum fscache_operation_state state = op->state; + trace_fscache_op(object->cookie, op, fscache_op_signal); fscache_cancel_op(op, true); if (stat_object_dead) fscache_stat(stat_object_dead); @@ -443,6 +466,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, return -ENOMEM; } atomic_set(&op->n_pages, 1); + trace_fscache_page_op(cookie, page, &op->op, fscache_page_op_retr_one); spin_lock(&cookie->lock); @@ -571,6 +595,7 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, if (!op) return -ENOMEM; atomic_set(&op->n_pages, *nr_pages); + trace_fscache_page_op(cookie, NULL, &op->op, fscache_page_op_retr_multi); spin_lock(&cookie->lock); @@ -682,6 +707,7 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, if (!op) return -ENOMEM; atomic_set(&op->n_pages, 1); + trace_fscache_page_op(cookie, page, &op->op, fscache_page_op_alloc_one); spin_lock(&cookie->lock); @@ -776,15 +802,17 @@ static void fscache_write_op(struct fscache_operation *_op) _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage)); +again: spin_lock(&object->lock); cookie = object->cookie; if (!fscache_object_is_active(object)) { - /* If we get here, then the on-disk cache object likely longer - * exists, so we should just cancel this write operation. + /* If we get here, then the on-disk cache object likely no + * longer exists, so we should just cancel this write + * operation. */ spin_unlock(&object->lock); - fscache_op_complete(&op->op, false); + fscache_op_complete(&op->op, true); _leave(" [inactive]"); return; } @@ -797,7 +825,7 @@ static void fscache_write_op(struct fscache_operation *_op) * cancel this write operation. */ spin_unlock(&object->lock); - fscache_op_complete(&op->op, false); + fscache_op_complete(&op->op, true); _leave(" [cancel] op{f=%lx s=%u} obj{s=%s f=%lx}", _op->flags, _op->state, object->state->short_name, object->flags); @@ -809,30 +837,33 @@ static void fscache_write_op(struct fscache_operation *_op) fscache_stat(&fscache_n_store_calls); /* find a page to store */ + results[0] = NULL; page = NULL; n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0, 1, FSCACHE_COOKIE_PENDING_TAG); + trace_fscache_gang_lookup(cookie, &op->op, results, n, op->store_limit); if (n != 1) goto superseded; page = results[0]; _debug("gang %d [%lx]", n, page->index); - if (page->index >= op->store_limit) { - fscache_stat(&fscache_n_store_pages_over_limit); - goto superseded; - } radix_tree_tag_set(&cookie->stores, page->index, FSCACHE_COOKIE_STORING_TAG); radix_tree_tag_clear(&cookie->stores, page->index, FSCACHE_COOKIE_PENDING_TAG); + trace_fscache_page(cookie, page, fscache_page_radix_pend2store); spin_unlock(&cookie->stores_lock); spin_unlock(&object->lock); + if (page->index >= op->store_limit) + goto discard_page; + fscache_stat(&fscache_n_store_pages); fscache_stat(&fscache_n_cop_write_page); ret = object->cache->ops->write_page(op, page); fscache_stat_d(&fscache_n_cop_write_page); + trace_fscache_wrote_page(cookie, page, &op->op, ret); fscache_end_page_write(object, page); if (ret < 0) { fscache_abort_object(object); @@ -844,6 +875,12 @@ static void fscache_write_op(struct fscache_operation *_op) _leave(""); return; +discard_page: + fscache_stat(&fscache_n_store_pages_over_limit); + trace_fscache_wrote_page(cookie, page, &op->op, -ENOBUFS); + fscache_end_page_write(object, page); + goto again; + superseded: /* this writer is going away and there aren't any more things to * write */ @@ -851,7 +888,7 @@ superseded: spin_unlock(&cookie->stores_lock); clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); spin_unlock(&object->lock); - fscache_op_complete(&op->op, true); + fscache_op_complete(&op->op, false); _leave(""); } @@ -879,6 +916,8 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie) for (i = n - 1; i >= 0; i--) { page = results[i]; radix_tree_delete(&cookie->stores, page->index); + trace_fscache_page(cookie, page, fscache_page_radix_delete); + trace_fscache_page(cookie, page, fscache_page_inval); } spin_unlock(&cookie->stores_lock); @@ -888,6 +927,7 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie) } wake_up_bit(&cookie->flags, 0); + trace_fscache_wake_cookie(cookie); _leave(""); } @@ -923,6 +963,7 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie) */ int __fscache_write_page(struct fscache_cookie *cookie, struct page *page, + loff_t object_size, gfp_t gfp) { struct fscache_storage *op; @@ -946,7 +987,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, if (!op) goto nomem; - fscache_operation_init(&op->op, fscache_write_op, NULL, + fscache_operation_init(cookie, &op->op, fscache_write_op, NULL, fscache_release_write_op); op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING) | @@ -956,6 +997,8 @@ int __fscache_write_page(struct fscache_cookie *cookie, if (ret < 0) goto nomem_free; + trace_fscache_page_op(cookie, page, &op->op, fscache_page_op_write_one); + ret = -ENOBUFS; spin_lock(&cookie->lock); @@ -967,9 +1010,15 @@ int __fscache_write_page(struct fscache_cookie *cookie, if (test_bit(FSCACHE_IOERROR, &object->cache->flags)) goto nobufs; + trace_fscache_page(cookie, page, fscache_page_write); + /* add the page to the pending-storage radix tree on the backing * object */ spin_lock(&object->lock); + + if (object->store_limit_l != object_size) + fscache_set_store_limit(object, object_size); + spin_lock(&cookie->stores_lock); _debug("store limit %llx", (unsigned long long) object->store_limit); @@ -982,8 +1031,10 @@ int __fscache_write_page(struct fscache_cookie *cookie, goto nobufs_unlock_obj; } + trace_fscache_page(cookie, page, fscache_page_radix_insert); radix_tree_tag_set(&cookie->stores, page->index, FSCACHE_COOKIE_PENDING_TAG); + trace_fscache_page(cookie, page, fscache_page_radix_set_pend); get_page(page); /* we only want one writer at a time, but we do need to queue new @@ -1026,6 +1077,7 @@ already_pending: submit_failed: spin_lock(&cookie->stores_lock); radix_tree_delete(&cookie->stores, page->index); + trace_fscache_page(cookie, page, fscache_page_radix_delete); spin_unlock(&cookie->stores_lock); wake_cookie = __fscache_unuse_cookie(cookie); put_page(page); @@ -1072,6 +1124,8 @@ void __fscache_uncache_page(struct fscache_cookie *cookie, struct page *page) if (!PageFsCache(page)) goto done; + trace_fscache_page(cookie, page, fscache_page_uncache); + /* get the object */ spin_lock(&cookie->lock); @@ -1120,6 +1174,8 @@ void fscache_mark_page_cached(struct fscache_retrieval *op, struct page *page) atomic_inc(&fscache_n_marks); #endif + trace_fscache_page(cookie, page, fscache_page_cached); + _debug("- mark %p{%lx}", page, page->index); if (TestSetPageFsCache(page)) { static bool once_only; diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 7ac6e839b065..fcc8c2f2690e 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -21,7 +21,6 @@ atomic_t fscache_n_op_pend; atomic_t fscache_n_op_run; atomic_t fscache_n_op_enqueue; -atomic_t fscache_n_op_requeue; atomic_t fscache_n_op_deferred_release; atomic_t fscache_n_op_initialised; atomic_t fscache_n_op_release; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 624f18bbfd2b..ef309958e060 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1080,6 +1080,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_time_gran = 1; sb->s_export_op = &fuse_export_operations; + sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE; + if (sb->s_user_ns != &init_user_ns) + sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER; file = fget(d.fd); err = -EINVAL; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 620be0521866..cf5c7f3080d2 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -800,7 +800,7 @@ static void gfs2_dirty_inode(struct inode *inode, int flags) int need_endtrans = 0; int ret; - if (!(flags & (I_DIRTY_DATASYNC|I_DIRTY_SYNC))) + if (!(flags & I_DIRTY_INODE)) return; if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) return; diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 05de20954659..f2bce1e0f6fb 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -308,7 +308,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, } ip->i_inode.i_ctime = current_time(&ip->i_inode); - __mark_inode_dirty(&ip->i_inode, I_DIRTY_SYNC | I_DIRTY_DATASYNC); + __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC); gfs2_trans_end(sdp); @@ -768,7 +768,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, goto out_end_trans; ip->i_inode.i_ctime = current_time(&ip->i_inode); - __mark_inode_dirty(&ip->i_inode, I_DIRTY_SYNC | I_DIRTY_DATASYNC); + __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC); out_end_trans: gfs2_trans_end(GFS2_SB(&ip->i_inode)); @@ -896,7 +896,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh, ea_set_remove_stuffed(ip, es->es_el); ip->i_inode.i_ctime = current_time(&ip->i_inode); - __mark_inode_dirty(&ip->i_inode, I_DIRTY_SYNC | I_DIRTY_DATASYNC); + __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC); gfs2_trans_end(GFS2_SB(&ip->i_inode)); return error; @@ -1114,7 +1114,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) } ip->i_inode.i_ctime = current_time(&ip->i_inode); - __mark_inode_dirty(&ip->i_inode, I_DIRTY_SYNC | I_DIRTY_DATASYNC); + __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC); gfs2_trans_end(GFS2_SB(&ip->i_inode)); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index b9a254dcc0e7..d508c7844681 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -138,10 +138,14 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) /* * page based offset in vm_pgoff could be sufficiently large to - * overflow a (l)off_t when converted to byte offset. + * overflow a loff_t when converted to byte offset. This can + * only happen on architectures where sizeof(loff_t) == + * sizeof(unsigned long). So, only check in those instances. */ - if (vma->vm_pgoff & PGOFF_LOFFT_MAX) - return -EINVAL; + if (sizeof(unsigned long) == sizeof(loff_t)) { + if (vma->vm_pgoff & PGOFF_LOFFT_MAX) + return -EINVAL; + } /* must be huge page aligned */ if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) diff --git a/fs/internal.h b/fs/internal.h index 980d005b21b4..e08972db0303 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -125,7 +125,6 @@ int do_fchmodat(int dfd, const char __user *filename, umode_t mode); int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); -extern int open_check_o_direct(struct file *f); extern int vfs_open(const struct path *, struct file *, const struct cred *); extern struct file *filp_clone_open(struct file *); diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index 4a6cf289be24..83b8f06b4a64 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -21,14 +21,6 @@ #include <linux/pagemap.h> #include "nodelist.h" -struct erase_priv_struct { - struct jffs2_eraseblock *jeb; - struct jffs2_sb_info *c; -}; - -#ifndef __ECOS -static void jffs2_erase_callback(struct erase_info *); -#endif static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t bad_offset); static void jffs2_erase_succeeded(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb); static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb); @@ -51,7 +43,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c, jffs2_dbg(1, "%s(): erase block %#08x (range %#08x-%#08x)\n", __func__, jeb->offset, jeb->offset, jeb->offset + c->sector_size); - instr = kmalloc(sizeof(struct erase_info) + sizeof(struct erase_priv_struct), GFP_KERNEL); + instr = kmalloc(sizeof(struct erase_info), GFP_KERNEL); if (!instr) { pr_warn("kmalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n"); mutex_lock(&c->erase_free_sem); @@ -67,18 +59,15 @@ static void jffs2_erase_block(struct jffs2_sb_info *c, memset(instr, 0, sizeof(*instr)); - instr->mtd = c->mtd; instr->addr = jeb->offset; instr->len = c->sector_size; - instr->callback = jffs2_erase_callback; - instr->priv = (unsigned long)(&instr[1]); - - ((struct erase_priv_struct *)instr->priv)->jeb = jeb; - ((struct erase_priv_struct *)instr->priv)->c = c; ret = mtd_erase(c->mtd, instr); - if (!ret) + if (!ret) { + jffs2_erase_succeeded(c, jeb); + kfree(instr); return; + } bad_offset = instr->fail_addr; kfree(instr); @@ -214,22 +203,6 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock wake_up(&c->erase_wait); } -#ifndef __ECOS -static void jffs2_erase_callback(struct erase_info *instr) -{ - struct erase_priv_struct *priv = (void *)instr->priv; - - if(instr->state != MTD_ERASE_DONE) { - pr_warn("Erase at 0x%08llx finished, but state != MTD_ERASE_DONE. State is 0x%x instead.\n", - (unsigned long long)instr->addr, instr->state); - jffs2_erase_failed(priv->c, priv->jeb, instr->fail_addr); - } else { - jffs2_erase_succeeded(priv->c, priv->jeb); - } - kfree(instr); -} -#endif /* !__ECOS */ - /* Hmmm. Maybe we should accept the extra space it takes and make this a standard doubly-linked list? */ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c, diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 9c36d614bf89..346ed161756d 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -57,8 +57,8 @@ static struct task_struct *nlmsvc_task; static struct svc_rqst *nlmsvc_rqst; unsigned long nlmsvc_timeout; -atomic_t nlm_ntf_refcnt = ATOMIC_INIT(0); -DECLARE_WAIT_QUEUE_HEAD(nlm_ntf_wq); +static atomic_t nlm_ntf_refcnt = ATOMIC_INIT(0); +static DECLARE_WAIT_QUEUE_HEAD(nlm_ntf_wq); unsigned int lockd_net_id; diff --git a/fs/locks.c b/fs/locks.c index d6ff4beb70ce..62bbe8b31f26 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -559,7 +559,7 @@ static const struct lock_manager_operations lease_manager_ops = { * Initialize a lease, use the default lock manager operations */ static int lease_init(struct file *filp, long type, struct file_lock *fl) - { +{ if (assign_type(fl, type) != 0) return -EINVAL; diff --git a/fs/namei.c b/fs/namei.c index a09419379f5d..186bd2464fd5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -39,6 +39,7 @@ #include <linux/bitops.h> #include <linux/init_task.h> #include <linux/uaccess.h> +#include <linux/build_bug.h> #include "internal.h" #include "mount.h" @@ -130,6 +131,7 @@ getname_flags(const char __user *filename, int flags, int *empty) struct filename *result; char *kname; int len; + BUILD_BUG_ON(offsetof(struct filename, iname) % sizeof(long) != 0); result = audit_reusename(filename); if (result) @@ -222,9 +224,10 @@ getname_kernel(const char * filename) if (len <= EMBEDDED_NAME_MAX) { result->name = (char *)result->iname; } else if (len <= PATH_MAX) { + const size_t size = offsetof(struct filename, iname[1]); struct filename *tmp; - tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); + tmp = kmalloc(size, GFP_KERNEL); if (unlikely(!tmp)) { __putname(result); return ERR_PTR(-ENOMEM); @@ -927,7 +930,8 @@ static inline int may_follow_link(struct nameidata *nd) if (nd->flags & LOOKUP_RCU) return -ECHILD; - audit_log_link_denied("follow_link", &nd->stack[0].link); + audit_inode(nd->name, nd->stack[0].link.dentry, 0); + audit_log_link_denied("follow_link"); return -EACCES; } @@ -993,7 +997,7 @@ static int may_linkat(struct path *link) if (safe_hardlink_source(inode) || inode_owner_or_capable(inode)) return 0; - audit_log_link_denied("linkat", link); + audit_log_link_denied("linkat"); return -EPERM; } @@ -1594,22 +1598,21 @@ static int lookup_fast(struct nameidata *nd, } /* Fast lookup failed, do it the slow way */ -static struct dentry *lookup_slow(const struct qstr *name, - struct dentry *dir, - unsigned int flags) +static struct dentry *__lookup_slow(const struct qstr *name, + struct dentry *dir, + unsigned int flags) { - struct dentry *dentry = ERR_PTR(-ENOENT), *old; + struct dentry *dentry, *old; struct inode *inode = dir->d_inode; DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); - inode_lock_shared(inode); /* Don't go there if it's already dead */ if (unlikely(IS_DEADDIR(inode))) - goto out; + return ERR_PTR(-ENOENT); again: dentry = d_alloc_parallel(dir, name, &wq); if (IS_ERR(dentry)) - goto out; + return dentry; if (unlikely(!d_in_lookup(dentry))) { if (!(flags & LOOKUP_NO_REVAL)) { int error = d_revalidate(dentry, flags); @@ -1631,11 +1634,21 @@ again: dentry = old; } } -out: - inode_unlock_shared(inode); return dentry; } +static struct dentry *lookup_slow(const struct qstr *name, + struct dentry *dir, + unsigned int flags) +{ + struct inode *inode = dir->d_inode; + struct dentry *res; + inode_lock_shared(inode); + res = __lookup_slow(name, dir, flags); + inode_unlock_shared(inode); + return res; +} + static inline int may_lookup(struct nameidata *nd) { if (nd->flags & LOOKUP_RCU) { @@ -2418,56 +2431,63 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, } EXPORT_SYMBOL(vfs_path_lookup); -/** - * lookup_one_len - filesystem helper to lookup single pathname component - * @name: pathname component to lookup - * @base: base directory to lookup from - * @len: maximum length @len should be interpreted to - * - * Note that this routine is purely a helper for filesystem usage and should - * not be called by generic code. - * - * The caller must hold base->i_mutex. - */ -struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) +static int lookup_one_len_common(const char *name, struct dentry *base, + int len, struct qstr *this) { - struct qstr this; - unsigned int c; - int err; - - WARN_ON_ONCE(!inode_is_locked(base->d_inode)); - - this.name = name; - this.len = len; - this.hash = full_name_hash(base, name, len); + this->name = name; + this->len = len; + this->hash = full_name_hash(base, name, len); if (!len) - return ERR_PTR(-EACCES); + return -EACCES; if (unlikely(name[0] == '.')) { if (len < 2 || (len == 2 && name[1] == '.')) - return ERR_PTR(-EACCES); + return -EACCES; } while (len--) { - c = *(const unsigned char *)name++; + unsigned int c = *(const unsigned char *)name++; if (c == '/' || c == '\0') - return ERR_PTR(-EACCES); + return -EACCES; } /* * See if the low-level filesystem might want * to use its own hash.. */ if (base->d_flags & DCACHE_OP_HASH) { - int err = base->d_op->d_hash(base, &this); + int err = base->d_op->d_hash(base, this); if (err < 0) - return ERR_PTR(err); + return err; } - err = inode_permission(base->d_inode, MAY_EXEC); + return inode_permission(base->d_inode, MAY_EXEC); +} + +/** + * lookup_one_len - filesystem helper to lookup single pathname component + * @name: pathname component to lookup + * @base: base directory to lookup from + * @len: maximum length @len should be interpreted to + * + * Note that this routine is purely a helper for filesystem usage and should + * not be called by generic code. + * + * The caller must hold base->i_mutex. + */ +struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) +{ + struct dentry *dentry; + struct qstr this; + int err; + + WARN_ON_ONCE(!inode_is_locked(base->d_inode)); + + err = lookup_one_len_common(name, base, len, &this); if (err) return ERR_PTR(err); - return __lookup_hash(&this, base, 0); + dentry = lookup_dcache(&this, base, 0); + return dentry ? dentry : __lookup_slow(&this, base, 0); } EXPORT_SYMBOL(lookup_one_len); @@ -2487,37 +2507,10 @@ struct dentry *lookup_one_len_unlocked(const char *name, struct dentry *base, int len) { struct qstr this; - unsigned int c; int err; struct dentry *ret; - this.name = name; - this.len = len; - this.hash = full_name_hash(base, name, len); - if (!len) - return ERR_PTR(-EACCES); - - if (unlikely(name[0] == '.')) { - if (len < 2 || (len == 2 && name[1] == '.')) - return ERR_PTR(-EACCES); - } - - while (len--) { - c = *(const unsigned char *)name++; - if (c == '/' || c == '\0') - return ERR_PTR(-EACCES); - } - /* - * See if the low-level filesystem might want - * to use its own hash.. - */ - if (base->d_flags & DCACHE_OP_HASH) { - int err = base->d_op->d_hash(base, &this); - if (err < 0) - return ERR_PTR(err); - } - - err = inode_permission(base->d_inode, MAY_EXEC); + err = lookup_one_len_common(name, base, len, &this); if (err) return ERR_PTR(err); @@ -3374,9 +3367,7 @@ finish_open_created: goto out; *opened |= FILE_OPENED; opened: - error = open_check_o_direct(file); - if (!error) - error = ima_file_check(file, op->acc_mode, *opened); + error = ima_file_check(file, op->acc_mode, *opened); if (!error && will_truncate) error = handle_truncate(file); out: @@ -3456,9 +3447,6 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, error = finish_open(file, child, NULL, opened); if (error) goto out2; - error = open_check_o_direct(file); - if (error) - fput(file); out2: mnt_drop_write(path.mnt); out: diff --git a/fs/nfs/fscache-index.c b/fs/nfs/fscache-index.c index 0ee4b93d36ea..1c5d8d31fc0a 100644 --- a/fs/nfs/fscache-index.c +++ b/fs/nfs/fscache-index.c @@ -50,59 +50,6 @@ void nfs_fscache_unregister(void) } /* - * Layout of the key for an NFS server cache object. - */ -struct nfs_server_key { - uint16_t nfsversion; /* NFS protocol version */ - uint16_t family; /* address family */ - uint16_t port; /* IP port */ - union { - struct in_addr ipv4_addr; /* IPv4 address */ - struct in6_addr ipv6_addr; /* IPv6 address */ - } addr[0]; -}; - -/* - * Generate a key to describe a server in the main NFS index - * - We return the length of the key, or 0 if we can't generate one - */ -static uint16_t nfs_server_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax) -{ - const struct nfs_client *clp = cookie_netfs_data; - const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) &clp->cl_addr; - const struct sockaddr_in *sin = (struct sockaddr_in *) &clp->cl_addr; - struct nfs_server_key *key = buffer; - uint16_t len = sizeof(struct nfs_server_key); - - memset(key, 0, len); - key->nfsversion = clp->rpc_ops->version; - key->family = clp->cl_addr.ss_family; - - switch (clp->cl_addr.ss_family) { - case AF_INET: - key->port = sin->sin_port; - key->addr[0].ipv4_addr = sin->sin_addr; - len += sizeof(key->addr[0].ipv4_addr); - break; - - case AF_INET6: - key->port = sin6->sin6_port; - key->addr[0].ipv6_addr = sin6->sin6_addr; - len += sizeof(key->addr[0].ipv6_addr); - break; - - default: - printk(KERN_WARNING "NFS: Unknown network family '%d'\n", - clp->cl_addr.ss_family); - len = 0; - break; - } - - return len; -} - -/* * Define the server object for FS-Cache. This is used to describe a server * object to fscache_acquire_cookie(). It is keyed by the NFS protocol and * server address parameters. @@ -110,33 +57,9 @@ static uint16_t nfs_server_get_key(const void *cookie_netfs_data, const struct fscache_cookie_def nfs_fscache_server_index_def = { .name = "NFS.server", .type = FSCACHE_COOKIE_TYPE_INDEX, - .get_key = nfs_server_get_key, }; /* - * Generate a key to describe a superblock key in the main NFS index - */ -static uint16_t nfs_super_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax) -{ - const struct nfs_fscache_key *key; - const struct nfs_server *nfss = cookie_netfs_data; - uint16_t len; - - key = nfss->fscache_key; - len = sizeof(key->key) + key->key.uniq_len; - if (len > bufmax) { - len = 0; - } else { - memcpy(buffer, &key->key, sizeof(key->key)); - memcpy(buffer + sizeof(key->key), - key->key.uniquifier, key->key.uniq_len); - } - - return len; -} - -/* * Define the superblock object for FS-Cache. This is used to describe a * superblock object to fscache_acquire_cookie(). It is keyed by all the NFS * parameters that might cause a separate superblock. @@ -144,84 +67,9 @@ static uint16_t nfs_super_get_key(const void *cookie_netfs_data, const struct fscache_cookie_def nfs_fscache_super_index_def = { .name = "NFS.super", .type = FSCACHE_COOKIE_TYPE_INDEX, - .get_key = nfs_super_get_key, }; /* - * Definition of the auxiliary data attached to NFS inode storage objects - * within the cache. - * - * The contents of this struct are recorded in the on-disk local cache in the - * auxiliary data attached to the data storage object backing an inode. This - * permits coherency to be managed when a new inode binds to an already extant - * cache object. - */ -struct nfs_fscache_inode_auxdata { - struct timespec mtime; - struct timespec ctime; - loff_t size; - u64 change_attr; -}; - -/* - * Generate a key to describe an NFS inode in an NFS server's index - */ -static uint16_t nfs_fscache_inode_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax) -{ - const struct nfs_inode *nfsi = cookie_netfs_data; - uint16_t nsize; - - /* use the inode's NFS filehandle as the key */ - nsize = nfsi->fh.size; - memcpy(buffer, nfsi->fh.data, nsize); - return nsize; -} - -/* - * Get certain file attributes from the netfs data - * - This function can be absent for an index - * - Not permitted to return an error - * - The netfs data from the cookie being used as the source is presented - */ -static void nfs_fscache_inode_get_attr(const void *cookie_netfs_data, - uint64_t *size) -{ - const struct nfs_inode *nfsi = cookie_netfs_data; - - *size = nfsi->vfs_inode.i_size; -} - -/* - * Get the auxiliary data from netfs data - * - This function can be absent if the index carries no state data - * - Should store the auxiliary data in the buffer - * - Should return the amount of amount stored - * - Not permitted to return an error - * - The netfs data from the cookie being used as the source is presented - */ -static uint16_t nfs_fscache_inode_get_aux(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax) -{ - struct nfs_fscache_inode_auxdata auxdata; - const struct nfs_inode *nfsi = cookie_netfs_data; - - memset(&auxdata, 0, sizeof(auxdata)); - auxdata.size = nfsi->vfs_inode.i_size; - auxdata.mtime = nfsi->vfs_inode.i_mtime; - auxdata.ctime = nfsi->vfs_inode.i_ctime; - - if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) - auxdata.change_attr = inode_peek_iversion_raw(&nfsi->vfs_inode); - - if (bufmax > sizeof(auxdata)) - bufmax = sizeof(auxdata); - - memcpy(buffer, &auxdata, bufmax); - return bufmax; -} - -/* * Consult the netfs about the state of an object * - This function can be absent if the index carries no state data * - The netfs data from the cookie being used as the target is @@ -230,7 +78,8 @@ static uint16_t nfs_fscache_inode_get_aux(const void *cookie_netfs_data, static enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data, const void *data, - uint16_t datalen) + uint16_t datalen, + loff_t object_size) { struct nfs_fscache_inode_auxdata auxdata; struct nfs_inode *nfsi = cookie_netfs_data; @@ -239,7 +88,6 @@ enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data, return FSCACHE_CHECKAUX_OBSOLETE; memset(&auxdata, 0, sizeof(auxdata)); - auxdata.size = nfsi->vfs_inode.i_size; auxdata.mtime = nfsi->vfs_inode.i_mtime; auxdata.ctime = nfsi->vfs_inode.i_ctime; @@ -288,9 +136,6 @@ static void nfs_fh_put_context(void *cookie_netfs_data, void *context) const struct fscache_cookie_def nfs_fscache_inode_object_def = { .name = "NFS.fh", .type = FSCACHE_COOKIE_TYPE_DATAFILE, - .get_key = nfs_fscache_inode_get_key, - .get_attr = nfs_fscache_inode_get_attr, - .get_aux = nfs_fscache_inode_get_aux, .check_aux = nfs_fscache_inode_check_aux, .get_context = nfs_fh_get_context, .put_context = nfs_fh_put_context, diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index d63bea8bbfbb..b55fc7920c3b 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -18,6 +18,7 @@ #include <linux/in6.h> #include <linux/seq_file.h> #include <linux/slab.h> +#include <linux/iversion.h> #include "internal.h" #include "iostat.h" @@ -29,6 +30,21 @@ static struct rb_root nfs_fscache_keys = RB_ROOT; static DEFINE_SPINLOCK(nfs_fscache_keys_lock); /* + * Layout of the key for an NFS server cache object. + */ +struct nfs_server_key { + struct { + uint16_t nfsversion; /* NFS protocol version */ + uint16_t family; /* address family */ + __be16 port; /* IP port */ + } hdr; + union { + struct in_addr ipv4_addr; /* IPv4 address */ + struct in6_addr ipv6_addr; /* IPv6 address */ + }; +} __packed; + +/* * Get the per-client index cookie for an NFS client if the appropriate mount * flag was set * - We always try and get an index cookie for the client, but get filehandle @@ -36,10 +52,41 @@ static DEFINE_SPINLOCK(nfs_fscache_keys_lock); */ void nfs_fscache_get_client_cookie(struct nfs_client *clp) { + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) &clp->cl_addr; + const struct sockaddr_in *sin = (struct sockaddr_in *) &clp->cl_addr; + struct nfs_server_key key; + uint16_t len = sizeof(key.hdr); + + memset(&key, 0, sizeof(key)); + key.hdr.nfsversion = clp->rpc_ops->version; + key.hdr.family = clp->cl_addr.ss_family; + + switch (clp->cl_addr.ss_family) { + case AF_INET: + key.hdr.port = sin->sin_port; + key.ipv4_addr = sin->sin_addr; + len += sizeof(key.ipv4_addr); + break; + + case AF_INET6: + key.hdr.port = sin6->sin6_port; + key.ipv6_addr = sin6->sin6_addr; + len += sizeof(key.ipv6_addr); + break; + + default: + printk(KERN_WARNING "NFS: Unknown network family '%d'\n", + clp->cl_addr.ss_family); + clp->fscache = NULL; + return; + } + /* create a cache index for looking up filehandles */ clp->fscache = fscache_acquire_cookie(nfs_fscache_netfs.primary_index, &nfs_fscache_server_index_def, - clp, true); + &key, len, + NULL, 0, + clp, 0, true); dfprintk(FSCACHE, "NFS: get client cookie (0x%p/0x%p)\n", clp, clp->fscache); } @@ -52,7 +99,7 @@ void nfs_fscache_release_client_cookie(struct nfs_client *clp) dfprintk(FSCACHE, "NFS: releasing client cookie (0x%p/0x%p)\n", clp, clp->fscache); - fscache_relinquish_cookie(clp->fscache, 0); + fscache_relinquish_cookie(clp->fscache, NULL, false); clp->fscache = NULL; } @@ -139,7 +186,9 @@ void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int /* create a cache index for looking up filehandles */ nfss->fscache = fscache_acquire_cookie(nfss->nfs_client->fscache, &nfs_fscache_super_index_def, - nfss, true); + key, sizeof(*key) + ulen, + NULL, 0, + nfss, 0, true); dfprintk(FSCACHE, "NFS: get superblock cookie (0x%p/0x%p)\n", nfss, nfss->fscache); return; @@ -163,7 +212,7 @@ void nfs_fscache_release_super_cookie(struct super_block *sb) dfprintk(FSCACHE, "NFS: releasing superblock cookie (0x%p/0x%p)\n", nfss, nfss->fscache); - fscache_relinquish_cookie(nfss->fscache, 0); + fscache_relinquish_cookie(nfss->fscache, NULL, false); nfss->fscache = NULL; if (nfss->fscache_key) { @@ -180,14 +229,25 @@ void nfs_fscache_release_super_cookie(struct super_block *sb) */ void nfs_fscache_init_inode(struct inode *inode) { + struct nfs_fscache_inode_auxdata auxdata; struct nfs_inode *nfsi = NFS_I(inode); nfsi->fscache = NULL; if (!S_ISREG(inode->i_mode)) return; + + memset(&auxdata, 0, sizeof(auxdata)); + auxdata.mtime = nfsi->vfs_inode.i_mtime; + auxdata.ctime = nfsi->vfs_inode.i_ctime; + + if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) + auxdata.change_attr = inode_peek_iversion_raw(&nfsi->vfs_inode); + nfsi->fscache = fscache_acquire_cookie(NFS_SB(inode->i_sb)->fscache, &nfs_fscache_inode_object_def, - nfsi, false); + nfsi->fh.data, nfsi->fh.size, + &auxdata, sizeof(auxdata), + nfsi, nfsi->vfs_inode.i_size, false); } /* @@ -195,12 +255,16 @@ void nfs_fscache_init_inode(struct inode *inode) */ void nfs_fscache_clear_inode(struct inode *inode) { + struct nfs_fscache_inode_auxdata auxdata; struct nfs_inode *nfsi = NFS_I(inode); struct fscache_cookie *cookie = nfs_i_fscache(inode); dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", nfsi, cookie); - fscache_relinquish_cookie(cookie, false); + memset(&auxdata, 0, sizeof(auxdata)); + auxdata.mtime = nfsi->vfs_inode.i_mtime; + auxdata.ctime = nfsi->vfs_inode.i_ctime; + fscache_relinquish_cookie(cookie, &auxdata, false); nfsi->fscache = NULL; } @@ -232,20 +296,26 @@ static bool nfs_fscache_can_enable(void *data) */ void nfs_fscache_open_file(struct inode *inode, struct file *filp) { + struct nfs_fscache_inode_auxdata auxdata; struct nfs_inode *nfsi = NFS_I(inode); struct fscache_cookie *cookie = nfs_i_fscache(inode); if (!fscache_cookie_valid(cookie)) return; + memset(&auxdata, 0, sizeof(auxdata)); + auxdata.mtime = nfsi->vfs_inode.i_mtime; + auxdata.ctime = nfsi->vfs_inode.i_ctime; + if (inode_is_open_for_write(inode)) { dfprintk(FSCACHE, "NFS: nfsi 0x%p disabling cache\n", nfsi); clear_bit(NFS_INO_FSCACHE, &nfsi->flags); - fscache_disable_cookie(cookie, true); + fscache_disable_cookie(cookie, &auxdata, true); fscache_uncache_all_inode_pages(cookie, inode); } else { dfprintk(FSCACHE, "NFS: nfsi 0x%p enabling cache\n", nfsi); - fscache_enable_cookie(cookie, nfs_fscache_can_enable, inode); + fscache_enable_cookie(cookie, &auxdata, nfsi->vfs_inode.i_size, + nfs_fscache_can_enable, inode); if (fscache_cookie_enabled(cookie)) set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); } @@ -422,7 +492,8 @@ void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync) "NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n", nfs_i_fscache(inode), page, page->index, page->flags, sync); - ret = fscache_write_page(nfs_i_fscache(inode), page, GFP_KERNEL); + ret = fscache_write_page(nfs_i_fscache(inode), page, + inode->i_size, GFP_KERNEL); dfprintk(FSCACHE, "NFS: readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n", page, page->index, page->flags, ret); diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index d7fe3e799f2f..161ba2edb9d0 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -57,6 +57,21 @@ struct nfs_fscache_key { }; /* + * Definition of the auxiliary data attached to NFS inode storage objects + * within the cache. + * + * The contents of this struct are recorded in the on-disk local cache in the + * auxiliary data attached to the data storage object backing an inode. This + * permits coherency to be managed when a new inode binds to an already extant + * cache object. + */ +struct nfs_fscache_inode_auxdata { + struct timespec mtime; + struct timespec ctime; + u64 change_attr; +}; + +/* * fscache-index.c */ extern struct fscache_netfs nfs_fscache_netfs; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 65c9c4175145..b993ad282de2 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -52,7 +52,6 @@ #include <linux/nfs.h> #include <linux/nfs4.h> #include <linux/nfs_fs.h> -#include <linux/fs_struct.h> #include "nfs4_fs.h" #include "internal.h" diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 1d0ce3c57d93..6259a4b8579f 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -192,6 +192,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp) struct nfsd3_writeres *resp = rqstp->rq_resp; __be32 nfserr; unsigned long cnt = argp->len; + unsigned int nvecs; dprintk("nfsd: WRITE(3) %s %d bytes at %Lu%s\n", SVCFH_fmt(&argp->fh), @@ -201,9 +202,12 @@ nfsd3_proc_write(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->committed = argp->stable; + nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt); + if (!nvecs) + RETURN_STATUS(nfserr_io); nfserr = nfsd_write(rqstp, &resp->fh, argp->offset, - rqstp->rq_vec, argp->vlen, - &cnt, resp->committed); + rqstp->rq_vec, nvecs, &cnt, + resp->committed); resp->count = cnt; RETURN_STATUS(nfserr); } @@ -279,6 +283,16 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp) struct nfsd3_diropres *resp = rqstp->rq_resp; __be32 nfserr; + if (argp->tlen == 0) + RETURN_STATUS(nfserr_inval); + if (argp->tlen > NFS3_MAXPATHLEN) + RETURN_STATUS(nfserr_nametoolong); + + argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first, + argp->tlen); + if (IS_ERR(argp->tname)) + RETURN_STATUS(nfserrno(PTR_ERR(argp->tname))); + dprintk("nfsd: SYMLINK(3) %s %.*s -> %.*s\n", SVCFH_fmt(&argp->ffh), argp->flen, argp->fname, diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 1a70581e1cb2..3192b544a441 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -391,7 +391,7 @@ int nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_writeargs *args = rqstp->rq_argp; - unsigned int len, v, hdr, dlen; + unsigned int len, hdr, dlen; u32 max_blocksize = svc_max_payload(rqstp); struct kvec *head = rqstp->rq_arg.head; struct kvec *tail = rqstp->rq_arg.tail; @@ -433,17 +433,9 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) args->count = max_blocksize; len = args->len = max_blocksize; } - rqstp->rq_vec[0].iov_base = (void*)p; - rqstp->rq_vec[0].iov_len = head->iov_len - hdr; - v = 0; - while (len > rqstp->rq_vec[v].iov_len) { - len -= rqstp->rq_vec[v].iov_len; - v++; - rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]); - rqstp->rq_vec[v].iov_len = PAGE_SIZE; - } - rqstp->rq_vec[v].iov_len = len; - args->vlen = v + 1; + + args->first.iov_base = (void *)p; + args->first.iov_len = head->iov_len - hdr; return 1; } @@ -489,51 +481,24 @@ int nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_symlinkargs *args = rqstp->rq_argp; - unsigned int len, avail; - char *old, *new; - struct kvec *vec; + char *base = (char *)p; + size_t dlen; if (!(p = decode_fh(p, &args->ffh)) || - !(p = decode_filename(p, &args->fname, &args->flen)) - ) + !(p = decode_filename(p, &args->fname, &args->flen))) return 0; p = decode_sattr3(p, &args->attrs); - /* now decode the pathname, which might be larger than the first page. - * As we have to check for nul's anyway, we copy it into a new page - * This page appears in the rq_res.pages list, but as pages_len is always - * 0, it won't get in the way - */ - len = ntohl(*p++); - if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE) - return 0; - args->tname = new = page_address(*(rqstp->rq_next_page++)); - args->tlen = len; - /* first copy and check from the first page */ - old = (char*)p; - vec = &rqstp->rq_arg.head[0]; - if ((void *)old > vec->iov_base + vec->iov_len) - return 0; - avail = vec->iov_len - (old - (char*)vec->iov_base); - while (len && avail && *old) { - *new++ = *old++; - len--; - avail--; - } - /* now copy next page if there is one */ - if (len && !avail && rqstp->rq_arg.page_len) { - avail = min_t(unsigned int, rqstp->rq_arg.page_len, PAGE_SIZE); - old = page_address(rqstp->rq_arg.pages[0]); - } - while (len && avail && *old) { - *new++ = *old++; - len--; - avail--; - } - *new = '\0'; - if (len) - return 0; + args->tlen = ntohl(*p++); + + args->first.iov_base = p; + args->first.iov_len = rqstp->rq_arg.head[0].iov_len; + args->first.iov_len -= (char *)p - base; + dlen = args->first.iov_len + rqstp->rq_arg.page_len + + rqstp->rq_arg.tail[0].iov_len; + if (dlen < XDR_QUADLEN(args->tlen) << 2) + return 0; return 1; } diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 49b0a9e7ff18..1f04d2a70d25 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -223,8 +223,8 @@ static int nfs_cb_stat_to_errno(int status) return -status; } -static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected, - int *status) +static int decode_cb_op_status(struct xdr_stream *xdr, + enum nfs_cb_opnum4 expected, int *status) { __be32 *p; u32 op; diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 7d888369f85a..228faf00a594 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -165,7 +165,7 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid) struct nfs4_client *clp = ls->ls_stid.sc_client; struct nfs4_file *fp = ls->ls_stid.sc_file; - trace_layoutstate_free(&ls->ls_stid.sc_stateid); + trace_nfsd_layoutstate_free(&ls->ls_stid.sc_stateid); spin_lock(&clp->cl_lock); list_del_init(&ls->ls_perclnt); @@ -264,7 +264,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate, list_add(&ls->ls_perfile, &fp->fi_lo_states); spin_unlock(&fp->fi_lock); - trace_layoutstate_alloc(&ls->ls_stid.sc_stateid); + trace_nfsd_layoutstate_alloc(&ls->ls_stid.sc_stateid); return ls; } @@ -334,7 +334,7 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls) if (list_empty(&ls->ls_layouts)) goto out_unlock; - trace_layout_recall(&ls->ls_stid.sc_stateid); + trace_nfsd_layout_recall(&ls->ls_stid.sc_stateid); refcount_inc(&ls->ls_stid.sc_count); nfsd4_run_cb(&ls->ls_recall); @@ -507,7 +507,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp, false, lrp->lr_layout_type, &ls); if (nfserr) { - trace_layout_return_lookup_fail(&lrp->lr_sid); + trace_nfsd_layout_return_lookup_fail(&lrp->lr_sid); return nfserr; } @@ -523,7 +523,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp, nfs4_inc_and_copy_stateid(&lrp->lr_sid, &ls->ls_stid); lrp->lrs_present = 1; } else { - trace_layoutstate_unhash(&ls->ls_stid.sc_stateid); + trace_nfsd_layoutstate_unhash(&ls->ls_stid.sc_stateid); nfs4_unhash_stid(&ls->ls_stid); lrp->lrs_present = 0; } @@ -694,7 +694,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) /* * Unknown error or non-responding client, we'll need to fence. */ - trace_layout_recall_fail(&ls->ls_stid.sc_stateid); + trace_nfsd_layout_recall_fail(&ls->ls_stid.sc_stateid); ops = nfsd4_layout_ops[ls->ls_layout_type]; if (ops->fence_client) @@ -703,7 +703,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) nfsd4_cb_layout_fail(ls); return -1; case -NFS4ERR_NOMATCHING_LAYOUT: - trace_layout_recall_done(&ls->ls_stid.sc_stateid); + trace_nfsd_layout_recall_done(&ls->ls_stid.sc_stateid); task->tk_status = 0; return 1; } @@ -716,7 +716,7 @@ nfsd4_cb_layout_release(struct nfsd4_callback *cb) container_of(cb, struct nfs4_layout_stateid, ls_recall); LIST_HEAD(reaplist); - trace_layout_recall_release(&ls->ls_stid.sc_stateid); + trace_nfsd_layout_recall_release(&ls->ls_stid.sc_stateid); nfsd4_return_all_layouts(ls, &reaplist); nfsd4_free_layouts(&reaplist); diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index a0bed2b2004d..5d99e8810b85 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -32,6 +32,7 @@ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/fs_struct.h> #include <linux/file.h> #include <linux/falloc.h> #include <linux/slab.h> @@ -252,11 +253,13 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru * Note: create modes (UNCHECKED,GUARDED...) are the same * in NFSv4 as in v3 except EXCLUSIVE4_1. */ + current->fs->umask = open->op_umask; status = do_nfsd_create(rqstp, current_fh, open->op_fname.data, open->op_fname.len, &open->op_iattr, *resfh, open->op_createmode, (u32 *)open->op_verf.data, &open->op_truncate, &open->op_created); + current->fs->umask = 0; if (!status && open->op_label.len) nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval); @@ -603,6 +606,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) return status; + current->fs->umask = create->cr_umask; switch (create->cr_type) { case NF4LNK: status = nfsd_symlink(rqstp, &cstate->current_fh, @@ -611,20 +615,22 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, break; case NF4BLK: + status = nfserr_inval; rdev = MKDEV(create->cr_specdata1, create->cr_specdata2); if (MAJOR(rdev) != create->cr_specdata1 || MINOR(rdev) != create->cr_specdata2) - return nfserr_inval; + goto out_umask; status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, &create->cr_iattr, S_IFBLK, rdev, &resfh); break; case NF4CHR: + status = nfserr_inval; rdev = MKDEV(create->cr_specdata1, create->cr_specdata2); if (MAJOR(rdev) != create->cr_specdata1 || MINOR(rdev) != create->cr_specdata2) - return nfserr_inval; + goto out_umask; status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, &create->cr_iattr,S_IFCHR, rdev, &resfh); @@ -668,6 +674,8 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fh_dup2(&cstate->current_fh, &resfh); out: fh_put(&resfh); +out_umask: + current->fs->umask = 0; return status; } @@ -751,6 +759,9 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (read->rd_offset >= OFFSET_MAX) return nfserr_inval; + trace_nfsd_read_start(rqstp, &cstate->current_fh, + read->rd_offset, read->rd_length); + /* * If we do a zero copy read, then a client will see read data * that reflects the state of the file *after* performing the @@ -783,6 +794,8 @@ nfsd4_read_release(union nfsd4_op_u *u) { if (u->read.rd_filp) fput(u->read.rd_filp); + trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, + u->read.rd_offset, u->read.rd_length); } static __be32 @@ -1001,6 +1014,9 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (write->wr_offset >= OFFSET_MAX) return nfserr_inval; + cnt = write->wr_buflen; + trace_nfsd_write_start(rqstp, &cstate->current_fh, + write->wr_offset, cnt); status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, stateid, WR_STATE, &filp, NULL); if (status) { @@ -1008,7 +1024,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return status; } - cnt = write->wr_buflen; write->wr_how_written = write->wr_stable_how; gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp)); @@ -1021,7 +1036,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fput(filp); write->wr_bytes_written = cnt; - + trace_nfsd_write_done(rqstp, &cstate->current_fh, + write->wr_offset, cnt); return status; } @@ -1106,7 +1122,6 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, else { copy->cp_res.wr_bytes_written = bytes; copy->cp_res.wr_stable_how = NFS_UNSTABLE; - copy->cp_consecutive = 1; copy->cp_synchronous = 1; gen_boot_verifier(©->cp_res.wr_verifier, SVC_NET(rqstp)); status = nfs_ok; @@ -1412,7 +1427,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp, nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lgp->lg_sid, true, lgp->lg_layout_type, &ls); if (nfserr) { - trace_layout_get_lookup_fail(&lgp->lg_sid); + trace_nfsd_layout_get_lookup_fail(&lgp->lg_sid); goto out; } @@ -1481,7 +1496,7 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp, false, lcp->lc_layout_type, &ls); if (nfserr) { - trace_layout_commit_lookup_fail(&lcp->lc_sid); + trace_nfsd_layout_commit_lookup_fail(&lcp->lc_sid); /* fixup error code as per RFC5661 */ if (nfserr == nfserr_bad_stateid) nfserr = nfserr_badlayout; @@ -1714,12 +1729,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) goto encode_op; } + trace_nfsd_compound(rqstp, args->opcnt); while (!status && resp->opcnt < args->opcnt) { op = &args->ops[resp->opcnt++]; - dprintk("nfsv4 compound op #%d/%d: %d (%s)\n", - resp->opcnt, args->opcnt, op->opnum, - nfsd4_op_name(op->opnum)); /* * The XDR decode routines may have pre-set op->status; * for example, if there is a miscellaneous XDR error @@ -1793,9 +1806,8 @@ encode_op: status = op->status; } - dprintk("nfsv4 compound op %p opcnt %d #%d: %d: status %d\n", - args->ops, args->opcnt, resp->opcnt, op->opnum, - be32_to_cpu(status)); + trace_nfsd_compound_status(args->opcnt, resp->opcnt, status, + nfsd4_op_name(op->opnum)); nfsd4_cstate_clear_replay(cstate); nfsd4_increment_op_stats(op->opnum); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 61b770e39809..fc74d6f46bd5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -98,6 +98,7 @@ enum nfsd4_st_mutex_lock_subclass { */ static DECLARE_WAIT_QUEUE_HEAD(close_wq); +static struct kmem_cache *client_slab; static struct kmem_cache *openowner_slab; static struct kmem_cache *lockowner_slab; static struct kmem_cache *file_slab; @@ -806,7 +807,8 @@ static void block_delegations(struct knfsd_fh *fh) } static struct nfs4_delegation * -alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh, +alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, + struct svc_fh *current_fh, struct nfs4_clnt_odstate *odstate) { struct nfs4_delegation *dp; @@ -837,6 +839,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh, dp->dl_retries = 1; nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client, &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL); + get_nfs4_file(fp); + dp->dl_stid.sc_file = fp; return dp; out_dec: atomic_long_dec(&num_delegations); @@ -874,19 +878,35 @@ nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid) spin_unlock(&stid->sc_lock); } -static void nfs4_put_deleg_lease(struct nfs4_file *fp) +static void put_deleg_file(struct nfs4_file *fp) { struct file *filp = NULL; spin_lock(&fp->fi_lock); - if (fp->fi_deleg_file && --fp->fi_delegees == 0) + if (--fp->fi_delegees == 0) swap(filp, fp->fi_deleg_file); spin_unlock(&fp->fi_lock); - if (filp) { - vfs_setlease(filp, F_UNLCK, NULL, (void **)&fp); + if (filp) fput(filp); - } +} + +static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp) +{ + struct nfs4_file *fp = dp->dl_stid.sc_file; + struct file *filp = fp->fi_deleg_file; + + WARN_ON_ONCE(!fp->fi_delegees); + + vfs_setlease(filp, F_UNLCK, NULL, (void **)&dp); + put_deleg_file(fp); +} + +static void destroy_unhashed_deleg(struct nfs4_delegation *dp) +{ + put_clnt_odstate(dp->dl_clnt_odstate); + nfs4_unlock_deleg_lease(dp); + nfs4_put_stid(&dp->dl_stid); } void nfs4_unhash_stid(struct nfs4_stid *s) @@ -895,20 +915,16 @@ void nfs4_unhash_stid(struct nfs4_stid *s) } /** - * nfs4_get_existing_delegation - Discover if this delegation already exists + * nfs4_delegation_exists - Discover if this delegation already exists * @clp: a pointer to the nfs4_client we're granting a delegation to * @fp: a pointer to the nfs4_file we're granting a delegation on * * Return: - * On success: NULL if an existing delegation was not found. - * - * On error: -EAGAIN if one was previously granted to this nfs4_client - * for this nfs4_file. - * + * On success: true iff an existing delegation is found */ -static int -nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp) +static bool +nfs4_delegation_exists(struct nfs4_client *clp, struct nfs4_file *fp) { struct nfs4_delegation *searchdp = NULL; struct nfs4_client *searchclp = NULL; @@ -919,10 +935,10 @@ nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp) list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) { searchclp = searchdp->dl_stid.sc_client; if (clp == searchclp) { - return -EAGAIN; + return true; } } - return 0; + return false; } /** @@ -941,16 +957,13 @@ nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp) static int hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) { - int status; struct nfs4_client *clp = dp->dl_stid.sc_client; lockdep_assert_held(&state_lock); lockdep_assert_held(&fp->fi_lock); - status = nfs4_get_existing_delegation(clp, fp); - if (status) - return status; - ++fp->fi_delegees; + if (nfs4_delegation_exists(clp, fp)) + return -EAGAIN; refcount_inc(&dp->dl_stid.sc_count); dp->dl_stid.sc_type = NFS4_DELEG_STID; list_add(&dp->dl_perfile, &fp->fi_delegations); @@ -986,11 +999,8 @@ static void destroy_delegation(struct nfs4_delegation *dp) spin_lock(&state_lock); unhashed = unhash_delegation_locked(dp); spin_unlock(&state_lock); - if (unhashed) { - put_clnt_odstate(dp->dl_clnt_odstate); - nfs4_put_deleg_lease(dp->dl_stid.sc_file); - nfs4_put_stid(&dp->dl_stid); - } + if (unhashed) + destroy_unhashed_deleg(dp); } static void revoke_delegation(struct nfs4_delegation *dp) @@ -999,17 +1009,14 @@ static void revoke_delegation(struct nfs4_delegation *dp) WARN_ON(!list_empty(&dp->dl_recall_lru)); - put_clnt_odstate(dp->dl_clnt_odstate); - nfs4_put_deleg_lease(dp->dl_stid.sc_file); - - if (clp->cl_minorversion == 0) - nfs4_put_stid(&dp->dl_stid); - else { + if (clp->cl_minorversion) { dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; + refcount_inc(&dp->dl_stid.sc_count); spin_lock(&clp->cl_lock); list_add(&dp->dl_recall_lru, &clp->cl_revoked); spin_unlock(&clp->cl_lock); } + destroy_unhashed_deleg(dp); } /* @@ -1794,7 +1801,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) struct nfs4_client *clp; int i; - clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL); + clp = kmem_cache_zalloc(client_slab, GFP_KERNEL); if (clp == NULL) return NULL; clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL); @@ -1825,7 +1832,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) err_no_hashtbl: kfree(clp->cl_name.data); err_no_name: - kfree(clp); + kmem_cache_free(client_slab, clp); return NULL; } @@ -1845,7 +1852,7 @@ free_client(struct nfs4_client *clp) kfree(clp->cl_ownerstr_hashtbl); kfree(clp->cl_name.data); idr_destroy(&clp->cl_stateids); - kfree(clp); + kmem_cache_free(client_slab, clp); } /* must be called under the client_lock */ @@ -1911,9 +1918,7 @@ __destroy_client(struct nfs4_client *clp) while (!list_empty(&reaplist)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); list_del_init(&dp->dl_recall_lru); - put_clnt_odstate(dp->dl_clnt_odstate); - nfs4_put_deleg_lease(dp->dl_stid.sc_file); - nfs4_put_stid(&dp->dl_stid); + destroy_unhashed_deleg(dp); } while (!list_empty(&clp->cl_revoked)) { dp = list_entry(clp->cl_revoked.next, struct nfs4_delegation, dl_recall_lru); @@ -2953,7 +2958,7 @@ out_no_session: static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid) { if (!session) - return 0; + return false; return !memcmp(sid, &session->se_sessionid, sizeof(*sid)); } @@ -3471,21 +3476,26 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval, void nfsd4_free_slabs(void) { - kmem_cache_destroy(odstate_slab); + kmem_cache_destroy(client_slab); kmem_cache_destroy(openowner_slab); kmem_cache_destroy(lockowner_slab); kmem_cache_destroy(file_slab); kmem_cache_destroy(stateid_slab); kmem_cache_destroy(deleg_slab); + kmem_cache_destroy(odstate_slab); } int nfsd4_init_slabs(void) { + client_slab = kmem_cache_create("nfsd4_clients", + sizeof(struct nfs4_client), 0, 0, NULL); + if (client_slab == NULL) + goto out; openowner_slab = kmem_cache_create("nfsd4_openowners", sizeof(struct nfs4_openowner), 0, 0, NULL); if (openowner_slab == NULL) - goto out; + goto out_free_client_slab; lockowner_slab = kmem_cache_create("nfsd4_lockowners", sizeof(struct nfs4_lockowner), 0, 0, NULL); if (lockowner_slab == NULL) @@ -3518,6 +3528,8 @@ out_free_lockowner_slab: kmem_cache_destroy(lockowner_slab); out_free_openowner_slab: kmem_cache_destroy(openowner_slab); +out_free_client_slab: + kmem_cache_destroy(client_slab); out: dprintk("nfsd4: out of memory while initializing nfsv4\n"); return -ENOMEM; @@ -3945,17 +3957,9 @@ static bool nfsd_break_deleg_cb(struct file_lock *fl) { bool ret = false; - struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner; - struct nfs4_delegation *dp; + struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner; + struct nfs4_file *fp = dp->dl_stid.sc_file; - if (!fp) { - WARN(1, "(%p)->fl_owner NULL\n", fl); - return ret; - } - if (fp->fi_had_conflict) { - WARN(1, "duplicate break on %p\n", fp); - return ret; - } /* * We don't want the locks code to timeout the lease for us; * we'll remove it ourself if a delegation isn't returned @@ -3965,15 +3969,7 @@ nfsd_break_deleg_cb(struct file_lock *fl) spin_lock(&fp->fi_lock); fp->fi_had_conflict = true; - /* - * If there are no delegations on the list, then return true - * so that the lease code will go ahead and delete it. - */ - if (list_empty(&fp->fi_delegations)) - ret = true; - else - list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) - nfsd_break_one_deleg(dp); + nfsd_break_one_deleg(dp); spin_unlock(&fp->fi_lock); return ret; } @@ -4297,7 +4293,8 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp) return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN; } -static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) +static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, + int flag) { struct file_lock *fl; @@ -4308,124 +4305,88 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) fl->fl_flags = FL_DELEG; fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; fl->fl_end = OFFSET_MAX; - fl->fl_owner = (fl_owner_t)fp; + fl->fl_owner = (fl_owner_t)dp; fl->fl_pid = current->tgid; + fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file; return fl; } -/** - * nfs4_setlease - Obtain a delegation by requesting lease from vfs layer - * @dp: a pointer to the nfs4_delegation we're adding. - * - * Return: - * On success: Return code will be 0 on success. - * - * On error: -EAGAIN if there was an existing delegation. - * nonzero if there is an error in other cases. - * - */ - -static int nfs4_setlease(struct nfs4_delegation *dp) -{ - struct nfs4_file *fp = dp->dl_stid.sc_file; - struct file_lock *fl; - struct file *filp; - int status = 0; - - fl = nfs4_alloc_init_lease(fp, NFS4_OPEN_DELEGATE_READ); - if (!fl) - return -ENOMEM; - filp = find_readable_file(fp); - if (!filp) { - /* We should always have a readable file here */ - WARN_ON_ONCE(1); - locks_free_lock(fl); - return -EBADF; - } - fl->fl_file = filp; - status = vfs_setlease(filp, fl->fl_type, &fl, NULL); - if (fl) - locks_free_lock(fl); - if (status) - goto out_fput; - spin_lock(&state_lock); - spin_lock(&fp->fi_lock); - /* Did the lease get broken before we took the lock? */ - status = -EAGAIN; - if (fp->fi_had_conflict) - goto out_unlock; - /* Race breaker */ - if (fp->fi_deleg_file) { - status = hash_delegation_locked(dp, fp); - goto out_unlock; - } - fp->fi_deleg_file = filp; - fp->fi_delegees = 0; - status = hash_delegation_locked(dp, fp); - spin_unlock(&fp->fi_lock); - spin_unlock(&state_lock); - if (status) { - /* Should never happen, this is a new fi_deleg_file */ - WARN_ON_ONCE(1); - goto out_fput; - } - return 0; -out_unlock: - spin_unlock(&fp->fi_lock); - spin_unlock(&state_lock); -out_fput: - fput(filp); - return status; -} - static struct nfs4_delegation * nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate) { - int status; + int status = 0; struct nfs4_delegation *dp; + struct file *filp; + struct file_lock *fl; + /* + * The fi_had_conflict and nfs_get_existing_delegation checks + * here are just optimizations; we'll need to recheck them at + * the end: + */ if (fp->fi_had_conflict) return ERR_PTR(-EAGAIN); + filp = find_readable_file(fp); + if (!filp) { + /* We should always have a readable file here */ + WARN_ON_ONCE(1); + return ERR_PTR(-EBADF); + } spin_lock(&state_lock); spin_lock(&fp->fi_lock); - status = nfs4_get_existing_delegation(clp, fp); + if (nfs4_delegation_exists(clp, fp)) + status = -EAGAIN; + else if (!fp->fi_deleg_file) { + fp->fi_deleg_file = filp; + /* increment early to prevent fi_deleg_file from being + * cleared */ + fp->fi_delegees = 1; + filp = NULL; + } else + fp->fi_delegees++; spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); - + if (filp) + fput(filp); if (status) return ERR_PTR(status); - dp = alloc_init_deleg(clp, fh, odstate); + status = -ENOMEM; + dp = alloc_init_deleg(clp, fp, fh, odstate); if (!dp) - return ERR_PTR(-ENOMEM); + goto out_delegees; + + fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ); + if (!fl) + goto out_stid; + + status = vfs_setlease(fp->fi_deleg_file, fl->fl_type, &fl, NULL); + if (fl) + locks_free_lock(fl); + if (status) + goto out_clnt_odstate; - get_nfs4_file(fp); spin_lock(&state_lock); spin_lock(&fp->fi_lock); - dp->dl_stid.sc_file = fp; - if (!fp->fi_deleg_file) { - spin_unlock(&fp->fi_lock); - spin_unlock(&state_lock); - status = nfs4_setlease(dp); - goto out; - } - if (fp->fi_had_conflict) { + if (fp->fi_had_conflict) status = -EAGAIN; - goto out_unlock; - } - status = hash_delegation_locked(dp, fp); -out_unlock: + else + status = hash_delegation_locked(dp, fp); spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); -out: - if (status) { - put_clnt_odstate(dp->dl_clnt_odstate); - nfs4_put_stid(&dp->dl_stid); - return ERR_PTR(status); - } + + if (status) + destroy_unhashed_deleg(dp); return dp; +out_clnt_odstate: + put_clnt_odstate(dp->dl_clnt_odstate); +out_stid: + nfs4_put_stid(&dp->dl_stid); +out_delegees: + put_deleg_file(fp); + return ERR_PTR(status); } static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) @@ -5521,15 +5482,26 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; stp->st_stid.sc_type = NFS4_CLOSED_STID; + + /* + * Technically we don't _really_ have to increment or copy it, since + * it should just be gone after this operation and we clobber the + * copied value below, but we continue to do so here just to ensure + * that racing ops see that there was a state change. + */ nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid); nfsd4_close_open_stateid(stp); mutex_unlock(&stp->st_mutex); - /* See RFC5661 sectionm 18.2.4 */ - if (stp->st_stid.sc_client->cl_minorversion) - memcpy(&close->cl_stateid, &close_stateid, - sizeof(close->cl_stateid)); + /* v4.1+ suggests that we send a special stateid in here, since the + * clients should just ignore this anyway. Since this is not useful + * for v4.0 clients either, we set it to the special close_stateid + * universally. + * + * See RFC5661 section 18.2.4, and RFC7530 section 16.2.5 + */ + memcpy(&close->cl_stateid, &close_stateid, sizeof(close->cl_stateid)); /* put reference from nfs4_preprocess_seqid_op */ nfs4_put_stid(&stp->st_stid); @@ -7264,9 +7236,7 @@ nfs4_state_shutdown_net(struct net *net) list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); list_del_init(&dp->dl_recall_lru); - put_clnt_odstate(dp->dl_clnt_odstate); - nfs4_put_deleg_lease(dp->dl_stid.sc_file); - nfs4_put_stid(&dp->dl_stid); + destroy_unhashed_deleg(dp); } nfsd4_client_tracking_exit(net); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index e502fd16246b..1d048dd95464 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -33,7 +33,6 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include <linux/fs_struct.h> #include <linux/file.h> #include <linux/slab.h> #include <linux/namei.h> @@ -682,7 +681,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, &create->cr_acl, &create->cr_label, - ¤t->fs->umask); + &create->cr_umask); if (status) goto out; @@ -927,7 +926,6 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) case NFS4_OPEN_NOCREATE: break; case NFS4_OPEN_CREATE: - current->fs->umask = 0; READ_BUF(4); open->op_createmode = be32_to_cpup(p++); switch (open->op_createmode) { @@ -935,7 +933,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) case NFS4_CREATE_GUARDED: status = nfsd4_decode_fattr(argp, open->op_bmval, &open->op_iattr, &open->op_acl, &open->op_label, - ¤t->fs->umask); + &open->op_umask); if (status) goto out; break; @@ -950,7 +948,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); status = nfsd4_decode_fattr(argp, open->op_bmval, &open->op_iattr, &open->op_acl, &open->op_label, - ¤t->fs->umask); + &open->op_umask); if (status) goto out; break; @@ -1759,7 +1757,7 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) p = xdr_decode_hyper(p, ©->cp_src_pos); p = xdr_decode_hyper(p, ©->cp_dst_pos); p = xdr_decode_hyper(p, ©->cp_count); - copy->cp_consecutive = be32_to_cpup(p++); + p++; /* ca_consecutive: we always do consecutive copies */ copy->cp_synchronous = be32_to_cpup(p++); tmp = be32_to_cpup(p); /* Source server list not supported */ @@ -3427,8 +3425,9 @@ static __be32 nfsd4_encode_splice_read( return nfserr_resource; len = maxcount; - nfserr = nfsd_splice_read(read->rd_rqstp, file, - read->rd_offset, &maxcount); + nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp, + file, read->rd_offset, &maxcount); + read->rd_length = maxcount; if (nfserr) { /* * nfsd_splice_actor may have already messed with the @@ -3511,8 +3510,9 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, read->rd_vlen = v; len = maxcount; - nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec, - read->rd_vlen, &maxcount); + nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, + resp->rqstp->rq_vec, read->rd_vlen, &maxcount); + read->rd_length = maxcount; if (nfserr) return nfserr; xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3)); @@ -4214,7 +4214,7 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, return nfserr; p = xdr_reserve_space(&resp->xdr, 4 + 4); - *p++ = cpu_to_be32(copy->cp_consecutive); + *p++ = xdr_one; /* cr_consecutive */ *p++ = cpu_to_be32(copy->cp_synchronous); return 0; } diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 8aa011820c4a..a008e7634181 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -87,13 +87,23 @@ nfsd_mode_check(struct svc_rqst *rqstp, struct dentry *dentry, return nfserr_inval; } +static bool nfsd_originating_port_ok(struct svc_rqst *rqstp, int flags) +{ + if (flags & NFSEXP_INSECURE_PORT) + return true; + /* We don't require gss requests to use low ports: */ + if (rqstp->rq_cred.cr_flavor >= RPC_AUTH_GSS) + return true; + return test_bit(RQ_SECURE, &rqstp->rq_flags); +} + static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, struct svc_export *exp) { int flags = nfsexp_flags(rqstp, exp); /* Check if the request originated from a secure port. */ - if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && !(flags & NFSEXP_INSECURE_PORT)) { + if (!nfsd_originating_port_ok(rqstp, flags)) { RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); dprintk("nfsd: request from insecure port %s!\n", svc_print_addr(rqstp, buf, sizeof(buf))); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 43c0419b8ddb..f107f9fa8e15 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -212,13 +212,18 @@ nfsd_proc_write(struct svc_rqst *rqstp) struct nfsd_attrstat *resp = rqstp->rq_resp; __be32 nfserr; unsigned long cnt = argp->len; + unsigned int nvecs; dprintk("nfsd: WRITE %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), argp->len, argp->offset); - nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset, - rqstp->rq_vec, argp->vlen, &cnt, NFS_DATA_SYNC); + nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt); + if (!nvecs) + return nfserr_io; + nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), + argp->offset, rqstp->rq_vec, nvecs, + &cnt, NFS_DATA_SYNC); return nfsd_return_attrs(nfserr, resp); } @@ -444,17 +449,19 @@ nfsd_proc_symlink(struct svc_rqst *rqstp) struct svc_fh newfh; __be32 nfserr; + if (argp->tlen > NFS_MAXPATHLEN) + return nfserr_nametoolong; + + argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first, + argp->tlen); + if (IS_ERR(argp->tname)) + return nfserrno(PTR_ERR(argp->tname)); + dprintk("nfsd: SYMLINK %s %.*s -> %.*s\n", SVCFH_fmt(&argp->ffh), argp->flen, argp->fname, argp->tlen, argp->tname); fh_init(&newfh, NFS_FHSIZE); - /* - * Crazy hack: the request fits in a page, and already-decoded - * attributes follow argp->tname, so it's safe to just write a - * null to ensure it's null-terminated: - */ - argp->tname[argp->tlen] = '\0'; nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, argp->tname, &newfh); diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 79b6064f8977..a43e8260520a 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -71,22 +71,6 @@ decode_filename(__be32 *p, char **namp, unsigned int *lenp) } static __be32 * -decode_pathname(__be32 *p, char **namp, unsigned int *lenp) -{ - char *name; - unsigned int i; - - if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHLEN)) != NULL) { - for (i = 0, name = *namp; i < *lenp; i++, name++) { - if (*name == '\0') - return NULL; - } - } - - return p; -} - -static __be32 * decode_sattr(__be32 *p, struct iattr *iap) { u32 tmp, tmp1; @@ -287,7 +271,6 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) struct nfsd_writeargs *args = rqstp->rq_argp; unsigned int len, hdr, dlen; struct kvec *head = rqstp->rq_arg.head; - int v; p = decode_fh(p, &args->fh); if (!p) @@ -323,17 +306,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) if (dlen < XDR_QUADLEN(len)*4) return 0; - rqstp->rq_vec[0].iov_base = (void*)p; - rqstp->rq_vec[0].iov_len = head->iov_len - hdr; - v = 0; - while (len > rqstp->rq_vec[v].iov_len) { - len -= rqstp->rq_vec[v].iov_len; - v++; - rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]); - rqstp->rq_vec[v].iov_len = PAGE_SIZE; - } - rqstp->rq_vec[v].iov_len = len; - args->vlen = v + 1; + args->first.iov_base = (void *)p; + args->first.iov_len = head->iov_len - hdr; return 1; } @@ -394,14 +368,39 @@ int nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd_symlinkargs *args = rqstp->rq_argp; + char *base = (char *)p; + size_t xdrlen; if ( !(p = decode_fh(p, &args->ffh)) - || !(p = decode_filename(p, &args->fname, &args->flen)) - || !(p = decode_pathname(p, &args->tname, &args->tlen))) + || !(p = decode_filename(p, &args->fname, &args->flen))) return 0; - p = decode_sattr(p, &args->attrs); - return xdr_argsize_check(rqstp, p); + args->tlen = ntohl(*p++); + if (args->tlen == 0) + return 0; + + args->first.iov_base = p; + args->first.iov_len = rqstp->rq_arg.head[0].iov_len; + args->first.iov_len -= (char *)p - base; + + /* This request is never larger than a page. Therefore, + * transport will deliver either: + * 1. pathname in the pagelist -> sattr is in the tail. + * 2. everything in the head buffer -> sattr is in the head. + */ + if (rqstp->rq_arg.page_len) { + if (args->tlen != rqstp->rq_arg.page_len) + return 0; + p = rqstp->rq_arg.tail[0].iov_base; + } else { + xdrlen = XDR_QUADLEN(args->tlen); + if (xdrlen > args->first.iov_len - (8 * sizeof(__be32))) + return 0; + p += xdrlen; + } + decode_sattr(p, &args->attrs); + + return 1; } int diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 8b2f1d92c579..80933e4334d8 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -11,39 +11,79 @@ #include <linux/tracepoint.h> #include "nfsfh.h" +TRACE_EVENT(nfsd_compound, + TP_PROTO(const struct svc_rqst *rqst, + u32 args_opcnt), + TP_ARGS(rqst, args_opcnt), + TP_STRUCT__entry( + __field(u32, xid) + __field(u32, args_opcnt) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqst->rq_xid); + __entry->args_opcnt = args_opcnt; + ), + TP_printk("xid=0x%08x opcnt=%u", + __entry->xid, __entry->args_opcnt) +) + +TRACE_EVENT(nfsd_compound_status, + TP_PROTO(u32 args_opcnt, + u32 resp_opcnt, + __be32 status, + const char *name), + TP_ARGS(args_opcnt, resp_opcnt, status, name), + TP_STRUCT__entry( + __field(u32, args_opcnt) + __field(u32, resp_opcnt) + __field(int, status) + __string(name, name) + ), + TP_fast_assign( + __entry->args_opcnt = args_opcnt; + __entry->resp_opcnt = resp_opcnt; + __entry->status = be32_to_cpu(status); + __assign_str(name, name); + ), + TP_printk("op=%u/%u %s status=%d", + __entry->resp_opcnt, __entry->args_opcnt, + __get_str(name), __entry->status) +) + DECLARE_EVENT_CLASS(nfsd_io_class, TP_PROTO(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, - int len), + unsigned long len), TP_ARGS(rqstp, fhp, offset, len), TP_STRUCT__entry( - __field(__be32, xid) - __field_struct(struct knfsd_fh, fh) + __field(u32, xid) + __field(u32, fh_hash) __field(loff_t, offset) - __field(int, len) + __field(unsigned long, len) ), TP_fast_assign( - __entry->xid = rqstp->rq_xid, - fh_copy_shallow(&__entry->fh, &fhp->fh_handle); + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); __entry->offset = offset; __entry->len = len; ), - TP_printk("xid=0x%x fh=0x%x offset=%lld len=%d", - __be32_to_cpu(__entry->xid), knfsd_fh_hash(&__entry->fh), + TP_printk("xid=0x%08x fh_hash=0x%08x offset=%lld len=%lu", + __entry->xid, __entry->fh_hash, __entry->offset, __entry->len) ) #define DEFINE_NFSD_IO_EVENT(name) \ -DEFINE_EVENT(nfsd_io_class, name, \ +DEFINE_EVENT(nfsd_io_class, nfsd_##name, \ TP_PROTO(struct svc_rqst *rqstp, \ struct svc_fh *fhp, \ loff_t offset, \ - int len), \ + unsigned long len), \ TP_ARGS(rqstp, fhp, offset, len)) DEFINE_NFSD_IO_EVENT(read_start); -DEFINE_NFSD_IO_EVENT(read_opened); +DEFINE_NFSD_IO_EVENT(read_splice); +DEFINE_NFSD_IO_EVENT(read_vector); DEFINE_NFSD_IO_EVENT(read_io_done); DEFINE_NFSD_IO_EVENT(read_done); DEFINE_NFSD_IO_EVENT(write_start); @@ -51,6 +91,40 @@ DEFINE_NFSD_IO_EVENT(write_opened); DEFINE_NFSD_IO_EVENT(write_io_done); DEFINE_NFSD_IO_EVENT(write_done); +DECLARE_EVENT_CLASS(nfsd_err_class, + TP_PROTO(struct svc_rqst *rqstp, + struct svc_fh *fhp, + loff_t offset, + int status), + TP_ARGS(rqstp, fhp, offset, status), + TP_STRUCT__entry( + __field(u32, xid) + __field(u32, fh_hash) + __field(loff_t, offset) + __field(int, status) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __entry->offset = offset; + __entry->status = status; + ), + TP_printk("xid=0x%08x fh_hash=0x%08x offset=%lld status=%d", + __entry->xid, __entry->fh_hash, + __entry->offset, __entry->status) +) + +#define DEFINE_NFSD_ERR_EVENT(name) \ +DEFINE_EVENT(nfsd_err_class, nfsd_##name, \ + TP_PROTO(struct svc_rqst *rqstp, \ + struct svc_fh *fhp, \ + loff_t offset, \ + int len), \ + TP_ARGS(rqstp, fhp, offset, len)) + +DEFINE_NFSD_ERR_EVENT(read_err); +DEFINE_NFSD_ERR_EVENT(write_err); + #include "state.h" DECLARE_EVENT_CLASS(nfsd_stateid_class, @@ -76,7 +150,7 @@ DECLARE_EVENT_CLASS(nfsd_stateid_class, ) #define DEFINE_STATEID_EVENT(name) \ -DEFINE_EVENT(nfsd_stateid_class, name, \ +DEFINE_EVENT(nfsd_stateid_class, nfsd_##name, \ TP_PROTO(stateid_t *stp), \ TP_ARGS(stp)) DEFINE_STATEID_EVENT(layoutstate_alloc); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index a3c9bfa77def..2410b093a2e6 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -881,20 +881,24 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, return __splice_from_pipe(pipe, sd, nfsd_splice_actor); } -static __be32 -nfsd_finish_read(struct file *file, unsigned long *count, int host_err) +static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct file *file, loff_t offset, + unsigned long *count, int host_err) { if (host_err >= 0) { nfsdstats.io_read += host_err; *count = host_err; fsnotify_access(file); + trace_nfsd_read_io_done(rqstp, fhp, offset, *count); return 0; - } else + } else { + trace_nfsd_read_err(rqstp, fhp, offset, host_err); return nfserrno(host_err); + } } -__be32 nfsd_splice_read(struct svc_rqst *rqstp, - struct file *file, loff_t offset, unsigned long *count) +__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct file *file, loff_t offset, unsigned long *count) { struct splice_desc sd = { .len = 0, @@ -904,21 +908,23 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, }; int host_err; + trace_nfsd_read_splice(rqstp, fhp, offset, *count); rqstp->rq_next_page = rqstp->rq_respages + 1; host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); - return nfsd_finish_read(file, count, host_err); + return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err); } -__be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen, - unsigned long *count) +__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct file *file, loff_t offset, + struct kvec *vec, int vlen, unsigned long *count) { struct iov_iter iter; int host_err; + trace_nfsd_read_vector(rqstp, fhp, offset, *count); iov_iter_kvec(&iter, READ | ITER_KVEC, vec, vlen, *count); host_err = vfs_iter_read(file, &iter, &offset, 0); - - return nfsd_finish_read(file, count, host_err); + return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err); } /* @@ -965,13 +971,15 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, { struct svc_export *exp; struct iov_iter iter; - __be32 err = 0; + __be32 nfserr; int host_err; int use_wgather; loff_t pos = offset; unsigned int pflags = current->flags; rwf_t flags = 0; + trace_nfsd_write_opened(rqstp, fhp, offset, *cnt); + if (test_bit(RQ_LOCAL, &rqstp->rq_flags)) /* * We want less throttling in balance_dirty_pages() @@ -994,22 +1002,23 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, host_err = vfs_iter_write(file, &iter, &pos, flags); if (host_err < 0) goto out_nfserr; - *cnt = host_err; - nfsdstats.io_write += host_err; + nfsdstats.io_write += *cnt; fsnotify_modify(file); if (stable && use_wgather) host_err = wait_for_concurrent_writes(file); out_nfserr: - dprintk("nfsd: write complete host_err=%d\n", host_err); - if (host_err >= 0) - err = 0; - else - err = nfserrno(host_err); + if (host_err >= 0) { + trace_nfsd_write_io_done(rqstp, fhp, offset, *cnt); + nfserr = nfs_ok; + } else { + trace_nfsd_write_err(rqstp, fhp, offset, host_err); + nfserr = nfserrno(host_err); + } if (test_bit(RQ_LOCAL, &rqstp->rq_flags)) current_restore_flags(pflags, PF_LESS_THROTTLE); - return err; + return nfserr; } /* @@ -1024,27 +1033,23 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct raparms *ra; __be32 err; - trace_read_start(rqstp, fhp, offset, vlen); + trace_nfsd_read_start(rqstp, fhp, offset, *count); err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); if (err) return err; ra = nfsd_init_raparms(file); - trace_read_opened(rqstp, fhp, offset, vlen); - if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags)) - err = nfsd_splice_read(rqstp, file, offset, count); + err = nfsd_splice_read(rqstp, fhp, file, offset, count); else - err = nfsd_readv(file, offset, vec, vlen, count); - - trace_read_io_done(rqstp, fhp, offset, vlen); + err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count); if (ra) nfsd_put_raparams(file, ra); fput(file); - trace_read_done(rqstp, fhp, offset, vlen); + trace_nfsd_read_done(rqstp, fhp, offset, *count); return err; } @@ -1061,18 +1066,16 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, struct file *file = NULL; __be32 err = 0; - trace_write_start(rqstp, fhp, offset, vlen); + trace_nfsd_write_start(rqstp, fhp, offset, *cnt); err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file); if (err) goto out; - trace_write_opened(rqstp, fhp, offset, vlen); err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable); - trace_write_io_done(rqstp, fhp, offset, vlen); fput(file); out: - trace_write_done(rqstp, fhp, offset, vlen); + trace_nfsd_write_done(rqstp, fhp, offset, *cnt); return err; } diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index be6d8e00453f..a7e107309f76 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -78,10 +78,13 @@ __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, int, struct file **); struct raparms; -__be32 nfsd_splice_read(struct svc_rqst *, - struct file *, loff_t, unsigned long *); -__be32 nfsd_readv(struct file *, loff_t, struct kvec *, int, - unsigned long *); +__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct file *file, loff_t offset, + unsigned long *count); +__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct file *file, loff_t offset, + struct kvec *vec, int vlen, + unsigned long *count); __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *, int, unsigned long *); __be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t, diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index 2f4f22e6b8cb..ea7cca3a64b7 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -34,7 +34,7 @@ struct nfsd_writeargs { svc_fh fh; __u32 offset; int len; - int vlen; + struct kvec first; }; struct nfsd_createargs { @@ -72,6 +72,7 @@ struct nfsd_symlinkargs { char * tname; unsigned int tlen; struct iattr attrs; + struct kvec first; }; struct nfsd_readdirargs { diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 056bf8a7364e..2cb29e961a76 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -41,7 +41,7 @@ struct nfsd3_writeargs { __u32 count; int stable; __u32 len; - int vlen; + struct kvec first; }; struct nfsd3_createargs { @@ -90,6 +90,7 @@ struct nfsd3_symlinkargs { char * tname; unsigned int tlen; struct iattr attrs; + struct kvec first; }; struct nfsd3_readdirargs { diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index bc29511b6405..17c453a7999c 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -110,6 +110,7 @@ struct nfsd4_create { struct { u32 datalen; char *data; + struct kvec first; } link; /* NF4LNK */ struct { u32 specdata1; @@ -118,12 +119,14 @@ struct nfsd4_create { } u; u32 cr_bmval[3]; /* request */ struct iattr cr_iattr; /* request */ + int cr_umask; /* request */ struct nfsd4_change_info cr_cinfo; /* response */ struct nfs4_acl *cr_acl; struct xdr_netobj cr_label; }; #define cr_datalen u.link.datalen #define cr_data u.link.data +#define cr_first u.link.first #define cr_specdata1 u.dev.specdata1 #define cr_specdata2 u.dev.specdata2 @@ -228,6 +231,7 @@ struct nfsd4_open { u32 op_why_no_deleg; /* response - DELEG_NONE_EXT only */ u32 op_create; /* request */ u32 op_createmode; /* request */ + int op_umask; /* request */ u32 op_bmval[3]; /* request */ struct iattr op_iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */ nfs4_verifier op_verf __attribute__((aligned(32))); @@ -518,7 +522,6 @@ struct nfsd4_copy { u64 cp_count; /* both */ - bool cp_consecutive; bool cp_synchronous; /* response */ diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 6702a6a0bbb5..d51e1bb781cf 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -139,23 +139,32 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, return false; } -struct fanotify_event_info *fanotify_alloc_event(struct inode *inode, u32 mask, +struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, + struct inode *inode, u32 mask, const struct path *path) { struct fanotify_event_info *event; + gfp_t gfp = GFP_KERNEL; + + /* + * For queues with unlimited length lost events are not expected and + * can possibly have security implications. Avoid losing events when + * memory is short. + */ + if (group->max_events == UINT_MAX) + gfp |= __GFP_NOFAIL; if (fanotify_is_perm_event(mask)) { struct fanotify_perm_event_info *pevent; - pevent = kmem_cache_alloc(fanotify_perm_event_cachep, - GFP_KERNEL); + pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp); if (!pevent) return NULL; event = &pevent->fae; pevent->response = 0; goto init; } - event = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL); + event = kmem_cache_alloc(fanotify_event_cachep, gfp); if (!event) return NULL; init: __maybe_unused @@ -210,10 +219,17 @@ static int fanotify_handle_event(struct fsnotify_group *group, return 0; } - event = fanotify_alloc_event(inode, mask, data); + event = fanotify_alloc_event(group, inode, mask, data); ret = -ENOMEM; - if (unlikely(!event)) + if (unlikely(!event)) { + /* + * We don't queue overflow events for permission events as + * there the access is denied and so no event is in fact lost. + */ + if (!fanotify_is_perm_event(mask)) + fsnotify_queue_overflow(group); goto finish; + } fsn_event = &event->fse; ret = fsnotify_add_event(group, fsn_event, fanotify_merge); diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 256d9d1ddea9..8609ba06f474 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -52,5 +52,6 @@ static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse) return container_of(fse, struct fanotify_event_info, fse); } -struct fanotify_event_info *fanotify_alloc_event(struct inode *inode, u32 mask, +struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, + struct inode *inode, u32 mask, const struct path *path); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index fa803a58a605..ec4d8c59d0e3 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -757,7 +757,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->fanotify_data.user = user; atomic_inc(&user->fanotify_listeners); - oevent = fanotify_alloc_event(NULL, FS_Q_OVERFLOW, NULL); + oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL); if (unlikely(!oevent)) { fd = -ENOMEM; goto out_destroy_group; diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 8b73332735ba..40dedb37a1f3 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -99,8 +99,14 @@ int inotify_handle_event(struct fsnotify_group *group, fsn_mark); event = kmalloc(alloc_len, GFP_KERNEL); - if (unlikely(!event)) + if (unlikely(!event)) { + /* + * Treat lost event due to ENOMEM the same way as queue + * overflow to let userspace know event was lost. + */ + fsnotify_queue_overflow(group); return -ENOMEM; + } fsn_event = &event->fse; fsnotify_init_event(fsn_event, inode, mask); diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 43c23653ce2e..ef32f3657958 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -307,6 +307,20 @@ static long inotify_ioctl(struct file *file, unsigned int cmd, spin_unlock(&group->notification_lock); ret = put_user(send_len, (int __user *) p); break; +#ifdef CONFIG_CHECKPOINT_RESTORE + case INOTIFY_IOC_SETNEXTWD: + ret = -EINVAL; + if (arg >= 1 && arg <= INT_MAX) { + struct inotify_group_private_data *data; + + data = &group->inotify_data; + spin_lock(&data->idr_lock); + idr_set_cursor(&data->idr, (unsigned int)arg); + spin_unlock(&data->idr_lock); + ret = 0; + } + break; +#endif /* CONFIG_CHECKPOINT_RESTORE */ } return ret; diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 66f85c651c52..3c3e36745f59 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -111,7 +111,8 @@ int fsnotify_add_event(struct fsnotify_group *group, return 2; } - if (group->q_len >= group->max_events) { + if (event == group->overflow_event || + group->q_len >= group->max_events) { ret = 2; /* Queue overflow event only if it isn't already queued */ if (!list_empty(&group->overflow_event->list)) { diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 2831f495a674..32c523cf5a2d 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -381,7 +381,7 @@ unm_err_out: * vfs inode dirty. This ensures that any changes to the mft record are * written out to disk. * - * NOTE: We only set I_DIRTY_SYNC and I_DIRTY_DATASYNC (and not I_DIRTY_PAGES) + * NOTE: We only set I_DIRTY_DATASYNC (and not I_DIRTY_PAGES) * on the base vfs inode, because even though file data may have been modified, * it is dirty in the inode meta data rather than the data page cache of the * inode, and thus there are no data pages that need writing out. Therefore, a @@ -407,7 +407,7 @@ void __mark_mft_record_dirty(ntfs_inode *ni) else base_ni = ni->ext.base_ntfs_ino; mutex_unlock(&ni->extent_lock); - __mark_inode_dirty(VFS_I(base_ni), I_DIRTY_SYNC | I_DIRTY_DATASYNC); + __mark_inode_dirty(VFS_I(base_ni), I_DIRTY_DATASYNC); } static const char *ntfs_please_email = "Please email " diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 9a876bb07cac..0f157bbd3e0f 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -7119,7 +7119,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, goto out_commit; did_quota = 1; - data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv; + data_ac->ac_resv = &oi->ip_la_data_resv; ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off, &num); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index e8e205bf2e41..302cd7caa4a7 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -346,7 +346,7 @@ static int ocfs2_readpage(struct file *file, struct page *page) unlock = 0; out_alloc: - up_read(&OCFS2_I(inode)->ip_alloc_sem); + up_read(&oi->ip_alloc_sem); out_inode_unlock: ocfs2_inode_unlock(inode, 0); out: @@ -2213,7 +2213,7 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock, down_write(&oi->ip_alloc_sem); if (first_get_block) { - if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) + if (ocfs2_sparse_alloc(osb)) ret = ocfs2_zero_tail(inode, di_bh, pos); else ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index 8614ff069d99..3494a62ed749 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h @@ -78,7 +78,7 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level) /* * Using a named enum representing lock types in terms of #N bit stored in * iocb->private, which is going to be used for communication between - * ocfs2_dio_end_io() and ocfs2_file_aio_write/read(). + * ocfs2_dio_end_io() and ocfs2_file_write/read_iter(). */ enum ocfs2_iocb_lock_bits { OCFS2_IOCB_RW_LOCK = 0, diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index ea8c551bcd7e..91a8889abf9b 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -570,7 +570,16 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, current_page, vec_len, vec_start); len = bio_add_page(bio, page, vec_len, vec_start); - if (len != vec_len) break; + if (len != vec_len) { + mlog(ML_ERROR, "Adding page[%d] to bio failed, " + "page %p, len %d, vec_len %u, vec_start %u, " + "bi_sector %llu\n", current_page, page, len, + vec_len, vec_start, + (unsigned long long)bio->bi_iter.bi_sector); + bio_put(bio); + bio = ERR_PTR(-EIO); + return bio; + } cs += vec_len / (PAGE_SIZE/spp); vec_start = 0; diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 977763d4c27d..b048d4fa3959 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -3072,7 +3072,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, * We need to return the correct block within the * cluster which should hold our entry. */ - off = ocfs2_dx_dir_hash_idx(OCFS2_SB(dir->i_sb), + off = ocfs2_dx_dir_hash_idx(osb, &lookup->dl_hinfo); get_bh(dx_leaves[off]); lookup->dl_dx_leaf_bh = dx_leaves[off]; diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index fd6bbbbd7d78..39831fc2fd52 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -224,14 +224,12 @@ void dlm_do_local_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, struct dlm_lock *lock) { dlm_astlockfunc_t *fn; - struct dlm_lockstatus *lksb; mlog(0, "%s: res %.*s, lock %u:%llu, Local AST\n", dlm->name, res->lockname.len, res->lockname.name, dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie))); - lksb = lock->lksb; fn = lock->ast; BUG_ON(lock->ml.node != dlm->node_num); diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index e9f3705c4c9f..d06e27ec4be4 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -140,6 +140,7 @@ struct dlm_ctxt u8 node_num; u32 key; u8 joining_node; + u8 migrate_done; /* set to 1 means node has migrated all lock resources */ wait_queue_head_t dlm_join_events; unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; @@ -960,13 +961,10 @@ static inline int dlm_send_proxy_ast(struct dlm_ctxt *dlm, void dlm_print_one_lock_resource(struct dlm_lock_resource *res); void __dlm_print_one_lock_resource(struct dlm_lock_resource *res); -u8 dlm_nm_this_node(struct dlm_ctxt *dlm); void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); -int dlm_nm_init(struct dlm_ctxt *dlm); -int dlm_heartbeat_init(struct dlm_ctxt *dlm); void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data); void dlm_hb_node_up_cb(struct o2nm_node *node, int idx, void *data); diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index e1fea149f50b..425081be6161 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -461,6 +461,19 @@ redo_bucket: cond_resched_lock(&dlm->spinlock); num += n; } + + if (!num) { + if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) { + mlog(0, "%s: perhaps there are more lock resources " + "need to be migrated after dlm recovery\n", dlm->name); + ret = -EAGAIN; + } else { + mlog(0, "%s: we won't do dlm recovery after migrating " + "all lock resources\n", dlm->name); + dlm->migrate_done = 1; + } + } + spin_unlock(&dlm->spinlock); wake_up(&dlm->dlm_thread_wq); @@ -675,20 +688,6 @@ static void dlm_leave_domain(struct dlm_ctxt *dlm) spin_unlock(&dlm->spinlock); } -int dlm_shutting_down(struct dlm_ctxt *dlm) -{ - int ret = 0; - - spin_lock(&dlm_domain_lock); - - if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN) - ret = 1; - - spin_unlock(&dlm_domain_lock); - - return ret; -} - void dlm_unregister_domain(struct dlm_ctxt *dlm) { int leave = 0; @@ -2052,6 +2051,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN; init_waitqueue_head(&dlm->dlm_join_events); + dlm->migrate_done = 0; + dlm->reco.new_master = O2NM_INVALID_NODE_NUM; dlm->reco.dead_node = O2NM_INVALID_NODE_NUM; diff --git a/fs/ocfs2/dlm/dlmdomain.h b/fs/ocfs2/dlm/dlmdomain.h index fd6122a38dbd..8a9281411c18 100644 --- a/fs/ocfs2/dlm/dlmdomain.h +++ b/fs/ocfs2/dlm/dlmdomain.h @@ -28,7 +28,30 @@ extern spinlock_t dlm_domain_lock; extern struct list_head dlm_domains; -int dlm_shutting_down(struct dlm_ctxt *dlm); +static inline int dlm_joined(struct dlm_ctxt *dlm) +{ + int ret = 0; + + spin_lock(&dlm_domain_lock); + if (dlm->dlm_state == DLM_CTXT_JOINED) + ret = 1; + spin_unlock(&dlm_domain_lock); + + return ret; +} + +static inline int dlm_shutting_down(struct dlm_ctxt *dlm) +{ + int ret = 0; + + spin_lock(&dlm_domain_lock); + if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN) + ret = 1; + spin_unlock(&dlm_domain_lock); + + return ret; +} + void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm, int node_num); diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 66c2a491f68d..74962315794e 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c @@ -77,8 +77,7 @@ int dlm_init_lock_cache(void) void dlm_destroy_lock_cache(void) { - if (dlm_lock_cache) - kmem_cache_destroy(dlm_lock_cache); + kmem_cache_destroy(dlm_lock_cache); } /* Tell us whether we can grant a new lock request. diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index a7df226f9449..aaca0949fe53 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -414,8 +414,7 @@ int dlm_init_mle_cache(void) void dlm_destroy_mle_cache(void) { - if (dlm_mle_cache) - kmem_cache_destroy(dlm_mle_cache); + kmem_cache_destroy(dlm_mle_cache); } static void dlm_mle_release(struct kref *kref) @@ -472,15 +471,11 @@ bail: void dlm_destroy_master_caches(void) { - if (dlm_lockname_cache) { - kmem_cache_destroy(dlm_lockname_cache); - dlm_lockname_cache = NULL; - } + kmem_cache_destroy(dlm_lockname_cache); + dlm_lockname_cache = NULL; - if (dlm_lockres_cache) { - kmem_cache_destroy(dlm_lockres_cache); - dlm_lockres_cache = NULL; - } + kmem_cache_destroy(dlm_lockres_cache); + dlm_lockres_cache = NULL; } static void dlm_lockres_release(struct kref *kref) @@ -2495,13 +2490,13 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) } /* - * A migrateable resource is one that is : + * A migratable resource is one that is : * 1. locally mastered, and, * 2. zero local locks, and, * 3. one or more non-local locks, or, one or more references * Returns 1 if yes, 0 if not. */ -static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, +static int dlm_is_lockres_migratable(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { enum dlm_lockres_list idx; @@ -2532,7 +2527,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, continue; } cookie = be64_to_cpu(lock->ml.cookie); - mlog(0, "%s: Not migrateable res %.*s, lock %u:%llu on " + mlog(0, "%s: Not migratable res %.*s, lock %u:%llu on " "%s list\n", dlm->name, res->lockname.len, res->lockname.name, dlm_get_lock_cookie_node(cookie), @@ -2548,7 +2543,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, return 0; } - mlog(0, "%s: res %.*s, Migrateable\n", dlm->name, res->lockname.len, + mlog(0, "%s: res %.*s, Migratable\n", dlm->name, res->lockname.len, res->lockname.name); return 1; @@ -2792,7 +2787,7 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) assert_spin_locked(&dlm->spinlock); spin_lock(&res->spinlock); - if (dlm_is_lockres_migrateable(dlm, res)) + if (dlm_is_lockres_migratable(dlm, res)) target = dlm_pick_migration_target(dlm, res); spin_unlock(&res->spinlock); diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index ec8f75813beb..802636d50365 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -62,7 +62,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node); static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node); static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, u8 dead_node); -static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node); +static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm); static inline int dlm_num_locks_in_lockres(struct dlm_lock_resource *res); static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres, @@ -423,12 +423,11 @@ void dlm_wait_for_recovery(struct dlm_ctxt *dlm) static void dlm_begin_recovery(struct dlm_ctxt *dlm) { - spin_lock(&dlm->spinlock); + assert_spin_locked(&dlm->spinlock); BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE); printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n", dlm->name, dlm->reco.dead_node); dlm->reco.state |= DLM_RECO_STATE_ACTIVE; - spin_unlock(&dlm->spinlock); } static void dlm_end_recovery(struct dlm_ctxt *dlm) @@ -456,6 +455,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm) spin_lock(&dlm->spinlock); + if (dlm->migrate_done) { + mlog(0, "%s: no need do recovery after migrating all " + "lock resources\n", dlm->name); + spin_unlock(&dlm->spinlock); + return 0; + } + /* check to see if the new master has died */ if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM && test_bit(dlm->reco.new_master, dlm->recovery_map)) { @@ -490,12 +496,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm) mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n", dlm->name, task_pid_nr(dlm->dlm_reco_thread_task), dlm->reco.dead_node); - spin_unlock(&dlm->spinlock); /* take write barrier */ /* (stops the list reshuffling thread, proxy ast handling) */ dlm_begin_recovery(dlm); + spin_unlock(&dlm->spinlock); + if (dlm->reco.new_master == dlm->node_num) goto master_here; @@ -739,7 +746,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) } if (destroy) - dlm_destroy_recovery_area(dlm, dead_node); + dlm_destroy_recovery_area(dlm); return status; } @@ -764,7 +771,7 @@ static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node) ndata = kzalloc(sizeof(*ndata), GFP_NOFS); if (!ndata) { - dlm_destroy_recovery_area(dlm, dead_node); + dlm_destroy_recovery_area(dlm); return -ENOMEM; } ndata->node_num = num; @@ -778,7 +785,7 @@ static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node) return 0; } -static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node) +static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm) { struct dlm_reco_node_data *ndata, *next; LIST_HEAD(tmplist); @@ -1378,6 +1385,15 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data, if (!dlm_grab(dlm)) return -EINVAL; + if (!dlm_joined(dlm)) { + mlog(ML_ERROR, "Domain %s not joined! " + "lockres %.*s, master %u\n", + dlm->name, mres->lockname_len, + mres->lockname, mres->master); + dlm_put(dlm); + return -EINVAL; + } + BUG_ON(!(mres->flags & (DLM_MRES_RECOVERY|DLM_MRES_MIGRATION))); real_master = mres->master; @@ -1807,7 +1823,6 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, int i, j, bad; struct dlm_lock *lock; u8 from = O2NM_MAX_NODES; - unsigned int added = 0; __be64 c; mlog(0, "running %d locks for this lockres\n", mres->num_locks); @@ -1823,7 +1838,6 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, spin_lock(&res->spinlock); dlm_lockres_set_refmap_bit(dlm, res, from); spin_unlock(&res->spinlock); - added++; break; } BUG_ON(ml->highest_blocked != LKM_IVMODE); @@ -1911,7 +1925,6 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, /* do not alter lock refcount. switching lists. */ list_move_tail(&lock->list, queue); spin_unlock(&res->spinlock); - added++; mlog(0, "just reordered a local lock!\n"); continue; @@ -2037,7 +2050,6 @@ skip_lvb: "setting refmap bit\n", dlm->name, res->lockname.len, res->lockname.name, ml->node); dlm_lockres_set_refmap_bit(dlm, res, ml->node); - added++; } spin_unlock(&res->spinlock); } @@ -2331,13 +2343,6 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, __dlm_dirty_lockres(dlm, res); } -/* if this node is the recovery master, and there are no - * locks for a given lockres owned by this node that are in - * either PR or EX mode, zero out the lvb before requesting. - * - */ - - static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) { struct dlm_lock_resource *res; diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 9479f99c2145..97a972efab83 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -1756,8 +1756,7 @@ int ocfs2_rw_lock(struct inode *inode, int write) level = write ? DLM_LOCK_EX : DLM_LOCK_PR; - status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, - 0); + status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); if (status < 0) mlog_errno(status); @@ -1796,7 +1795,7 @@ void ocfs2_rw_unlock(struct inode *inode, int write) write ? "EXMODE" : "PRMODE"); if (!ocfs2_mount_local(osb)) - ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); + ocfs2_cluster_unlock(osb, lockres, level); } /* @@ -1816,8 +1815,7 @@ int ocfs2_open_lock(struct inode *inode) lockres = &OCFS2_I(inode)->ip_open_lockres; - status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, - DLM_LOCK_PR, 0, 0); + status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_PR, 0, 0); if (status < 0) mlog_errno(status); @@ -1854,8 +1852,7 @@ int ocfs2_try_open_lock(struct inode *inode, int write) * other nodes and the -EAGAIN will indicate to the caller that * this inode is still in use. */ - status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, - level, DLM_LKF_NOQUEUE, 0); + status = ocfs2_cluster_lock(osb, lockres, level, DLM_LKF_NOQUEUE, 0); out: return status; @@ -1876,11 +1873,9 @@ void ocfs2_open_unlock(struct inode *inode) goto out; if(lockres->l_ro_holders) - ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, - DLM_LOCK_PR); + ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_PR); if(lockres->l_ex_holders) - ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, - DLM_LOCK_EX); + ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); out: return; @@ -2601,9 +2596,9 @@ void ocfs2_inode_unlock(struct inode *inode, (unsigned long long)OCFS2_I(inode)->ip_blkno, ex ? "EXMODE" : "PRMODE"); - if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && + if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) - ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); + ocfs2_cluster_unlock(osb, lockres, level); } /* @@ -3537,7 +3532,7 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb, * On DLM_LKF_VALBLK, fsdlm behaves differently with o2cb. It always * expects DLM_LKF_VALBLK being set if the LKB has LVB, so that * we can recover correctly from node failure. Otherwise, we may get - * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set. + * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set. */ if (!ocfs2_is_o2cb_active() && lockres->l_ops->flags & LOCK_TYPE_USES_LVB) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 5d1784a365a3..6ee94bc23f5b 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -101,7 +101,7 @@ static int ocfs2_file_open(struct inode *inode, struct file *file) struct ocfs2_inode_info *oi = OCFS2_I(inode); trace_ocfs2_file_open(inode, file, file->f_path.dentry, - (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)oi->ip_blkno, file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name, mode); @@ -116,7 +116,7 @@ static int ocfs2_file_open(struct inode *inode, struct file *file) /* Check that the inode hasn't been wiped from disk by another * node. If it hasn't then we're safe as long as we hold the * spin lock until our increment of open count. */ - if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { + if (oi->ip_flags & OCFS2_INODE_DELETED) { spin_unlock(&oi->ip_lock); status = -ENOENT; @@ -190,7 +190,7 @@ static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end, bool needs_barrier = false; trace_ocfs2_sync_file(inode, file, file->f_path.dentry, - OCFS2_I(inode)->ip_blkno, + oi->ip_blkno, file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name, (unsigned long long)datasync); @@ -296,7 +296,7 @@ int ocfs2_update_inode_atime(struct inode *inode, ocfs2_journal_dirty(handle, bh); out_commit: - ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); + ocfs2_commit_trans(osb, handle); out: return ret; } @@ -2257,7 +2257,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0; int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0; - trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry, + trace_ocfs2_file_write_iter(inode, file, file->f_path.dentry, (unsigned long long)OCFS2_I(inode)->ip_blkno, file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name, @@ -2405,7 +2405,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0; int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0; - trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry, + trace_ocfs2_file_read_iter(inode, filp, filp->f_path.dentry, (unsigned long long)OCFS2_I(inode)->ip_blkno, filp->f_path.dentry->d_name.len, filp->f_path.dentry->d_name.name, @@ -2448,7 +2448,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, * * Take and drop the meta data lock to update inode fields * like i_size. This allows the checks down below - * generic_file_aio_read() a chance of actually working. + * generic_file_read_iter() a chance of actually working. */ ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level, !nowait); @@ -2460,7 +2460,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, ocfs2_inode_unlock(inode, lock_level); ret = generic_file_read_iter(iocb, to); - trace_generic_file_aio_read_ret(ret); + trace_generic_file_read_iter_ret(ret); /* buffered aio wouldn't have proper lock coverage today */ BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT)); diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c index 6b92cb241138..f65f2b2f594d 100644 --- a/fs/ocfs2/filecheck.c +++ b/fs/ocfs2/filecheck.c @@ -53,36 +53,6 @@ static const char * const ocfs2_filecheck_errs[] = { "UNSUPPORTED" }; -static DEFINE_SPINLOCK(ocfs2_filecheck_sysfs_lock); -static LIST_HEAD(ocfs2_filecheck_sysfs_list); - -struct ocfs2_filecheck { - struct list_head fc_head; /* File check entry list head */ - spinlock_t fc_lock; - unsigned int fc_max; /* Maximum number of entry in list */ - unsigned int fc_size; /* Current entry count in list */ - unsigned int fc_done; /* Finished entry count in list */ -}; - -struct ocfs2_filecheck_sysfs_entry { /* sysfs entry per mounting */ - struct list_head fs_list; - atomic_t fs_count; - struct super_block *fs_sb; - struct kset *fs_devicekset; - struct kset *fs_fcheckkset; - struct ocfs2_filecheck *fs_fcheck; -}; - -#define OCFS2_FILECHECK_MAXSIZE 100 -#define OCFS2_FILECHECK_MINSIZE 10 - -/* File check operation type */ -enum { - OCFS2_FILECHECK_TYPE_CHK = 0, /* Check a file(inode) */ - OCFS2_FILECHECK_TYPE_FIX, /* Fix a file(inode) */ - OCFS2_FILECHECK_TYPE_SET = 100 /* Set entry list maximum size */ -}; - struct ocfs2_filecheck_entry { struct list_head fe_list; unsigned long fe_ino; @@ -110,35 +80,84 @@ ocfs2_filecheck_error(int errno) return ocfs2_filecheck_errs[errno - OCFS2_FILECHECK_ERR_START + 1]; } -static ssize_t ocfs2_filecheck_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf); -static ssize_t ocfs2_filecheck_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count); -static struct kobj_attribute ocfs2_attr_filecheck_chk = +static ssize_t ocfs2_filecheck_attr_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf); +static ssize_t ocfs2_filecheck_attr_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count); +static struct kobj_attribute ocfs2_filecheck_attr_chk = __ATTR(check, S_IRUSR | S_IWUSR, - ocfs2_filecheck_show, - ocfs2_filecheck_store); -static struct kobj_attribute ocfs2_attr_filecheck_fix = + ocfs2_filecheck_attr_show, + ocfs2_filecheck_attr_store); +static struct kobj_attribute ocfs2_filecheck_attr_fix = __ATTR(fix, S_IRUSR | S_IWUSR, - ocfs2_filecheck_show, - ocfs2_filecheck_store); -static struct kobj_attribute ocfs2_attr_filecheck_set = + ocfs2_filecheck_attr_show, + ocfs2_filecheck_attr_store); +static struct kobj_attribute ocfs2_filecheck_attr_set = __ATTR(set, S_IRUSR | S_IWUSR, - ocfs2_filecheck_show, - ocfs2_filecheck_store); + ocfs2_filecheck_attr_show, + ocfs2_filecheck_attr_store); +static struct attribute *ocfs2_filecheck_attrs[] = { + &ocfs2_filecheck_attr_chk.attr, + &ocfs2_filecheck_attr_fix.attr, + &ocfs2_filecheck_attr_set.attr, + NULL +}; + +static void ocfs2_filecheck_release(struct kobject *kobj) +{ + struct ocfs2_filecheck_sysfs_entry *entry = container_of(kobj, + struct ocfs2_filecheck_sysfs_entry, fs_kobj); + + complete(&entry->fs_kobj_unregister); +} + +static ssize_t +ocfs2_filecheck_show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + ssize_t ret = -EIO; + struct kobj_attribute *kattr = container_of(attr, + struct kobj_attribute, attr); + + kobject_get(kobj); + if (kattr->show) + ret = kattr->show(kobj, kattr, buf); + kobject_put(kobj); + return ret; +} + +static ssize_t +ocfs2_filecheck_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + ssize_t ret = -EIO; + struct kobj_attribute *kattr = container_of(attr, + struct kobj_attribute, attr); + + kobject_get(kobj); + if (kattr->store) + ret = kattr->store(kobj, kattr, buf, count); + kobject_put(kobj); + return ret; +} + +static const struct sysfs_ops ocfs2_filecheck_ops = { + .show = ocfs2_filecheck_show, + .store = ocfs2_filecheck_store, +}; + +static struct kobj_type ocfs2_ktype_filecheck = { + .default_attrs = ocfs2_filecheck_attrs, + .sysfs_ops = &ocfs2_filecheck_ops, + .release = ocfs2_filecheck_release, +}; static void ocfs2_filecheck_sysfs_free(struct ocfs2_filecheck_sysfs_entry *entry) { struct ocfs2_filecheck_entry *p; - if (!atomic_dec_and_test(&entry->fs_count)) { - wait_var_event(&entry->fs_count, - !atomic_read(&entry->fs_count)); - } - spin_lock(&entry->fs_fcheck->fc_lock); while (!list_empty(&entry->fs_fcheck->fc_head)) { p = list_first_entry(&entry->fs_fcheck->fc_head, @@ -149,151 +168,48 @@ ocfs2_filecheck_sysfs_free(struct ocfs2_filecheck_sysfs_entry *entry) } spin_unlock(&entry->fs_fcheck->fc_lock); - kset_unregister(entry->fs_fcheckkset); - kset_unregister(entry->fs_devicekset); kfree(entry->fs_fcheck); - kfree(entry); -} - -static void -ocfs2_filecheck_sysfs_add(struct ocfs2_filecheck_sysfs_entry *entry) -{ - spin_lock(&ocfs2_filecheck_sysfs_lock); - list_add_tail(&entry->fs_list, &ocfs2_filecheck_sysfs_list); - spin_unlock(&ocfs2_filecheck_sysfs_lock); + entry->fs_fcheck = NULL; } -static int ocfs2_filecheck_sysfs_del(const char *devname) +int ocfs2_filecheck_create_sysfs(struct ocfs2_super *osb) { - struct ocfs2_filecheck_sysfs_entry *p; - - spin_lock(&ocfs2_filecheck_sysfs_lock); - list_for_each_entry(p, &ocfs2_filecheck_sysfs_list, fs_list) { - if (!strcmp(p->fs_sb->s_id, devname)) { - list_del(&p->fs_list); - spin_unlock(&ocfs2_filecheck_sysfs_lock); - ocfs2_filecheck_sysfs_free(p); - return 0; - } - } - spin_unlock(&ocfs2_filecheck_sysfs_lock); - return 1; -} - -static void -ocfs2_filecheck_sysfs_put(struct ocfs2_filecheck_sysfs_entry *entry) -{ - if (atomic_dec_and_test(&entry->fs_count)) - wake_up_var(&entry->fs_count); -} - -static struct ocfs2_filecheck_sysfs_entry * -ocfs2_filecheck_sysfs_get(const char *devname) -{ - struct ocfs2_filecheck_sysfs_entry *p = NULL; - - spin_lock(&ocfs2_filecheck_sysfs_lock); - list_for_each_entry(p, &ocfs2_filecheck_sysfs_list, fs_list) { - if (!strcmp(p->fs_sb->s_id, devname)) { - atomic_inc(&p->fs_count); - spin_unlock(&ocfs2_filecheck_sysfs_lock); - return p; - } - } - spin_unlock(&ocfs2_filecheck_sysfs_lock); - return NULL; -} - -int ocfs2_filecheck_create_sysfs(struct super_block *sb) -{ - int ret = 0; - struct kset *device_kset = NULL; - struct kset *fcheck_kset = NULL; - struct ocfs2_filecheck *fcheck = NULL; - struct ocfs2_filecheck_sysfs_entry *entry = NULL; - struct attribute **attrs = NULL; - struct attribute_group attrgp; - - if (!ocfs2_kset) - return -ENOMEM; - - attrs = kmalloc(sizeof(struct attribute *) * 4, GFP_NOFS); - if (!attrs) { - ret = -ENOMEM; - goto error; - } else { - attrs[0] = &ocfs2_attr_filecheck_chk.attr; - attrs[1] = &ocfs2_attr_filecheck_fix.attr; - attrs[2] = &ocfs2_attr_filecheck_set.attr; - attrs[3] = NULL; - memset(&attrgp, 0, sizeof(attrgp)); - attrgp.attrs = attrs; - } + int ret; + struct ocfs2_filecheck *fcheck; + struct ocfs2_filecheck_sysfs_entry *entry = &osb->osb_fc_ent; fcheck = kmalloc(sizeof(struct ocfs2_filecheck), GFP_NOFS); - if (!fcheck) { - ret = -ENOMEM; - goto error; - } else { - INIT_LIST_HEAD(&fcheck->fc_head); - spin_lock_init(&fcheck->fc_lock); - fcheck->fc_max = OCFS2_FILECHECK_MINSIZE; - fcheck->fc_size = 0; - fcheck->fc_done = 0; - } - - if (strlen(sb->s_id) <= 0) { - mlog(ML_ERROR, - "Cannot get device basename when create filecheck sysfs\n"); - ret = -ENODEV; - goto error; - } - - device_kset = kset_create_and_add(sb->s_id, NULL, &ocfs2_kset->kobj); - if (!device_kset) { - ret = -ENOMEM; - goto error; - } - - fcheck_kset = kset_create_and_add("filecheck", NULL, - &device_kset->kobj); - if (!fcheck_kset) { - ret = -ENOMEM; - goto error; - } - - ret = sysfs_create_group(&fcheck_kset->kobj, &attrgp); - if (ret) - goto error; + if (!fcheck) + return -ENOMEM; - entry = kmalloc(sizeof(struct ocfs2_filecheck_sysfs_entry), GFP_NOFS); - if (!entry) { - ret = -ENOMEM; - goto error; - } else { - atomic_set(&entry->fs_count, 1); - entry->fs_sb = sb; - entry->fs_devicekset = device_kset; - entry->fs_fcheckkset = fcheck_kset; - entry->fs_fcheck = fcheck; - ocfs2_filecheck_sysfs_add(entry); + INIT_LIST_HEAD(&fcheck->fc_head); + spin_lock_init(&fcheck->fc_lock); + fcheck->fc_max = OCFS2_FILECHECK_MINSIZE; + fcheck->fc_size = 0; + fcheck->fc_done = 0; + + entry->fs_kobj.kset = osb->osb_dev_kset; + init_completion(&entry->fs_kobj_unregister); + ret = kobject_init_and_add(&entry->fs_kobj, &ocfs2_ktype_filecheck, + NULL, "filecheck"); + if (ret) { + kfree(fcheck); + return ret; } - kfree(attrs); + entry->fs_fcheck = fcheck; return 0; - -error: - kfree(attrs); - kfree(entry); - kfree(fcheck); - kset_unregister(fcheck_kset); - kset_unregister(device_kset); - return ret; } -int ocfs2_filecheck_remove_sysfs(struct super_block *sb) +void ocfs2_filecheck_remove_sysfs(struct ocfs2_super *osb) { - return ocfs2_filecheck_sysfs_del(sb->s_id); + if (!osb->osb_fc_ent.fs_fcheck) + return; + + kobject_del(&osb->osb_fc_ent.fs_kobj); + kobject_put(&osb->osb_fc_ent.fs_kobj); + wait_for_completion(&osb->osb_fc_ent.fs_kobj_unregister); + ocfs2_filecheck_sysfs_free(&osb->osb_fc_ent); } static int @@ -310,7 +226,7 @@ ocfs2_filecheck_adjust_max(struct ocfs2_filecheck_sysfs_entry *ent, spin_lock(&ent->fs_fcheck->fc_lock); if (len < (ent->fs_fcheck->fc_size - ent->fs_fcheck->fc_done)) { - mlog(ML_ERROR, + mlog(ML_NOTICE, "Cannot set online file check maximum entry number " "to %u due to too many pending entries(%u)\n", len, ent->fs_fcheck->fc_size - ent->fs_fcheck->fc_done); @@ -387,7 +303,7 @@ ocfs2_filecheck_args_parse(const char *name, const char *buf, size_t count, return 0; } -static ssize_t ocfs2_filecheck_show(struct kobject *kobj, +static ssize_t ocfs2_filecheck_attr_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -395,19 +311,12 @@ static ssize_t ocfs2_filecheck_show(struct kobject *kobj, ssize_t ret = 0, total = 0, remain = PAGE_SIZE; unsigned int type; struct ocfs2_filecheck_entry *p; - struct ocfs2_filecheck_sysfs_entry *ent; + struct ocfs2_filecheck_sysfs_entry *ent = container_of(kobj, + struct ocfs2_filecheck_sysfs_entry, fs_kobj); if (ocfs2_filecheck_type_parse(attr->attr.name, &type)) return -EINVAL; - ent = ocfs2_filecheck_sysfs_get(kobj->parent->name); - if (!ent) { - mlog(ML_ERROR, - "Cannot get the corresponding entry via device basename %s\n", - kobj->name); - return -ENODEV; - } - if (type == OCFS2_FILECHECK_TYPE_SET) { spin_lock(&ent->fs_fcheck->fc_lock); total = snprintf(buf, remain, "%u\n", ent->fs_fcheck->fc_max); @@ -441,11 +350,26 @@ static ssize_t ocfs2_filecheck_show(struct kobject *kobj, spin_unlock(&ent->fs_fcheck->fc_lock); exit: - ocfs2_filecheck_sysfs_put(ent); return total; } -static int +static inline int +ocfs2_filecheck_is_dup_entry(struct ocfs2_filecheck_sysfs_entry *ent, + unsigned long ino) +{ + struct ocfs2_filecheck_entry *p; + + list_for_each_entry(p, &ent->fs_fcheck->fc_head, fe_list) { + if (!p->fe_done) { + if (p->fe_ino == ino) + return 1; + } + } + + return 0; +} + +static inline int ocfs2_filecheck_erase_entry(struct ocfs2_filecheck_sysfs_entry *ent) { struct ocfs2_filecheck_entry *p; @@ -484,21 +408,21 @@ static void ocfs2_filecheck_done_entry(struct ocfs2_filecheck_sysfs_entry *ent, struct ocfs2_filecheck_entry *entry) { - entry->fe_done = 1; spin_lock(&ent->fs_fcheck->fc_lock); + entry->fe_done = 1; ent->fs_fcheck->fc_done++; spin_unlock(&ent->fs_fcheck->fc_lock); } static unsigned int -ocfs2_filecheck_handle(struct super_block *sb, +ocfs2_filecheck_handle(struct ocfs2_super *osb, unsigned long ino, unsigned int flags) { unsigned int ret = OCFS2_FILECHECK_ERR_SUCCESS; struct inode *inode = NULL; int rc; - inode = ocfs2_iget(OCFS2_SB(sb), ino, flags, 0); + inode = ocfs2_iget(osb, ino, flags, 0); if (IS_ERR(inode)) { rc = (int)(-(long)inode); if (rc >= OCFS2_FILECHECK_ERR_START && @@ -516,11 +440,14 @@ static void ocfs2_filecheck_handle_entry(struct ocfs2_filecheck_sysfs_entry *ent, struct ocfs2_filecheck_entry *entry) { + struct ocfs2_super *osb = container_of(ent, struct ocfs2_super, + osb_fc_ent); + if (entry->fe_type == OCFS2_FILECHECK_TYPE_CHK) - entry->fe_status = ocfs2_filecheck_handle(ent->fs_sb, + entry->fe_status = ocfs2_filecheck_handle(osb, entry->fe_ino, OCFS2_FI_FLAG_FILECHECK_CHK); else if (entry->fe_type == OCFS2_FILECHECK_TYPE_FIX) - entry->fe_status = ocfs2_filecheck_handle(ent->fs_sb, + entry->fe_status = ocfs2_filecheck_handle(osb, entry->fe_ino, OCFS2_FI_FLAG_FILECHECK_FIX); else entry->fe_status = OCFS2_FILECHECK_ERR_UNSUPPORTED; @@ -528,30 +455,21 @@ ocfs2_filecheck_handle_entry(struct ocfs2_filecheck_sysfs_entry *ent, ocfs2_filecheck_done_entry(ent, entry); } -static ssize_t ocfs2_filecheck_store(struct kobject *kobj, +static ssize_t ocfs2_filecheck_attr_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { + ssize_t ret = 0; struct ocfs2_filecheck_args args; struct ocfs2_filecheck_entry *entry; - struct ocfs2_filecheck_sysfs_entry *ent; - ssize_t ret = 0; + struct ocfs2_filecheck_sysfs_entry *ent = container_of(kobj, + struct ocfs2_filecheck_sysfs_entry, fs_kobj); if (count == 0) return count; - if (ocfs2_filecheck_args_parse(attr->attr.name, buf, count, &args)) { - mlog(ML_ERROR, "Invalid arguments for online file check\n"); + if (ocfs2_filecheck_args_parse(attr->attr.name, buf, count, &args)) return -EINVAL; - } - - ent = ocfs2_filecheck_sysfs_get(kobj->parent->name); - if (!ent) { - mlog(ML_ERROR, - "Cannot get the corresponding entry via device basename %s\n", - kobj->parent->name); - return -ENODEV; - } if (args.fa_type == OCFS2_FILECHECK_TYPE_SET) { ret = ocfs2_filecheck_adjust_max(ent, args.fa_len); @@ -565,13 +483,16 @@ static ssize_t ocfs2_filecheck_store(struct kobject *kobj, } spin_lock(&ent->fs_fcheck->fc_lock); - if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) && - (ent->fs_fcheck->fc_done == 0)) { - mlog(ML_ERROR, + if (ocfs2_filecheck_is_dup_entry(ent, args.fa_ino)) { + ret = -EEXIST; + kfree(entry); + } else if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) && + (ent->fs_fcheck->fc_done == 0)) { + mlog(ML_NOTICE, "Cannot do more file check " "since file check queue(%u) is full now\n", ent->fs_fcheck->fc_max); - ret = -EBUSY; + ret = -EAGAIN; kfree(entry); } else { if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) && @@ -596,6 +517,5 @@ static ssize_t ocfs2_filecheck_store(struct kobject *kobj, ocfs2_filecheck_handle_entry(ent, entry); exit: - ocfs2_filecheck_sysfs_put(ent); return (!ret ? count : ret); } diff --git a/fs/ocfs2/filecheck.h b/fs/ocfs2/filecheck.h index e5cd002a2c09..6a22ee79e8d0 100644 --- a/fs/ocfs2/filecheck.h +++ b/fs/ocfs2/filecheck.h @@ -43,7 +43,32 @@ enum { #define OCFS2_FILECHECK_ERR_START OCFS2_FILECHECK_ERR_FAILED #define OCFS2_FILECHECK_ERR_END OCFS2_FILECHECK_ERR_UNSUPPORTED -int ocfs2_filecheck_create_sysfs(struct super_block *sb); -int ocfs2_filecheck_remove_sysfs(struct super_block *sb); +struct ocfs2_filecheck { + struct list_head fc_head; /* File check entry list head */ + spinlock_t fc_lock; + unsigned int fc_max; /* Maximum number of entry in list */ + unsigned int fc_size; /* Current entry count in list */ + unsigned int fc_done; /* Finished entry count in list */ +}; + +#define OCFS2_FILECHECK_MAXSIZE 100 +#define OCFS2_FILECHECK_MINSIZE 10 + +/* File check operation type */ +enum { + OCFS2_FILECHECK_TYPE_CHK = 0, /* Check a file(inode) */ + OCFS2_FILECHECK_TYPE_FIX, /* Fix a file(inode) */ + OCFS2_FILECHECK_TYPE_SET = 100 /* Set entry list maximum size */ +}; + +struct ocfs2_filecheck_sysfs_entry { /* sysfs entry per partition */ + struct kobject fs_kobj; + struct completion fs_kobj_unregister; + struct ocfs2_filecheck *fs_fcheck; +}; + + +int ocfs2_filecheck_create_sysfs(struct ocfs2_super *osb); +void ocfs2_filecheck_remove_sysfs(struct ocfs2_super *osb); #endif /* FILECHECK_H */ diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index d51b80edd972..ddc3e9470c87 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -1135,7 +1135,7 @@ static void ocfs2_clear_inode(struct inode *inode) trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno, inode->i_nlink); - mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, + mlog_bug_on_msg(osb == NULL, "Inode=%lu\n", inode->i_ino); dquot_drop(inode); @@ -1150,7 +1150,7 @@ static void ocfs2_clear_inode(struct inode *inode) ocfs2_mark_lockres_freeing(osb, &oi->ip_inode_lockres); ocfs2_mark_lockres_freeing(osb, &oi->ip_open_lockres); - ocfs2_resv_discard(&OCFS2_SB(inode->i_sb)->osb_la_resmap, + ocfs2_resv_discard(&osb->osb_la_resmap, &oi->ip_la_data_resv); ocfs2_resv_init_once(&oi->ip_la_data_resv); @@ -1160,7 +1160,7 @@ static void ocfs2_clear_inode(struct inode *inode) * exception here are successfully wiped inodes - their * metadata can now be considered to be part of the system * inodes from which it came. */ - if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED)) + if (!(oi->ip_flags & OCFS2_INODE_DELETED)) ocfs2_checkpoint_inode(inode); mlog_bug_on_msg(!list_empty(&oi->ip_io_markers), @@ -1223,7 +1223,7 @@ static void ocfs2_clear_inode(struct inode *inode) * the journal is flushed before journal shutdown. Thus it is safe to * have inodes get cleaned up after journal shutdown. */ - jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal, + jbd2_journal_release_jbd_inode(osb->journal->j_journal, &oi->ip_jinode); } diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index c801eddc4bf3..8dd6f703c819 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -525,7 +525,7 @@ static int __ocfs2_mknod_locked(struct inode *dir, * these are used by the support functions here and in * callers. */ inode->i_ino = ino_from_blkno(osb->sb, fe_blkno); - OCFS2_I(inode)->ip_blkno = fe_blkno; + oi->ip_blkno = fe_blkno; spin_lock(&osb->osb_lock); inode->i_generation = osb->s_next_generation++; spin_unlock(&osb->osb_lock); @@ -1186,8 +1186,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, } trace_ocfs2_double_lock_end( - (unsigned long long)OCFS2_I(inode1)->ip_blkno, - (unsigned long long)OCFS2_I(inode2)->ip_blkno); + (unsigned long long)oi1->ip_blkno, + (unsigned long long)oi2->ip_blkno); bail: if (status) diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 6867eef2e06b..4f86ac0027b5 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -50,6 +50,8 @@ #include "reservations.h" +#include "filecheck.h" + /* Caching of metadata buffers */ /* Most user visible OCFS2 inodes will have very few pieces of @@ -472,6 +474,12 @@ struct ocfs2_super * workqueue and schedule on our own. */ struct workqueue_struct *ocfs2_wq; + + /* sysfs directory per partition */ + struct kset *osb_dev_kset; + + /* file check related stuff */ + struct ocfs2_filecheck_sysfs_entry osb_fc_ent; }; #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h index e2a11aaece10..2ee76a90ba8f 100644 --- a/fs/ocfs2/ocfs2_trace.h +++ b/fs/ocfs2/ocfs2_trace.h @@ -1311,11 +1311,11 @@ DEFINE_OCFS2_FILE_OPS(ocfs2_file_release); DEFINE_OCFS2_FILE_OPS(ocfs2_sync_file); -DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_write); +DEFINE_OCFS2_FILE_OPS(ocfs2_file_write_iter); DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write); -DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_read); +DEFINE_OCFS2_FILE_OPS(ocfs2_file_read_iter); DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file); @@ -1467,7 +1467,7 @@ TRACE_EVENT(ocfs2_prepare_inode_for_write, __entry->saved_pos, __entry->count, __entry->wait) ); -DEFINE_OCFS2_INT_EVENT(generic_file_aio_read_ret); +DEFINE_OCFS2_INT_EVENT(generic_file_read_iter_ret); /* End of trace events for fs/ocfs2/file.c. */ diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index ab156e35ec00..01c6b3894406 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -573,7 +573,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode, BUG_ON(ocfs2_is_refcount_inode(inode)); trace_ocfs2_create_refcount_tree( - (unsigned long long)OCFS2_I(inode)->ip_blkno); + (unsigned long long)oi->ip_blkno); ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); if (ret) { @@ -3359,7 +3359,7 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context) unsigned int ext_flags; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) { + if (!ocfs2_refcount_tree(osb)) { return ocfs2_error(inode->i_sb, "Inode %lu want to use refcount tree, but the feature bit is not set in the super block\n", inode->i_ino); } @@ -3707,7 +3707,7 @@ int ocfs2_add_refcount_flag(struct inode *inode, trace_ocfs2_add_refcount_flag(ref_blocks, credits); if (ref_blocks) { - ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), + ret = ocfs2_reserve_new_metadata_blocks(osb, ref_blocks, &meta_ac); if (ret) { mlog_errno(ret); @@ -4766,8 +4766,8 @@ static int ocfs2_reflink_inodes_lock(struct inode *s_inode, *bh2 = *bh1; trace_ocfs2_double_lock_end( - (unsigned long long)OCFS2_I(inode1)->ip_blkno, - (unsigned long long)OCFS2_I(inode2)->ip_blkno); + (unsigned long long)oi1->ip_blkno, + (unsigned long long)oi2->ip_blkno); return 0; diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index dae9eb7c441e..d2fb97b173da 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -398,7 +398,7 @@ static int ocfs2_control_do_setnode_msg(struct file *file, static int ocfs2_control_do_setversion_msg(struct file *file, struct ocfs2_control_message_setv *msg) - { +{ long major, minor; char *ptr = NULL; struct ocfs2_control_private *p = file->private_data; diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index d8f5f6ce99dc..f7c972fbed6a 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -79,8 +79,6 @@ static u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res) return ocfs2_which_suballoc_group(res->sr_blkno, res->sr_bit_offset); } -static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); -static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); static int ocfs2_block_group_fill(handle_t *handle, struct inode *alloc_inode, @@ -387,7 +385,7 @@ static int ocfs2_block_group_fill(handle_t *handle, memset(bg, 0, sb->s_blocksize); strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE); - bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); + bg->bg_generation = cpu_to_le32(osb->fs_generation); bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb, 1, osb->s_feature_incompat)); bg->bg_chain = cpu_to_le16(my_chain); @@ -1521,7 +1519,7 @@ static int ocfs2_cluster_group_search(struct inode *inode, OCFS2_I(inode)->ip_clusters, max_bits); } - ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), + ret = ocfs2_block_group_find_clear_bits(osb, group_bh, bits_wanted, max_bits, res); if (ret) @@ -2626,53 +2624,6 @@ int ocfs2_release_clusters(handle_t *handle, _ocfs2_clear_bit); } -static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg) -{ - printk("Block Group:\n"); - printk("bg_signature: %s\n", bg->bg_signature); - printk("bg_size: %u\n", bg->bg_size); - printk("bg_bits: %u\n", bg->bg_bits); - printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count); - printk("bg_chain: %u\n", bg->bg_chain); - printk("bg_generation: %u\n", le32_to_cpu(bg->bg_generation)); - printk("bg_next_group: %llu\n", - (unsigned long long)bg->bg_next_group); - printk("bg_parent_dinode: %llu\n", - (unsigned long long)bg->bg_parent_dinode); - printk("bg_blkno: %llu\n", - (unsigned long long)bg->bg_blkno); -} - -static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe) -{ - int i; - - printk("Suballoc Inode %llu:\n", (unsigned long long)fe->i_blkno); - printk("i_signature: %s\n", fe->i_signature); - printk("i_size: %llu\n", - (unsigned long long)fe->i_size); - printk("i_clusters: %u\n", fe->i_clusters); - printk("i_generation: %u\n", - le32_to_cpu(fe->i_generation)); - printk("id1.bitmap1.i_used: %u\n", - le32_to_cpu(fe->id1.bitmap1.i_used)); - printk("id1.bitmap1.i_total: %u\n", - le32_to_cpu(fe->id1.bitmap1.i_total)); - printk("id2.i_chain.cl_cpg: %u\n", fe->id2.i_chain.cl_cpg); - printk("id2.i_chain.cl_bpc: %u\n", fe->id2.i_chain.cl_bpc); - printk("id2.i_chain.cl_count: %u\n", fe->id2.i_chain.cl_count); - printk("id2.i_chain.cl_next_free_rec: %u\n", - fe->id2.i_chain.cl_next_free_rec); - for(i = 0; i < fe->id2.i_chain.cl_next_free_rec; i++) { - printk("fe->id2.i_chain.cl_recs[%d].c_free: %u\n", i, - fe->id2.i_chain.cl_recs[i].c_free); - printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i, - fe->id2.i_chain.cl_recs[i].c_total); - printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", i, - (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno); - } -} - /* * For a given allocation, determine which allocators will need to be * accessed, and lock them, reserving the appropriate number of bits. diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index ffa4952d432b..3415e0b09398 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -423,10 +423,10 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait) ocfs2_schedule_truncate_log_flush(osb, 0); } - if (jbd2_journal_start_commit(OCFS2_SB(sb)->journal->j_journal, + if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) { if (wait) - jbd2_log_wait_commit(OCFS2_SB(sb)->journal->j_journal, + jbd2_log_wait_commit(osb->journal->j_journal, target); } return 0; @@ -1161,6 +1161,23 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) ocfs2_complete_mount_recovery(osb); + osb->osb_dev_kset = kset_create_and_add(sb->s_id, NULL, + &ocfs2_kset->kobj); + if (!osb->osb_dev_kset) { + status = -ENOMEM; + mlog(ML_ERROR, "Unable to create device kset %s.\n", sb->s_id); + goto read_super_error; + } + + /* Create filecheck sysfs related directories/files at + * /sys/fs/ocfs2/<devname>/filecheck */ + if (ocfs2_filecheck_create_sysfs(osb)) { + status = -ENOMEM; + mlog(ML_ERROR, "Unable to create filecheck sysfs directory at " + "/sys/fs/ocfs2/%s/filecheck.\n", sb->s_id); + goto read_super_error; + } + if (ocfs2_mount_local(osb)) snprintf(nodestr, sizeof(nodestr), "local"); else @@ -1199,9 +1216,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) /* Start this when the mount is almost sure of being successful */ ocfs2_orphan_scan_start(osb); - /* Create filecheck sysfile /sys/fs/ocfs2/<devname>/filecheck */ - ocfs2_filecheck_create_sysfs(sb); - return status; read_super_error: @@ -1653,7 +1667,6 @@ static void ocfs2_put_super(struct super_block *sb) ocfs2_sync_blockdev(sb); ocfs2_dismount_volume(sb, 0); - ocfs2_filecheck_remove_sysfs(sb); } static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) @@ -1768,12 +1781,9 @@ static int ocfs2_initialize_mem_caches(void) NULL); if (!ocfs2_inode_cachep || !ocfs2_dquot_cachep || !ocfs2_qf_chunk_cachep) { - if (ocfs2_inode_cachep) - kmem_cache_destroy(ocfs2_inode_cachep); - if (ocfs2_dquot_cachep) - kmem_cache_destroy(ocfs2_dquot_cachep); - if (ocfs2_qf_chunk_cachep) - kmem_cache_destroy(ocfs2_qf_chunk_cachep); + kmem_cache_destroy(ocfs2_inode_cachep); + kmem_cache_destroy(ocfs2_dquot_cachep); + kmem_cache_destroy(ocfs2_qf_chunk_cachep); return -ENOMEM; } @@ -1787,16 +1797,13 @@ static void ocfs2_free_mem_caches(void) * destroy cache. */ rcu_barrier(); - if (ocfs2_inode_cachep) - kmem_cache_destroy(ocfs2_inode_cachep); + kmem_cache_destroy(ocfs2_inode_cachep); ocfs2_inode_cachep = NULL; - if (ocfs2_dquot_cachep) - kmem_cache_destroy(ocfs2_dquot_cachep); + kmem_cache_destroy(ocfs2_dquot_cachep); ocfs2_dquot_cachep = NULL; - if (ocfs2_qf_chunk_cachep) - kmem_cache_destroy(ocfs2_qf_chunk_cachep); + kmem_cache_destroy(ocfs2_qf_chunk_cachep); ocfs2_qf_chunk_cachep = NULL; } @@ -1899,6 +1906,12 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) osb = OCFS2_SB(sb); BUG_ON(!osb); + /* Remove file check sysfs related directores/files, + * and wait for the pending file check operations */ + ocfs2_filecheck_remove_sysfs(osb); + + kset_unregister(osb->osb_dev_kset); + debugfs_remove(osb->osb_ctxt); /* Orphan scan should be stopped as early as possible */ diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c index 82e17b076ce7..78f09c76ab3c 100644 --- a/fs/ocfs2/uptodate.c +++ b/fs/ocfs2/uptodate.c @@ -633,6 +633,5 @@ int __init init_ocfs2_uptodate_cache(void) void exit_ocfs2_uptodate_cache(void) { - if (ocfs2_uptodate_cachep) - kmem_cache_destroy(ocfs2_uptodate_cachep); + kmem_cache_destroy(ocfs2_uptodate_cachep); } diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index c261c1dfd374..3a24ce3deb01 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -3564,7 +3564,7 @@ int ocfs2_xattr_set(struct inode *inode, .not_found = -ENODATA, }; - if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) + if (!ocfs2_supports_xattr(osb)) return -EOPNOTSUPP; /* diff --git a/fs/open.c b/fs/open.c index d0e955b558ad..c5ee7cd60424 100644 --- a/fs/open.c +++ b/fs/open.c @@ -724,16 +724,6 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) return ksys_fchown(fd, user, group); } -int open_check_o_direct(struct file *f) -{ - /* NB: we're sure to have correct a_ops only after f_op->open */ - if (f->f_flags & O_DIRECT) { - if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) - return -EINVAL; - } - return 0; -} - static int do_dentry_open(struct file *f, struct inode *inode, int (*open)(struct inode *, struct file *), @@ -755,7 +745,7 @@ static int do_dentry_open(struct file *f, if (unlikely(f->f_flags & O_PATH)) { f->f_mode = FMODE_PATH; f->f_op = &empty_fops; - return 0; + goto done; } if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { @@ -808,7 +798,12 @@ static int do_dentry_open(struct file *f, f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); - +done: + /* NB: we're sure to have correct a_ops only after f_op->open */ + error = -EINVAL; + if ((f->f_flags & O_DIRECT) && + (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)) + goto out_fput; return 0; cleanup_all: @@ -823,6 +818,9 @@ cleanup_file: f->f_path.dentry = NULL; f->f_inode = NULL; return error; +out_fput: + fput(f); + return error; } /** @@ -920,20 +918,14 @@ struct file *dentry_open(const struct path *path, int flags, BUG_ON(!path->mnt); f = get_empty_filp(); - if (!IS_ERR(f)) { - f->f_flags = flags; - error = vfs_open(path, f, cred); - if (!error) { - /* from now on we need fput() to dispose of f */ - error = open_check_o_direct(f); - if (error) { - fput(f); - f = ERR_PTR(error); - } - } else { - put_filp(f); - f = ERR_PTR(error); - } + if (IS_ERR(f)) + return f; + + f->f_flags = flags; + error = vfs_open(path, f, cred); + if (error) { + put_filp(f); + return ERR_PTR(error); } return f; } diff --git a/fs/orangefs/acl.c b/fs/orangefs/acl.c index 480ea059a680..10587413b20e 100644 --- a/fs/orangefs/acl.c +++ b/fs/orangefs/acl.c @@ -9,7 +9,6 @@ #include "orangefs-kernel.h" #include "orangefs-bufmap.h" #include <linux/posix_acl_xattr.h> -#include <linux/fs_struct.h> struct posix_acl *orangefs_get_acl(struct inode *inode, int type) { diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index b03057afac2a..66369ec90020 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -463,11 +463,10 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, if (op->downcall.type != ORANGEFS_VFS_OP_READDIR) goto wakeup; - op->downcall.trailer_buf = vmalloc(op->downcall.trailer_size); + op->downcall.trailer_buf = vzalloc(op->downcall.trailer_size); if (!op->downcall.trailer_buf) goto Enomem; - memset(op->downcall.trailer_buf, 0, op->downcall.trailer_size); if (!copy_from_iter_full(op->downcall.trailer_buf, op->downcall.trailer_size, iter)) { gossip_err("%s: failed to copy trailer.\n", __func__); @@ -779,9 +778,35 @@ static long orangefs_devreq_compat_ioctl(struct file *filp, unsigned int cmd, #endif /* CONFIG_COMPAT is in .config */ +static __poll_t orangefs_devreq_poll(struct file *file, + struct poll_table_struct *poll_table) +{ + __poll_t poll_revent_mask = 0; + + poll_wait(file, &orangefs_request_list_waitq, poll_table); + + if (!list_empty(&orangefs_request_list)) + poll_revent_mask |= EPOLLIN; + return poll_revent_mask; +} + /* the assigned character device major number */ static int orangefs_dev_major; +static const struct file_operations orangefs_devreq_file_operations = { + .owner = THIS_MODULE, + .read = orangefs_devreq_read, + .write_iter = orangefs_devreq_write_iter, + .open = orangefs_devreq_open, + .release = orangefs_devreq_release, + .unlocked_ioctl = orangefs_devreq_ioctl, + +#ifdef CONFIG_COMPAT /* CONFIG_COMPAT is in .config */ + .compat_ioctl = orangefs_devreq_compat_ioctl, +#endif + .poll = orangefs_devreq_poll +}; + /* * Initialize orangefs device specific state: * Must be called at module load time only @@ -814,29 +839,3 @@ void orangefs_dev_cleanup(void) "*** /dev/%s character device unregistered ***\n", ORANGEFS_REQDEVICE_NAME); } - -static __poll_t orangefs_devreq_poll(struct file *file, - struct poll_table_struct *poll_table) -{ - __poll_t poll_revent_mask = 0; - - poll_wait(file, &orangefs_request_list_waitq, poll_table); - - if (!list_empty(&orangefs_request_list)) - poll_revent_mask |= EPOLLIN; - return poll_revent_mask; -} - -const struct file_operations orangefs_devreq_file_operations = { - .owner = THIS_MODULE, - .read = orangefs_devreq_read, - .write_iter = orangefs_devreq_write_iter, - .open = orangefs_devreq_open, - .release = orangefs_devreq_release, - .unlocked_ioctl = orangefs_devreq_ioctl, - -#ifdef CONFIG_COMPAT /* CONFIG_COMPAT is in .config */ - .compat_ioctl = orangefs_devreq_compat_ioctl, -#endif - .poll = orangefs_devreq_poll -}; diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 0d228cd087e6..26358efbf794 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -42,70 +42,6 @@ static int flush_racache(struct inode *inode) } /* - * Copy to client-core's address space from the buffers specified - * by the iovec upto total_size bytes. - * NOTE: the iovector can either contain addresses which - * can futher be kernel-space or user-space addresses. - * or it can pointers to struct page's - */ -static int precopy_buffers(int buffer_index, - struct iov_iter *iter, - size_t total_size) -{ - int ret = 0; - /* - * copy data from application/kernel by pulling it out - * of the iovec. - */ - - - if (total_size) { - ret = orangefs_bufmap_copy_from_iovec(iter, - buffer_index, - total_size); - if (ret < 0) - gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n", - __func__, - (long)ret); - } - - if (ret < 0) - gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n", - __func__, - (long)ret); - return ret; -} - -/* - * Copy from client-core's address space to the buffers specified - * by the iovec upto total_size bytes. - * NOTE: the iovector can either contain addresses which - * can futher be kernel-space or user-space addresses. - * or it can pointers to struct page's - */ -static int postcopy_buffers(int buffer_index, - struct iov_iter *iter, - size_t total_size) -{ - int ret = 0; - /* - * copy data to application/kernel by pushing it out to - * the iovec. NOTE; target buffers can be addresses or - * struct page pointers. - */ - if (total_size) { - ret = orangefs_bufmap_copy_to_iovec(iter, - buffer_index, - total_size); - if (ret < 0) - gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n", - __func__, - (long)ret); - } - return ret; -} - -/* * Post and wait for the I/O upcall to finish */ static ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode, @@ -157,14 +93,15 @@ populate_shared_memory: total_size); /* * Stage 1: copy the buffers into client-core's address space - * precopy_buffers only pertains to writes. */ - if (type == ORANGEFS_IO_WRITE) { - ret = precopy_buffers(buffer_index, - iter, - total_size); - if (ret < 0) + if (type == ORANGEFS_IO_WRITE && total_size) { + ret = orangefs_bufmap_copy_from_iovec(iter, buffer_index, + total_size); + if (ret < 0) { + gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n", + __func__, (long)ret); goto out; + } } gossip_debug(GOSSIP_FILE_DEBUG, @@ -260,14 +197,20 @@ populate_shared_memory: /* * Stage 3: Post copy buffers from client-core's address space - * postcopy_buffers only pertains to reads. */ - if (type == ORANGEFS_IO_READ) { - ret = postcopy_buffers(buffer_index, - iter, - new_op->downcall.resp.io.amt_complete); - if (ret < 0) + if (type == ORANGEFS_IO_READ && new_op->downcall.resp.io.amt_complete) { + /* + * NOTE: the iovector can either contain addresses which + * can futher be kernel-space or user-space addresses. + * or it can pointers to struct page's + */ + ret = orangefs_bufmap_copy_to_iovec(iter, buffer_index, + new_op->downcall.resp.io.amt_complete); + if (ret < 0) { + gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n", + __func__, (long)ret); goto out; + } } gossip_debug(GOSSIP_FILE_DEBUG, "%s(%pU): Amount %s, returned by the sys-io call:%d\n", @@ -585,6 +528,28 @@ static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long ar return ret; } +static int orangefs_fault(struct vm_fault *vmf) +{ + struct file *file = vmf->vma->vm_file; + int rc; + rc = orangefs_inode_getattr(file->f_mapping->host, 0, 1, + STATX_SIZE); + if (rc == -ESTALE) + rc = -EIO; + if (rc) { + gossip_err("%s: orangefs_inode_getattr failed, " + "rc:%d:.\n", __func__, rc); + return rc; + } + return filemap_fault(vmf); +} + +const struct vm_operations_struct orangefs_file_vm_ops = { + .fault = orangefs_fault, + .map_pages = filemap_map_pages, + .page_mkwrite = filemap_page_mkwrite, +}; + /* * Memory map a region of a file. */ @@ -596,12 +561,16 @@ static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma) (char *)file->f_path.dentry->d_name.name : (char *)"Unknown")); + if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) + return -EINVAL; + /* set the sequential readahead hint */ vma->vm_flags |= VM_SEQ_READ; vma->vm_flags &= ~VM_RAND_READ; - /* Use readonly mmap since we cannot support writable maps. */ - return generic_file_readonly_mmap(file, vma); + file_accessed(file); + vma->vm_ops = &orangefs_file_vm_ops; + return 0; } #define mapping_nrpages(idata) ((idata)->nrpages) diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index fe1d705ad91f..79c61da8b1bc 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -138,7 +138,7 @@ static ssize_t orangefs_direct_IO(struct kiocb *iocb, } /** ORANGEFS2 implementation of address space operations */ -const struct address_space_operations orangefs_address_operations = { +static const struct address_space_operations orangefs_address_operations = { .readpage = orangefs_readpage, .readpages = orangefs_readpages, .invalidatepage = orangefs_invalidatepage, @@ -307,7 +307,7 @@ int orangefs_update_time(struct inode *inode, struct timespec *time, int flags) } /* ORANGEDS2 implementation of VFS inode operations for files */ -const struct inode_operations orangefs_file_inode_operations = { +static const struct inode_operations orangefs_file_inode_operations = { .get_acl = orangefs_get_acl, .set_acl = orangefs_set_acl, .setattr = orangefs_setattr, diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index 59f444dced9b..4f927023d095 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -71,9 +71,9 @@ static void put(struct slot_map *m, int slot) spin_lock(&m->q.lock); __clear_bit(slot, m->map); v = ++m->c; - if (unlikely(v == 1)) /* no free slots -> one free slot */ + if (v > 0) wake_up_locked(&m->q); - else if (unlikely(v == -1)) /* finished dying */ + if (unlikely(v == -1)) /* finished dying */ wake_up_all_locked(&m->q); spin_unlock(&m->q.lock); } diff --git a/fs/orangefs/orangefs-debug.h b/fs/orangefs/orangefs-debug.h index c7db56a31b92..6e079d4230d0 100644 --- a/fs/orangefs/orangefs-debug.h +++ b/fs/orangefs/orangefs-debug.h @@ -43,12 +43,6 @@ #define GOSSIP_MAX_NR 16 #define GOSSIP_MAX_DEBUG (((__u64)1 << GOSSIP_MAX_NR) - 1) -/*function prototypes*/ -__u64 ORANGEFS_kmod_eventlog_to_mask(const char *event_logging); -__u64 ORANGEFS_debug_eventlog_to_mask(const char *event_logging); -char *ORANGEFS_debug_mask_to_eventlog(__u64 mask); -char *ORANGEFS_kmod_mask_to_eventlog(__u64 mask); - /* a private internal type */ struct __keyword_mask_s { const char *keyword; diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index eebbaece85ef..c29bb0ebc6bb 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -65,11 +65,7 @@ #define ORANGEFS_REQDEVICE_NAME "pvfs2-req" #define ORANGEFS_DEVREQ_MAGIC 0x20030529 -#define ORANGEFS_LINK_MAX 0x000000FF #define ORANGEFS_PURGE_RETRY_COUNT 0x00000005 -#define ORANGEFS_MAX_NUM_OPTIONS 0x00000004 -#define ORANGEFS_MAX_MOUNT_OPT_LEN 0x00000080 -#define ORANGEFS_MAX_FSKEY_LEN 64 #define MAX_DEV_REQ_UPSIZE (2 * sizeof(__s32) + \ sizeof(__u64) + sizeof(struct orangefs_upcall_s)) @@ -113,15 +109,6 @@ extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type); extern int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type); /* - * Redefine xtvec structure so that we could move helper functions out of - * the define - */ -struct xtvec { - __kernel_off_t xtv_off; /* must be off_t */ - __kernel_size_t xtv_len; /* must be size_t */ -}; - -/* * orangefs data structures */ struct orangefs_kernel_op_s { @@ -224,39 +211,6 @@ struct orangefs_sb_info_s { struct list_head list; }; -/* - * structure that holds the state of any async I/O operation issued - * through the VFS. Needed especially to handle cancellation requests - * or even completion notification so that the VFS client-side daemon - * can free up its vfs_request slots. - */ -struct orangefs_kiocb_s { - /* the pointer to the task that initiated the AIO */ - struct task_struct *tsk; - - /* pointer to the kiocb that kicked this operation */ - struct kiocb *kiocb; - - /* buffer index that was used for the I/O */ - struct orangefs_bufmap *bufmap; - int buffer_index; - - /* orangefs kernel operation type */ - struct orangefs_kernel_op_s *op; - - /* set to indicate the type of the operation */ - int rw; - - /* file offset */ - loff_t offset; - - /* and the count in bytes */ - size_t bytes_to_be_copied; - - ssize_t bytes_copied; - int needs_cleanup; -}; - struct orangefs_stats { unsigned long cache_hits; unsigned long cache_misses; @@ -305,21 +259,6 @@ static inline struct orangefs_khandle *get_khandle_from_ino(struct inode *inode) return &(ORANGEFS_I(inode)->refn.khandle); } -static inline ino_t get_ino_from_khandle(struct inode *inode) -{ - struct orangefs_khandle *khandle; - ino_t ino; - - khandle = get_khandle_from_ino(inode); - ino = orangefs_khandle_to_ino(khandle); - return ino; -} - -static inline ino_t get_parent_ino_from_dentry(struct dentry *dentry) -{ - return get_ino_from_khandle(dentry->d_parent->d_inode); -} - static inline int is_root_handle(struct inode *inode) { gossip_debug(GOSSIP_DCACHE_DEBUG, @@ -391,7 +330,6 @@ void fsid_key_table_finalize(void); /* * defined in inode.c */ -__u32 convert_to_orangefs_mask(unsigned long lite_mask); struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir, int mode, @@ -410,17 +348,6 @@ int orangefs_update_time(struct inode *, struct timespec *, int); /* * defined in xattr.c */ -int orangefs_setxattr(struct dentry *dentry, - const char *name, - const void *value, - size_t size, - int flags); - -ssize_t orangefs_getxattr(struct dentry *dentry, - const char *name, - void *buffer, - size_t size); - ssize_t orangefs_listxattr(struct dentry *dentry, char *buffer, size_t size); /* @@ -467,8 +394,6 @@ int orangefs_inode_check_changed(struct inode *inode); int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr); -int orangefs_unmount_sb(struct super_block *sb); - bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op); int orangefs_normalize_to_errno(__s32 error_code); @@ -487,16 +412,11 @@ extern struct list_head *orangefs_htable_ops_in_progress; extern spinlock_t orangefs_htable_ops_in_progress_lock; extern int hash_table_size; -extern const struct address_space_operations orangefs_address_operations; -extern const struct inode_operations orangefs_file_inode_operations; extern const struct file_operations orangefs_file_operations; extern const struct inode_operations orangefs_symlink_inode_operations; extern const struct inode_operations orangefs_dir_inode_operations; extern const struct file_operations orangefs_dir_operations; extern const struct dentry_operations orangefs_dentry_operations; -extern const struct file_operations orangefs_devreq_file_operations; - -extern wait_queue_head_t orangefs_bufmap_init_waitq; /* * misc convenience macros diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index ea6256d136d1..00fadaf0da8f 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -500,7 +500,7 @@ int orangefs_normalize_to_errno(__s32 error_code) * server. */ } else if (error_code > 0) { - gossip_err("orangefs: error status receieved.\n"); + gossip_err("orangefs: error status received.\n"); gossip_err("orangefs: assuming error code is inverted.\n"); error_code = -error_code; } diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h index dc6e3e6269c3..61ee8d64c842 100644 --- a/fs/orangefs/protocol.h +++ b/fs/orangefs/protocol.h @@ -5,11 +5,6 @@ #include <linux/slab.h> #include <linux/ioctl.h> -/* pvfs2-config.h ***********************************************************/ -#define ORANGEFS_VERSION_MAJOR 2 -#define ORANGEFS_VERSION_MINOR 9 -#define ORANGEFS_VERSION_SUB 0 - /* khandle stuff ***********************************************************/ /* @@ -70,16 +65,6 @@ static inline void ORANGEFS_khandle_from(struct orangefs_khandle *kh, } /* pvfs2-types.h ************************************************************/ -typedef __u32 ORANGEFS_uid; -typedef __u32 ORANGEFS_gid; -typedef __s32 ORANGEFS_fs_id; -typedef __u32 ORANGEFS_permissions; -typedef __u64 ORANGEFS_time; -typedef __s64 ORANGEFS_size; -typedef __u64 ORANGEFS_flags; -typedef __u64 ORANGEFS_ds_position; -typedef __s32 ORANGEFS_error; -typedef __s64 ORANGEFS_offset; #define ORANGEFS_SUPER_MAGIC 0x20030528 @@ -145,7 +130,6 @@ typedef __s64 ORANGEFS_offset; #define ORANGEFS_APPEND_FL FS_APPEND_FL #define ORANGEFS_NOATIME_FL FS_NOATIME_FL #define ORANGEFS_MIRROR_FL 0x01000000ULL -#define ORANGEFS_O_EXECUTE (1 << 0) #define ORANGEFS_FS_ID_NULL ((__s32)0) #define ORANGEFS_ATTR_SYS_UID (1 << 0) #define ORANGEFS_ATTR_SYS_GID (1 << 1) @@ -229,35 +213,6 @@ enum orangefs_ds_type { ORANGEFS_TYPE_INTERNAL = (1 << 5) /* for the server's private use */ }; -/* - * ORANGEFS_certificate simply stores a buffer with the buffer size. - * The buffer can be converted to an OpenSSL X509 struct for use. - */ -struct ORANGEFS_certificate { - __u32 buf_size; - unsigned char *buf; -}; - -/* - * A credential identifies a user and is signed by the client/user - * private key. - */ -struct ORANGEFS_credential { - __u32 userid; /* user id */ - __u32 num_groups; /* length of group_array */ - __u32 *group_array; /* groups for which the user is a member */ - char *issuer; /* alias of the issuing server */ - __u64 timeout; /* seconds after epoch to time out */ - __u32 sig_size; /* length of the signature in bytes */ - unsigned char *signature; /* digital signature */ - struct ORANGEFS_certificate certificate; /* user certificate buffer */ -}; -#define extra_size_ORANGEFS_credential (ORANGEFS_REQ_LIMIT_GROUPS * \ - sizeof(__u32) + \ - ORANGEFS_REQ_LIMIT_ISSUER + \ - ORANGEFS_REQ_LIMIT_SIGNATURE + \ - extra_size_ORANGEFS_certificate) - /* This structure is used by the VFS-client interaction alone */ struct ORANGEFS_keyval_pair { char key[ORANGEFS_MAX_XATTR_NAMELEN]; diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index b42e5bd6d8ff..09c19ef91526 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -1,5 +1,6 @@ config PSTORE tristate "Persistent store support" + select CRYPTO if PSTORE_COMPRESS default n help This option enables generic access to platform level @@ -12,35 +13,89 @@ config PSTORE If you don't have a platform persistent store driver, say N. -choice - prompt "Choose compression algorithm" - depends on PSTORE - default PSTORE_ZLIB_COMPRESS - help - This option chooses compression algorithm. - -config PSTORE_ZLIB_COMPRESS - bool "ZLIB" - select ZLIB_DEFLATE - select ZLIB_INFLATE - help - This option enables ZLIB compression algorithm support. +config PSTORE_DEFLATE_COMPRESS + tristate "DEFLATE (ZLIB) compression" + default y + depends on PSTORE + select CRYPTO_DEFLATE + help + This option enables DEFLATE (also known as ZLIB) compression + algorithm support. config PSTORE_LZO_COMPRESS - bool "LZO" - select LZO_COMPRESS - select LZO_DECOMPRESS - help - This option enables LZO compression algorithm support. + tristate "LZO compression" + depends on PSTORE + select CRYPTO_LZO + help + This option enables LZO compression algorithm support. config PSTORE_LZ4_COMPRESS - bool "LZ4" - select LZ4_COMPRESS - select LZ4_DECOMPRESS - help - This option enables LZ4 compression algorithm support. + tristate "LZ4 compression" + depends on PSTORE + select CRYPTO_LZ4 + help + This option enables LZ4 compression algorithm support. + +config PSTORE_LZ4HC_COMPRESS + tristate "LZ4HC compression" + depends on PSTORE + select CRYPTO_LZ4HC + help + This option enables LZ4HC (high compression) mode algorithm. + +config PSTORE_842_COMPRESS + bool "842 compression" + depends on PSTORE + select CRYPTO_842 + help + This option enables 842 compression algorithm support. + +config PSTORE_COMPRESS + def_bool y + depends on PSTORE + depends on PSTORE_DEFLATE_COMPRESS || PSTORE_LZO_COMPRESS || \ + PSTORE_LZ4_COMPRESS || PSTORE_LZ4HC_COMPRESS || \ + PSTORE_842_COMPRESS + +choice + prompt "Default pstore compression algorithm" + depends on PSTORE_COMPRESS + help + This option chooses the default active compression algorithm. + This change be changed at boot with "pstore.compress=..." on + the kernel command line. + + Currently, pstore has support for 5 compression algorithms: + deflate, lzo, lz4, lz4hc and 842. + + The default compression algorithm is deflate. + + config PSTORE_DEFLATE_COMPRESS_DEFAULT + bool "deflate" if PSTORE_DEFLATE_COMPRESS + + config PSTORE_LZO_COMPRESS_DEFAULT + bool "lzo" if PSTORE_LZO_COMPRESS + + config PSTORE_LZ4_COMPRESS_DEFAULT + bool "lz4" if PSTORE_LZ4_COMPRESS + + config PSTORE_LZ4HC_COMPRESS_DEFAULT + bool "lz4hc" if PSTORE_LZ4HC_COMPRESS + + config PSTORE_842_COMPRESS_DEFAULT + bool "842" if PSTORE_842_COMPRESS + endchoice +config PSTORE_COMPRESS_DEFAULT + string + depends on PSTORE_COMPRESS + default "deflate" if PSTORE_DEFLATE_COMPRESS_DEFAULT + default "lzo" if PSTORE_LZO_COMPRESS_DEFAULT + default "lz4" if PSTORE_LZ4_COMPRESS_DEFAULT + default "lz4hc" if PSTORE_LZ4HC_COMPRESS_DEFAULT + default "842" if PSTORE_842_COMPRESS_DEFAULT + config PSTORE_CONSOLE bool "Log kernel console messages" depends on PSTORE diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index d814723fb27d..5fcb845b9fec 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -486,6 +486,8 @@ static int __init init_pstore_fs(void) { int err; + pstore_choose_compression(); + /* Create a convenient mount point for people to access pstore */ err = sysfs_create_mount_point(fs_kobj, "pstore"); if (err) diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index c029314478fa..fb767e28aeb2 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h @@ -37,4 +37,7 @@ extern bool pstore_is_mounted(void); extern void pstore_record_init(struct pstore_record *record, struct pstore_info *psi); +/* Called during module_init() */ +extern void __init pstore_choose_compression(void); + #endif diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index c3129b131e4d..dc720573fd53 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -28,15 +28,13 @@ #include <linux/console.h> #include <linux/module.h> #include <linux/pstore.h> -#ifdef CONFIG_PSTORE_ZLIB_COMPRESS -#include <linux/zlib.h> -#endif -#ifdef CONFIG_PSTORE_LZO_COMPRESS +#if IS_ENABLED(CONFIG_PSTORE_LZO_COMPRESS) #include <linux/lzo.h> #endif -#ifdef CONFIG_PSTORE_LZ4_COMPRESS +#if IS_ENABLED(CONFIG_PSTORE_LZ4_COMPRESS) || IS_ENABLED(CONFIG_PSTORE_LZ4HC_COMPRESS) #include <linux/lz4.h> #endif +#include <linux/crypto.h> #include <linux/string.h> #include <linux/timer.h> #include <linux/slab.h> @@ -74,23 +72,18 @@ static DEFINE_SPINLOCK(pstore_lock); struct pstore_info *psinfo; static char *backend; - -/* Compression parameters */ -#ifdef CONFIG_PSTORE_ZLIB_COMPRESS -#define COMPR_LEVEL 6 -#define WINDOW_BITS 12 -#define MEM_LEVEL 4 -static struct z_stream_s stream; +static char *compress = +#ifdef CONFIG_PSTORE_COMPRESS_DEFAULT + CONFIG_PSTORE_COMPRESS_DEFAULT; #else -static unsigned char *workspace; + NULL; #endif -struct pstore_zbackend { - int (*compress)(const void *in, void *out, size_t inlen, size_t outlen); - int (*decompress)(void *in, void *out, size_t inlen, size_t outlen); - void (*allocate)(void); - void (*free)(void); +/* Compression parameters */ +static struct crypto_comp *tfm; +struct pstore_zbackend { + int (*zbufsize)(size_t size); const char *name; }; @@ -149,77 +142,12 @@ bool pstore_cannot_block_path(enum kmsg_dump_reason reason) } EXPORT_SYMBOL_GPL(pstore_cannot_block_path); -#ifdef CONFIG_PSTORE_ZLIB_COMPRESS -/* Derived from logfs_compress() */ -static int compress_zlib(const void *in, void *out, size_t inlen, size_t outlen) -{ - int err, ret; - - ret = -EIO; - err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS, - MEM_LEVEL, Z_DEFAULT_STRATEGY); - if (err != Z_OK) - goto error; - - stream.next_in = in; - stream.avail_in = inlen; - stream.total_in = 0; - stream.next_out = out; - stream.avail_out = outlen; - stream.total_out = 0; - - err = zlib_deflate(&stream, Z_FINISH); - if (err != Z_STREAM_END) - goto error; - - err = zlib_deflateEnd(&stream); - if (err != Z_OK) - goto error; - - if (stream.total_out >= stream.total_in) - goto error; - - ret = stream.total_out; -error: - return ret; -} - -/* Derived from logfs_uncompress */ -static int decompress_zlib(void *in, void *out, size_t inlen, size_t outlen) +#if IS_ENABLED(CONFIG_PSTORE_DEFLATE_COMPRESS) +static int zbufsize_deflate(size_t size) { - int err, ret; - - ret = -EIO; - err = zlib_inflateInit2(&stream, WINDOW_BITS); - if (err != Z_OK) - goto error; - - stream.next_in = in; - stream.avail_in = inlen; - stream.total_in = 0; - stream.next_out = out; - stream.avail_out = outlen; - stream.total_out = 0; - - err = zlib_inflate(&stream, Z_FINISH); - if (err != Z_STREAM_END) - goto error; - - err = zlib_inflateEnd(&stream); - if (err != Z_OK) - goto error; - - ret = stream.total_out; -error: - return ret; -} - -static void allocate_zlib(void) -{ - size_t size; size_t cmpr; - switch (psinfo->bufsize) { + switch (size) { /* buffer range for efivars */ case 1000 ... 2000: cmpr = 56; @@ -239,212 +167,131 @@ static void allocate_zlib(void) break; } - big_oops_buf_sz = (psinfo->bufsize * 100) / cmpr; - big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL); - if (big_oops_buf) { - size = max(zlib_deflate_workspacesize(WINDOW_BITS, MEM_LEVEL), - zlib_inflate_workspacesize()); - stream.workspace = kmalloc(size, GFP_KERNEL); - if (!stream.workspace) { - pr_err("No memory for compression workspace; skipping compression\n"); - kfree(big_oops_buf); - big_oops_buf = NULL; - } - } else { - pr_err("No memory for uncompressed data; skipping compression\n"); - stream.workspace = NULL; - } - + return (size * 100) / cmpr; } - -static void free_zlib(void) -{ - kfree(stream.workspace); - stream.workspace = NULL; - kfree(big_oops_buf); - big_oops_buf = NULL; - big_oops_buf_sz = 0; -} - -static const struct pstore_zbackend backend_zlib = { - .compress = compress_zlib, - .decompress = decompress_zlib, - .allocate = allocate_zlib, - .free = free_zlib, - .name = "zlib", -}; #endif -#ifdef CONFIG_PSTORE_LZO_COMPRESS -static int compress_lzo(const void *in, void *out, size_t inlen, size_t outlen) +#if IS_ENABLED(CONFIG_PSTORE_LZO_COMPRESS) +static int zbufsize_lzo(size_t size) { - int ret; - - ret = lzo1x_1_compress(in, inlen, out, &outlen, workspace); - if (ret != LZO_E_OK) { - pr_err("lzo_compress error, ret = %d!\n", ret); - return -EIO; - } - - return outlen; + return lzo1x_worst_compress(size); } +#endif -static int decompress_lzo(void *in, void *out, size_t inlen, size_t outlen) +#if IS_ENABLED(CONFIG_PSTORE_LZ4_COMPRESS) || IS_ENABLED(CONFIG_PSTORE_LZ4HC_COMPRESS) +static int zbufsize_lz4(size_t size) { - int ret; - - ret = lzo1x_decompress_safe(in, inlen, out, &outlen); - if (ret != LZO_E_OK) { - pr_err("lzo_decompress error, ret = %d!\n", ret); - return -EIO; - } - - return outlen; + return LZ4_compressBound(size); } +#endif -static void allocate_lzo(void) +#if IS_ENABLED(CONFIG_PSTORE_842_COMPRESS) +static int zbufsize_842(size_t size) { - big_oops_buf_sz = lzo1x_worst_compress(psinfo->bufsize); - big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL); - if (big_oops_buf) { - workspace = kmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); - if (!workspace) { - pr_err("No memory for compression workspace; skipping compression\n"); - kfree(big_oops_buf); - big_oops_buf = NULL; - } - } else { - pr_err("No memory for uncompressed data; skipping compression\n"); - workspace = NULL; - } + return size; } +#endif -static void free_lzo(void) -{ - kfree(workspace); - kfree(big_oops_buf); - big_oops_buf = NULL; - big_oops_buf_sz = 0; -} +static const struct pstore_zbackend *zbackend __ro_after_init; -static const struct pstore_zbackend backend_lzo = { - .compress = compress_lzo, - .decompress = decompress_lzo, - .allocate = allocate_lzo, - .free = free_lzo, - .name = "lzo", -}; +static const struct pstore_zbackend zbackends[] = { +#if IS_ENABLED(CONFIG_PSTORE_DEFLATE_COMPRESS) + { + .zbufsize = zbufsize_deflate, + .name = "deflate", + }, +#endif +#if IS_ENABLED(CONFIG_PSTORE_LZO_COMPRESS) + { + .zbufsize = zbufsize_lzo, + .name = "lzo", + }, +#endif +#if IS_ENABLED(CONFIG_PSTORE_LZ4_COMPRESS) + { + .zbufsize = zbufsize_lz4, + .name = "lz4", + }, #endif +#if IS_ENABLED(CONFIG_PSTORE_LZ4HC_COMPRESS) + { + .zbufsize = zbufsize_lz4, + .name = "lz4hc", + }, +#endif +#if IS_ENABLED(CONFIG_PSTORE_842_COMPRESS) + { + .zbufsize = zbufsize_842, + .name = "842", + }, +#endif + { } +}; -#ifdef CONFIG_PSTORE_LZ4_COMPRESS -static int compress_lz4(const void *in, void *out, size_t inlen, size_t outlen) +static int pstore_compress(const void *in, void *out, + unsigned int inlen, unsigned int outlen) { int ret; - ret = LZ4_compress_default(in, out, inlen, outlen, workspace); - if (!ret) { - pr_err("LZ4_compress_default error; compression failed!\n"); - return -EIO; + ret = crypto_comp_compress(tfm, in, inlen, out, &outlen); + if (ret) { + pr_err("crypto_comp_compress failed, ret = %d!\n", ret); + return ret; } - return ret; + return outlen; } -static int decompress_lz4(void *in, void *out, size_t inlen, size_t outlen) +static int pstore_decompress(void *in, void *out, + unsigned int inlen, unsigned int outlen) { int ret; - ret = LZ4_decompress_safe(in, out, inlen, outlen); - if (ret < 0) { - /* - * LZ4_decompress_safe will return an error code - * (< 0) if decompression failed - */ - pr_err("LZ4_decompress_safe error, ret = %d!\n", ret); - return -EIO; + ret = crypto_comp_decompress(tfm, in, inlen, out, &outlen); + if (ret) { + pr_err("crypto_comp_decompress failed, ret = %d!\n", ret); + return ret; } - return ret; -} - -static void allocate_lz4(void) -{ - big_oops_buf_sz = LZ4_compressBound(psinfo->bufsize); - big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL); - if (big_oops_buf) { - workspace = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL); - if (!workspace) { - pr_err("No memory for compression workspace; skipping compression\n"); - kfree(big_oops_buf); - big_oops_buf = NULL; - } - } else { - pr_err("No memory for uncompressed data; skipping compression\n"); - workspace = NULL; - } + return outlen; } -static void free_lz4(void) +static void allocate_buf_for_compression(void) { - kfree(workspace); - kfree(big_oops_buf); - big_oops_buf = NULL; - big_oops_buf_sz = 0; -} - -static const struct pstore_zbackend backend_lz4 = { - .compress = compress_lz4, - .decompress = decompress_lz4, - .allocate = allocate_lz4, - .free = free_lz4, - .name = "lz4", -}; -#endif - -static const struct pstore_zbackend *zbackend = -#if defined(CONFIG_PSTORE_ZLIB_COMPRESS) - &backend_zlib; -#elif defined(CONFIG_PSTORE_LZO_COMPRESS) - &backend_lzo; -#elif defined(CONFIG_PSTORE_LZ4_COMPRESS) - &backend_lz4; -#else - NULL; -#endif + if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS) || !zbackend) + return; -static int pstore_compress(const void *in, void *out, - size_t inlen, size_t outlen) -{ - if (zbackend) - return zbackend->compress(in, out, inlen, outlen); - else - return -EIO; -} + if (!crypto_has_comp(zbackend->name, 0, 0)) { + pr_err("No %s compression\n", zbackend->name); + return; + } -static int pstore_decompress(void *in, void *out, size_t inlen, size_t outlen) -{ - if (zbackend) - return zbackend->decompress(in, out, inlen, outlen); - else - return -EIO; -} + big_oops_buf_sz = zbackend->zbufsize(psinfo->bufsize); + if (big_oops_buf_sz <= 0) + return; -static void allocate_buf_for_compression(void) -{ - if (zbackend) { - pr_info("using %s compression\n", zbackend->name); - zbackend->allocate(); - } else { + big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL); + if (!big_oops_buf) { pr_err("allocate compression buffer error!\n"); + return; + } + + tfm = crypto_alloc_comp(zbackend->name, 0, 0); + if (IS_ERR_OR_NULL(tfm)) { + kfree(big_oops_buf); + big_oops_buf = NULL; + pr_err("crypto_alloc_comp() failed!\n"); + return; } } static void free_buf_for_compression(void) { - if (zbackend) - zbackend->free(); - else - pr_err("free compression buffer error!\n"); + if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && !IS_ERR_OR_NULL(tfm)) + crypto_free_comp(tfm); + kfree(big_oops_buf); + big_oops_buf = NULL; + big_oops_buf_sz = 0; } /* @@ -901,5 +748,24 @@ static void pstore_timefunc(struct timer_list *unused) jiffies + msecs_to_jiffies(pstore_update_ms)); } +void __init pstore_choose_compression(void) +{ + const struct pstore_zbackend *step; + + if (!compress) + return; + + for (step = zbackends; step->name; step++) { + if (!strcmp(compress, step->name)) { + zbackend = step; + pr_info("using %s compression\n", zbackend->name); + return; + } + } +} + +module_param(compress, charp, 0444); +MODULE_PARM_DESC(compress, "Pstore compression to use"); + module_param(backend, charp, 0444); MODULE_PARM_DESC(backend, "Pstore backend to use"); diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 7125b398d312..49b2bc114868 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -938,7 +938,7 @@ static int __init ramoops_init(void) ramoops_register_dummy(); return platform_driver_register(&ramoops_driver); } -postcore_initcall(ramoops_init); +late_initcall(ramoops_init); static void __exit ramoops_exit(void) { diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index e11672aa4575..951a14edcf51 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -98,24 +98,23 @@ static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz, uint8_t *data, size_t len, uint8_t *ecc) { int i; - uint16_t par[prz->ecc_info.ecc_size]; /* Initialize the parity buffer */ - memset(par, 0, sizeof(par)); - encode_rs8(prz->rs_decoder, data, len, par, 0); + memset(prz->ecc_info.par, 0, + prz->ecc_info.ecc_size * sizeof(prz->ecc_info.par[0])); + encode_rs8(prz->rs_decoder, data, len, prz->ecc_info.par, 0); for (i = 0; i < prz->ecc_info.ecc_size; i++) - ecc[i] = par[i]; + ecc[i] = prz->ecc_info.par[i]; } static int persistent_ram_decode_rs8(struct persistent_ram_zone *prz, void *data, size_t len, uint8_t *ecc) { int i; - uint16_t par[prz->ecc_info.ecc_size]; for (i = 0; i < prz->ecc_info.ecc_size; i++) - par[i] = ecc[i]; - return decode_rs8(prz->rs_decoder, data, par, len, + prz->ecc_info.par[i] = ecc[i]; + return decode_rs8(prz->rs_decoder, data, prz->ecc_info.par, len, NULL, 0, NULL, 0, NULL); } @@ -228,6 +227,15 @@ static int persistent_ram_init_ecc(struct persistent_ram_zone *prz, return -EINVAL; } + /* allocate workspace instead of using stack VLA */ + prz->ecc_info.par = kmalloc_array(prz->ecc_info.ecc_size, + sizeof(*prz->ecc_info.par), + GFP_KERNEL); + if (!prz->ecc_info.par) { + pr_err("cannot allocate ECC parity workspace\n"); + return -ENOMEM; + } + prz->corrected_bytes = 0; prz->bad_blocks = 0; @@ -514,6 +522,13 @@ void persistent_ram_free(struct persistent_ram_zone *prz) } prz->vaddr = NULL; } + if (prz->rs_decoder) { + free_rs(prz->rs_decoder); + prz->rs_decoder = NULL; + } + kfree(prz->ecc_info.par); + prz->ecc_info.par = NULL; + persistent_ram_free_old(prz); kfree(prz); } diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 48835a659948..ae4811fecc1f 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -1916,7 +1916,7 @@ struct reiserfs_de_head { /* empty directory contains two entries "." and ".." and their headers */ #define EMPTY_DIR_SIZE \ -(DEH_SIZE * 2 + ROUND_UP (strlen (".")) + ROUND_UP (strlen (".."))) +(DEH_SIZE * 2 + ROUND_UP (sizeof(".") - 1) + ROUND_UP (sizeof("..") - 1)) /* old format directories have this size when empty */ #define EMPTY_DIR_SIZE_V1 (DEH_SIZE * 2 + 3) diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index cf348ba99238..1acb2ff505e6 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1256,7 +1256,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, * Inode length changed, so we have to make sure * @I_DIRTY_DATASYNC is set. */ - __mark_inode_dirty(inode, I_DIRTY_SYNC | I_DIRTY_DATASYNC); + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); else mark_inode_dirty_sync(inode); mutex_unlock(&ui->ui_mutex); diff --git a/fs/udf/file.c b/fs/udf/file.c index 356c2bf148a5..cd31e4f6d6da 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -257,12 +257,22 @@ const struct file_operations udf_file_operations = { static int udf_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); + struct super_block *sb = inode->i_sb; int error; error = setattr_prepare(dentry, attr); if (error) return error; + if ((attr->ia_valid & ATTR_UID) && + UDF_QUERY_FLAG(sb, UDF_FLAG_UID_SET) && + !uid_eq(attr->ia_uid, UDF_SB(sb)->s_uid)) + return -EPERM; + if ((attr->ia_valid & ATTR_GID) && + UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET) && + !gid_eq(attr->ia_gid, UDF_SB(sb)->s_gid)) + return -EPERM; + if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size != i_size_read(inode)) { error = udf_setsize(inode, attr->ia_size); diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index b6e420c1bfeb..b7a0d4b4bda1 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -104,6 +104,10 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode) } inode_init_owner(inode, dir, mode); + if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_SET)) + inode->i_uid = sbi->s_uid; + if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET)) + inode->i_gid = sbi->s_gid; iinfo->i_location.logicalBlockNum = block; iinfo->i_location.partitionReferenceNum = diff --git a/fs/udf/inode.c b/fs/udf/inode.c index c23744d5ae5c..c80765d62f7e 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1275,6 +1275,7 @@ static int udf_read_inode(struct inode *inode, bool hidden_inode) unsigned int indirections = 0; int bs = inode->i_sb->s_blocksize; int ret = -EIO; + uint32_t uid, gid; reread: if (iloc->partitionReferenceNum >= sbi->s_partitions) { @@ -1400,17 +1401,19 @@ reread: ret = -EIO; read_lock(&sbi->s_cred_lock); - i_uid_write(inode, le32_to_cpu(fe->uid)); - if (!uid_valid(inode->i_uid) || - UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_IGNORE) || + uid = le32_to_cpu(fe->uid); + if (uid == UDF_INVALID_ID || UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_SET)) - inode->i_uid = UDF_SB(inode->i_sb)->s_uid; + inode->i_uid = sbi->s_uid; + else + i_uid_write(inode, uid); - i_gid_write(inode, le32_to_cpu(fe->gid)); - if (!gid_valid(inode->i_gid) || - UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_IGNORE) || + gid = le32_to_cpu(fe->gid); + if (gid == UDF_INVALID_ID || UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_SET)) - inode->i_gid = UDF_SB(inode->i_sb)->s_gid; + inode->i_gid = sbi->s_gid; + else + i_gid_write(inode, gid); if (fe->icbTag.fileType != ICBTAG_FILE_TYPE_DIRECTORY && sbi->s_fmode != UDF_INVALID_MODE) @@ -1655,12 +1658,12 @@ static int udf_update_inode(struct inode *inode, int do_sync) } if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_FORGET)) - fe->uid = cpu_to_le32(-1); + fe->uid = cpu_to_le32(UDF_INVALID_ID); else fe->uid = cpu_to_le32(i_uid_read(inode)); if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_FORGET)) - fe->gid = cpu_to_le32(-1); + fe->gid = cpu_to_le32(UDF_INVALID_ID); else fe->gid = cpu_to_le32(i_gid_read(inode)); diff --git a/fs/udf/super.c b/fs/udf/super.c index f73239a9a97d..7949c338efa5 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -64,14 +64,13 @@ #include <linux/init.h> #include <linux/uaccess.h> -#define VDS_POS_PRIMARY_VOL_DESC 0 -#define VDS_POS_UNALLOC_SPACE_DESC 1 -#define VDS_POS_LOGICAL_VOL_DESC 2 -#define VDS_POS_PARTITION_DESC 3 -#define VDS_POS_IMP_USE_VOL_DESC 4 -#define VDS_POS_VOL_DESC_PTR 5 -#define VDS_POS_TERMINATING_DESC 6 -#define VDS_POS_LENGTH 7 +enum { + VDS_POS_PRIMARY_VOL_DESC, + VDS_POS_UNALLOC_SPACE_DESC, + VDS_POS_LOGICAL_VOL_DESC, + VDS_POS_IMP_USE_VOL_DESC, + VDS_POS_LENGTH +}; #define VSD_FIRST_SECTOR_OFFSET 32768 #define VSD_MAX_SECTOR_OFFSET 0x800000 @@ -223,10 +222,6 @@ struct udf_options { unsigned int session; unsigned int lastblock; unsigned int anchor; - unsigned int volume; - unsigned short partition; - unsigned int fileset; - unsigned int rootdir; unsigned int flags; umode_t umask; kgid_t gid; @@ -349,12 +344,8 @@ static int udf_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",shortad"); if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_FORGET)) seq_puts(seq, ",uid=forget"); - if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_IGNORE)) - seq_puts(seq, ",uid=ignore"); if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_FORGET)) seq_puts(seq, ",gid=forget"); - if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_IGNORE)) - seq_puts(seq, ",gid=ignore"); if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_SET)) seq_printf(seq, ",uid=%u", from_kuid(&init_user_ns, sbi->s_uid)); if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET)) @@ -371,10 +362,6 @@ static int udf_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",lastblock=%u", sbi->s_last_block); if (sbi->s_anchor != 0) seq_printf(seq, ",anchor=%u", sbi->s_anchor); - /* - * volume, partition, fileset and rootdir seem to be ignored - * currently - */ if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) seq_puts(seq, ",utf8"); if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP) && sbi->s_nls_map) @@ -487,14 +474,9 @@ static int udf_parse_options(char *options, struct udf_options *uopt, int option; uopt->novrs = 0; - uopt->partition = 0xFFFF; uopt->session = 0xFFFFFFFF; uopt->lastblock = 0; uopt->anchor = 0; - uopt->volume = 0xFFFFFFFF; - uopt->rootdir = 0xFFFFFFFF; - uopt->fileset = 0xFFFFFFFF; - uopt->nls_map = NULL; if (!options) return 1; @@ -582,42 +564,30 @@ static int udf_parse_options(char *options, struct udf_options *uopt, uopt->anchor = option; break; case Opt_volume: - if (match_int(args, &option)) - return 0; - uopt->volume = option; - break; case Opt_partition: - if (match_int(args, &option)) - return 0; - uopt->partition = option; - break; case Opt_fileset: - if (match_int(args, &option)) - return 0; - uopt->fileset = option; - break; case Opt_rootdir: - if (match_int(args, &option)) - return 0; - uopt->rootdir = option; + /* Ignored (never implemented properly) */ break; case Opt_utf8: uopt->flags |= (1 << UDF_FLAG_UTF8); break; #ifdef CONFIG_UDF_NLS case Opt_iocharset: - uopt->nls_map = load_nls(args[0].from); - uopt->flags |= (1 << UDF_FLAG_NLS_MAP); + if (!remount) { + if (uopt->nls_map) + unload_nls(uopt->nls_map); + uopt->nls_map = load_nls(args[0].from); + uopt->flags |= (1 << UDF_FLAG_NLS_MAP); + } break; #endif - case Opt_uignore: - uopt->flags |= (1 << UDF_FLAG_UID_IGNORE); - break; case Opt_uforget: uopt->flags |= (1 << UDF_FLAG_UID_FORGET); break; + case Opt_uignore: case Opt_gignore: - uopt->flags |= (1 << UDF_FLAG_GID_IGNORE); + /* These options are superseeded by uid=<number> */ break; case Opt_gforget: uopt->flags |= (1 << UDF_FLAG_GID_FORGET); @@ -660,6 +630,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) uopt.umask = sbi->s_umask; uopt.fmode = sbi->s_fmode; uopt.dmode = sbi->s_dmode; + uopt.nls_map = NULL; if (!udf_parse_options(options, &uopt, true)) return -EINVAL; @@ -1592,6 +1563,60 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_ sbi->s_lvid_bh = NULL; } +/* + * Step for reallocation of table of partition descriptor sequence numbers. + * Must be power of 2. + */ +#define PART_DESC_ALLOC_STEP 32 + +struct desc_seq_scan_data { + struct udf_vds_record vds[VDS_POS_LENGTH]; + unsigned int size_part_descs; + struct udf_vds_record *part_descs_loc; +}; + +static struct udf_vds_record *handle_partition_descriptor( + struct buffer_head *bh, + struct desc_seq_scan_data *data) +{ + struct partitionDesc *desc = (struct partitionDesc *)bh->b_data; + int partnum; + + partnum = le16_to_cpu(desc->partitionNumber); + if (partnum >= data->size_part_descs) { + struct udf_vds_record *new_loc; + unsigned int new_size = ALIGN(partnum, PART_DESC_ALLOC_STEP); + + new_loc = kzalloc(sizeof(*new_loc) * new_size, GFP_KERNEL); + if (!new_loc) + return ERR_PTR(-ENOMEM); + memcpy(new_loc, data->part_descs_loc, + data->size_part_descs * sizeof(*new_loc)); + kfree(data->part_descs_loc); + data->part_descs_loc = new_loc; + data->size_part_descs = new_size; + } + return &(data->part_descs_loc[partnum]); +} + + +static struct udf_vds_record *get_volume_descriptor_record(uint16_t ident, + struct buffer_head *bh, struct desc_seq_scan_data *data) +{ + switch (ident) { + case TAG_IDENT_PVD: /* ISO 13346 3/10.1 */ + return &(data->vds[VDS_POS_PRIMARY_VOL_DESC]); + case TAG_IDENT_IUVD: /* ISO 13346 3/10.4 */ + return &(data->vds[VDS_POS_IMP_USE_VOL_DESC]); + case TAG_IDENT_LVD: /* ISO 13346 3/10.6 */ + return &(data->vds[VDS_POS_LOGICAL_VOL_DESC]); + case TAG_IDENT_USD: /* ISO 13346 3/10.8 */ + return &(data->vds[VDS_POS_UNALLOC_SPACE_DESC]); + case TAG_IDENT_PD: /* ISO 13346 3/10.5 */ + return handle_partition_descriptor(bh, data); + } + return NULL; +} /* * Process a main/reserve volume descriptor sequence. @@ -1608,18 +1633,23 @@ static noinline int udf_process_sequence( struct kernel_lb_addr *fileset) { struct buffer_head *bh = NULL; - struct udf_vds_record vds[VDS_POS_LENGTH]; struct udf_vds_record *curr; struct generic_desc *gd; struct volDescPtr *vdp; bool done = false; uint32_t vdsn; uint16_t ident; - long next_s = 0, next_e = 0; int ret; unsigned int indirections = 0; - - memset(vds, 0, sizeof(struct udf_vds_record) * VDS_POS_LENGTH); + struct desc_seq_scan_data data; + unsigned int i; + + memset(data.vds, 0, sizeof(struct udf_vds_record) * VDS_POS_LENGTH); + data.size_part_descs = PART_DESC_ALLOC_STEP; + data.part_descs_loc = kzalloc(sizeof(*data.part_descs_loc) * + data.size_part_descs, GFP_KERNEL); + if (!data.part_descs_loc) + return -ENOMEM; /* * Read the main descriptor sequence and find which descriptors @@ -1628,79 +1658,51 @@ static noinline int udf_process_sequence( for (; (!done && block <= lastblock); block++) { bh = udf_read_tagged(sb, block, block, &ident); - if (!bh) { - udf_err(sb, - "Block %llu of volume descriptor sequence is corrupted or we could not read it\n", - (unsigned long long)block); - return -EAGAIN; - } + if (!bh) + break; /* Process each descriptor (ISO 13346 3/8.3-8.4) */ gd = (struct generic_desc *)bh->b_data; vdsn = le32_to_cpu(gd->volDescSeqNum); switch (ident) { - case TAG_IDENT_PVD: /* ISO 13346 3/10.1 */ - curr = &vds[VDS_POS_PRIMARY_VOL_DESC]; - if (vdsn >= curr->volDescSeqNum) { - curr->volDescSeqNum = vdsn; - curr->block = block; - } - break; case TAG_IDENT_VDP: /* ISO 13346 3/10.3 */ - curr = &vds[VDS_POS_VOL_DESC_PTR]; - if (vdsn >= curr->volDescSeqNum) { - curr->volDescSeqNum = vdsn; - curr->block = block; - - vdp = (struct volDescPtr *)bh->b_data; - next_s = le32_to_cpu( - vdp->nextVolDescSeqExt.extLocation); - next_e = le32_to_cpu( - vdp->nextVolDescSeqExt.extLength); - next_e = next_e >> sb->s_blocksize_bits; - next_e += next_s; + if (++indirections > UDF_MAX_TD_NESTING) { + udf_err(sb, "too many Volume Descriptor " + "Pointers (max %u supported)\n", + UDF_MAX_TD_NESTING); + brelse(bh); + return -EIO; } + + vdp = (struct volDescPtr *)bh->b_data; + block = le32_to_cpu(vdp->nextVolDescSeqExt.extLocation); + lastblock = le32_to_cpu( + vdp->nextVolDescSeqExt.extLength) >> + sb->s_blocksize_bits; + lastblock += block - 1; + /* For loop is going to increment 'block' again */ + block--; break; + case TAG_IDENT_PVD: /* ISO 13346 3/10.1 */ case TAG_IDENT_IUVD: /* ISO 13346 3/10.4 */ - curr = &vds[VDS_POS_IMP_USE_VOL_DESC]; - if (vdsn >= curr->volDescSeqNum) { - curr->volDescSeqNum = vdsn; - curr->block = block; - } - break; - case TAG_IDENT_PD: /* ISO 13346 3/10.5 */ - curr = &vds[VDS_POS_PARTITION_DESC]; - if (!curr->block) - curr->block = block; - break; case TAG_IDENT_LVD: /* ISO 13346 3/10.6 */ - curr = &vds[VDS_POS_LOGICAL_VOL_DESC]; - if (vdsn >= curr->volDescSeqNum) { - curr->volDescSeqNum = vdsn; - curr->block = block; - } - break; case TAG_IDENT_USD: /* ISO 13346 3/10.8 */ - curr = &vds[VDS_POS_UNALLOC_SPACE_DESC]; + case TAG_IDENT_PD: /* ISO 13346 3/10.5 */ + curr = get_volume_descriptor_record(ident, bh, &data); + if (IS_ERR(curr)) { + brelse(bh); + return PTR_ERR(curr); + } + /* Descriptor we don't care about? */ + if (!curr) + break; if (vdsn >= curr->volDescSeqNum) { curr->volDescSeqNum = vdsn; curr->block = block; } break; case TAG_IDENT_TD: /* ISO 13346 3/10.9 */ - if (++indirections > UDF_MAX_TD_NESTING) { - udf_err(sb, "too many TDs (max %u supported)\n", UDF_MAX_TD_NESTING); - brelse(bh); - return -EIO; - } - - vds[VDS_POS_TERMINATING_DESC].block = block; - if (next_e) { - block = next_s; - lastblock = next_e; - next_s = next_e = 0; - } else - done = true; + done = true; break; } brelse(bh); @@ -1709,31 +1711,27 @@ static noinline int udf_process_sequence( * Now read interesting descriptors again and process them * in a suitable order */ - if (!vds[VDS_POS_PRIMARY_VOL_DESC].block) { + if (!data.vds[VDS_POS_PRIMARY_VOL_DESC].block) { udf_err(sb, "Primary Volume Descriptor not found!\n"); return -EAGAIN; } - ret = udf_load_pvoldesc(sb, vds[VDS_POS_PRIMARY_VOL_DESC].block); + ret = udf_load_pvoldesc(sb, data.vds[VDS_POS_PRIMARY_VOL_DESC].block); if (ret < 0) return ret; - if (vds[VDS_POS_LOGICAL_VOL_DESC].block) { + if (data.vds[VDS_POS_LOGICAL_VOL_DESC].block) { ret = udf_load_logicalvol(sb, - vds[VDS_POS_LOGICAL_VOL_DESC].block, - fileset); + data.vds[VDS_POS_LOGICAL_VOL_DESC].block, + fileset); if (ret < 0) return ret; } - if (vds[VDS_POS_PARTITION_DESC].block) { - /* - * We rescan the whole descriptor sequence to find - * partition descriptor blocks and process them. - */ - for (block = vds[VDS_POS_PARTITION_DESC].block; - block < vds[VDS_POS_TERMINATING_DESC].block; - block++) { - ret = udf_load_partdesc(sb, block); + /* Now handle prevailing Partition Descriptors */ + for (i = 0; i < data.size_part_descs; i++) { + if (data.part_descs_loc[i].block) { + ret = udf_load_partdesc(sb, + data.part_descs_loc[i].block); if (ret < 0) return ret; } @@ -1760,13 +1758,13 @@ static int udf_load_sequence(struct super_block *sb, struct buffer_head *bh, main_s = le32_to_cpu(anchor->mainVolDescSeqExt.extLocation); main_e = le32_to_cpu(anchor->mainVolDescSeqExt.extLength); main_e = main_e >> sb->s_blocksize_bits; - main_e += main_s; + main_e += main_s - 1; /* Locate the reserve sequence */ reserve_s = le32_to_cpu(anchor->reserveVolDescSeqExt.extLocation); reserve_e = le32_to_cpu(anchor->reserveVolDescSeqExt.extLength); reserve_e = reserve_e >> sb->s_blocksize_bits; - reserve_e += reserve_s; + reserve_e += reserve_s - 1; /* Process the main & reserve sequences */ /* responsible for finding the PartitionDesc(s) */ @@ -1994,7 +1992,10 @@ static void udf_open_lvid(struct super_block *sb) lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; ktime_get_real_ts(&ts); udf_time_to_disk_stamp(&lvid->recordingDateAndTime, ts); - lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_OPEN); + if (le32_to_cpu(lvid->integrityType) == LVID_INTEGRITY_TYPE_CLOSE) + lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_OPEN); + else + UDF_SET_FLAG(sb, UDF_FLAG_INCONSISTENT); lvid->descTag.descCRC = cpu_to_le16( crc_itu_t(0, (char *)lvid + sizeof(struct tag), @@ -2034,7 +2035,8 @@ static void udf_close_lvid(struct super_block *sb) lvidiu->minUDFReadRev = cpu_to_le16(sbi->s_udfrev); if (sbi->s_udfrev > le16_to_cpu(lvidiu->minUDFWriteRev)) lvidiu->minUDFWriteRev = cpu_to_le16(sbi->s_udfrev); - lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE); + if (!UDF_QUERY_FLAG(sb, UDF_FLAG_INCONSISTENT)) + lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE); lvid->descTag.descCRC = cpu_to_le16( crc_itu_t(0, (char *)lvid + sizeof(struct tag), @@ -2091,11 +2093,13 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) bool lvid_open = false; uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT); - uopt.uid = INVALID_UID; - uopt.gid = INVALID_GID; + /* By default we'll use overflow[ug]id when UDF inode [ug]id == -1 */ + uopt.uid = make_kuid(current_user_ns(), overflowuid); + uopt.gid = make_kgid(current_user_ns(), overflowgid); uopt.umask = 0; uopt.fmode = UDF_INVALID_MODE; uopt.dmode = UDF_INVALID_MODE; + uopt.nls_map = NULL; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) @@ -2276,8 +2280,8 @@ error_out: iput(sbi->s_vat_inode); parse_options_failure: #ifdef CONFIG_UDF_NLS - if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) - unload_nls(sbi->s_nls_map); + if (uopt.nls_map) + unload_nls(uopt.nls_map); #endif if (lvid_open) udf_close_lvid(sb); diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 68c9f1d618f5..9dd3e1b9619e 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -23,14 +23,13 @@ #define UDF_FLAG_NLS_MAP 9 #define UDF_FLAG_UTF8 10 #define UDF_FLAG_UID_FORGET 11 /* save -1 for uid to disk */ -#define UDF_FLAG_UID_IGNORE 12 /* use sb uid instead of on disk uid */ -#define UDF_FLAG_GID_FORGET 13 -#define UDF_FLAG_GID_IGNORE 14 -#define UDF_FLAG_UID_SET 15 -#define UDF_FLAG_GID_SET 16 -#define UDF_FLAG_SESSION_SET 17 -#define UDF_FLAG_LASTBLOCK_SET 18 -#define UDF_FLAG_BLOCKSIZE_SET 19 +#define UDF_FLAG_GID_FORGET 12 +#define UDF_FLAG_UID_SET 13 +#define UDF_FLAG_GID_SET 14 +#define UDF_FLAG_SESSION_SET 15 +#define UDF_FLAG_LASTBLOCK_SET 16 +#define UDF_FLAG_BLOCKSIZE_SET 17 +#define UDF_FLAG_INCONSISTENT 18 #define UDF_PART_FLAG_UNALLOC_BITMAP 0x0001 #define UDF_PART_FLAG_UNALLOC_TABLE 0x0002 diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index f5e0fe78979e..68e8a64d22e0 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -48,6 +48,8 @@ extern __printf(3, 4) void _udf_warn(struct super_block *sb, #define UDF_EXTENT_LENGTH_MASK 0x3FFFFFFF #define UDF_EXTENT_FLAG_MASK 0xC0000000 +#define UDF_INVALID_ID ((uint32_t)-1) + #define UDF_NAME_PAD 4 #define UDF_NAME_LEN 254 #define UDF_NAME_LEN_CS0 255 diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 19eadc807056..31f1f10eecd1 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1390,7 +1390,7 @@ xfs_vm_bmap( /* * The swap code (ab-)uses ->bmap to get a block mapping and then - * bypasseŃ• the file system for actual I/O. We really can't allow + * bypasses the file system for actual I/O. We really can't allow * that on reflinks inodes, so we have to skip out here. And yes, * 0 is the magic code for a bmap error. * diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 761f3189eff2..eed698aa9f16 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -122,7 +122,7 @@ xfs_nfs_get_inode( struct super_block *sb, u64 ino, u32 generation) - { +{ xfs_mount_t *mp = XFS_M(sb); xfs_inode_t *ip; int error; |