aboutsummaryrefslogtreecommitdiffstats
path: root/fs/super.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/super.c')
-rw-r--r--fs/super.c249
1 files changed, 94 insertions, 155 deletions
diff --git a/fs/super.c b/fs/super.c
index cd352530eca9..8d39e4f11cfa 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -31,7 +31,6 @@
#include <linux/mutex.h>
#include <linux/backing-dev.h>
#include <linux/rculist_bl.h>
-#include <linux/cleancache.h>
#include <linux/fscrypt.h>
#include <linux/fsnotify.h>
#include <linux/lockdep.h>
@@ -260,14 +259,13 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
s->s_time_gran = 1000000000;
s->s_time_min = TIME64_MIN;
s->s_time_max = TIME64_MAX;
- s->cleancache_poolid = CLEANCACHE_NO_POOL;
s->s_shrink.seeks = DEFAULT_SEEKS;
s->s_shrink.scan_objects = super_cache_scan;
s->s_shrink.count_objects = super_cache_count;
s->s_shrink.batch = 1024;
s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE;
- if (prealloc_shrinker(&s->s_shrink))
+ if (prealloc_shrinker(&s->s_shrink, "sb-%s", type->name))
goto fail;
if (list_lru_init_memcg(&s->s_dentry_lru, &s->s_shrink))
goto fail;
@@ -293,7 +291,7 @@ static void __put_super(struct super_block *s)
WARN_ON(s->s_inode_lru.node);
WARN_ON(!list_empty(&s->s_mounts));
security_sb_free(s);
- fscrypt_sb_free(s);
+ fscrypt_destroy_keyring(s);
put_user_ns(s->s_user_ns);
kfree(s->s_subtype);
call_rcu(&s->rcu, destroy_super_rcu);
@@ -307,7 +305,7 @@ static void __put_super(struct super_block *s)
* Drops a temporary reference, frees superblock if there's no
* references left.
*/
-static void put_super(struct super_block *sb)
+void put_super(struct super_block *sb)
{
spin_lock(&sb_lock);
__put_super(sb);
@@ -330,7 +328,6 @@ void deactivate_locked_super(struct super_block *s)
{
struct file_system_type *fs = s->s_type;
if (atomic_dec_and_test(&s->s_active)) {
- cleancache_invalidate_fs(s);
unregister_shrinker(&s->s_shrink);
fs->kill_sb(s);
@@ -361,7 +358,7 @@ EXPORT_SYMBOL(deactivate_locked_super);
*/
void deactivate_super(struct super_block *s)
{
- if (!atomic_add_unless(&s->s_active, -1, 1)) {
+ if (!atomic_add_unless(&s->s_active, -1, 1)) {
down_write(&s->s_umount);
deactivate_locked_super(s);
}
@@ -426,6 +423,35 @@ bool trylock_super(struct super_block *sb)
}
/**
+ * retire_super - prevents superblock from being reused
+ * @sb: superblock to retire
+ *
+ * The function marks superblock to be ignored in superblock test, which
+ * prevents it from being reused for any new mounts. If the superblock has
+ * a private bdi, it also unregisters it, but doesn't reduce the refcount
+ * of the superblock to prevent potential races. The refcount is reduced
+ * by generic_shutdown_super(). The function can not be called
+ * concurrently with generic_shutdown_super(). It is safe to call the
+ * function multiple times, subsequent calls have no effect.
+ *
+ * The marker will affect the re-use only for block-device-based
+ * superblocks. Other superblocks will still get marked if this function
+ * is used, but that will not affect their reusability.
+ */
+void retire_super(struct super_block *sb)
+{
+ WARN_ON(!sb->s_bdev);
+ down_write(&sb->s_umount);
+ if (sb->s_iflags & SB_I_PERSB_BDI) {
+ bdi_unregister(sb->s_bdi);
+ sb->s_iflags &= ~SB_I_PERSB_BDI;
+ }
+ sb->s_iflags |= SB_I_RETIRED;
+ up_write(&sb->s_umount);
+}
+EXPORT_SYMBOL(retire_super);
+
+/**
* generic_shutdown_super - common helper for ->kill_sb()
* @sb: superblock to kill
*
@@ -454,6 +480,8 @@ void generic_shutdown_super(struct super_block *sb)
evict_inodes(sb);
/* only nonzero refcount inodes can have marks */
fsnotify_sb_delete(sb);
+ fscrypt_destroy_keyring(sb);
+ security_sb_delete(sb);
if (sb->s_dio_done_wq) {
destroy_workqueue(sb->s_dio_done_wq);
@@ -475,6 +503,8 @@ void generic_shutdown_super(struct super_block *sb)
spin_unlock(&sb_lock);
up_write(&sb->s_umount);
if (sb->s_bdi != &noop_backing_dev_info) {
+ if (sb->s_iflags & SB_I_PERSB_BDI)
+ bdi_unregister(sb->s_bdi);
bdi_put(sb->s_bdi);
sb->s_bdi = &noop_backing_dev_info;
}
@@ -740,7 +770,14 @@ void iterate_supers_type(struct file_system_type *type,
EXPORT_SYMBOL(iterate_supers_type);
-static struct super_block *__get_super(struct block_device *bdev, bool excl)
+/**
+ * get_super - get the superblock of a device
+ * @bdev: device to get the superblock for
+ *
+ * Scans the superblock list and finds the superblock of the file system
+ * mounted on the device given. %NULL is returned if no match is found.
+ */
+struct super_block *get_super(struct block_device *bdev)
{
struct super_block *sb;
@@ -755,17 +792,11 @@ rescan:
if (sb->s_bdev == bdev) {
sb->s_count++;
spin_unlock(&sb_lock);
- if (!excl)
- down_read(&sb->s_umount);
- else
- down_write(&sb->s_umount);
+ down_read(&sb->s_umount);
/* still alive? */
if (sb->s_root && (sb->s_flags & SB_BORN))
return sb;
- if (!excl)
- up_read(&sb->s_umount);
- else
- up_write(&sb->s_umount);
+ up_read(&sb->s_umount);
/* nope, got unmounted */
spin_lock(&sb_lock);
__put_super(sb);
@@ -777,66 +808,6 @@ rescan:
}
/**
- * get_super - get the superblock of a device
- * @bdev: device to get the superblock for
- *
- * Scans the superblock list and finds the superblock of the file system
- * mounted on the device given. %NULL is returned if no match is found.
- */
-struct super_block *get_super(struct block_device *bdev)
-{
- return __get_super(bdev, false);
-}
-EXPORT_SYMBOL(get_super);
-
-static struct super_block *__get_super_thawed(struct block_device *bdev,
- bool excl)
-{
- while (1) {
- struct super_block *s = __get_super(bdev, excl);
- if (!s || s->s_writers.frozen == SB_UNFROZEN)
- return s;
- if (!excl)
- up_read(&s->s_umount);
- else
- up_write(&s->s_umount);
- wait_event(s->s_writers.wait_unfrozen,
- s->s_writers.frozen == SB_UNFROZEN);
- put_super(s);
- }
-}
-
-/**
- * get_super_thawed - get thawed superblock of a device
- * @bdev: device to get the superblock for
- *
- * Scans the superblock list and finds the superblock of the file system
- * mounted on the device. The superblock is returned once it is thawed
- * (or immediately if it was not frozen). %NULL is returned if no match
- * is found.
- */
-struct super_block *get_super_thawed(struct block_device *bdev)
-{
- return __get_super_thawed(bdev, false);
-}
-EXPORT_SYMBOL(get_super_thawed);
-
-/**
- * get_super_exclusive_thawed - get thawed superblock of a device
- * @bdev: device to get the superblock for
- *
- * Scans the superblock list and finds the superblock of the file system
- * mounted on the device. The superblock is returned once it is thawed
- * (or immediately if it was not frozen) and s_umount semaphore is held
- * in exclusive mode. %NULL is returned if no match is found.
- */
-struct super_block *get_super_exclusive_thawed(struct block_device *bdev)
-{
- return __get_super_thawed(bdev, true);
-}
-EXPORT_SYMBOL(get_super_exclusive_thawed);
-
-/**
* get_active_super - get an active reference to the superblock of a device
* @bdev: device to get the superblock for
*
@@ -867,7 +838,7 @@ restart:
return NULL;
}
-struct super_block *user_get_super(dev_t dev)
+struct super_block *user_get_super(dev_t dev, bool excl)
{
struct super_block *sb;
@@ -879,11 +850,17 @@ rescan:
if (sb->s_dev == dev) {
sb->s_count++;
spin_unlock(&sb_lock);
- down_read(&sb->s_umount);
+ if (excl)
+ down_write(&sb->s_umount);
+ else
+ down_read(&sb->s_umount);
/* still alive? */
if (sb->s_root && (sb->s_flags & SB_BORN))
return sb;
- up_read(&sb->s_umount);
+ if (excl)
+ up_write(&sb->s_umount);
+ else
+ up_read(&sb->s_umount);
/* nope, got unmounted */
spin_lock(&sb_lock);
__put_super(sb);
@@ -918,7 +895,8 @@ int reconfigure_super(struct fs_context *fc)
if (fc->sb_flags_mask & SB_RDONLY) {
#ifdef CONFIG_BLOCK
- if (!(fc->sb_flags & SB_RDONLY) && bdev_read_only(sb->s_bdev))
+ if (!(fc->sb_flags & SB_RDONLY) && sb->s_bdev &&
+ bdev_read_only(sb->s_bdev))
return -EACCES;
#endif
@@ -1254,8 +1232,10 @@ static int set_bdev_super(struct super_block *s, void *data)
{
s->s_bdev = data;
s->s_dev = s->s_bdev->bd_dev;
- s->s_bdi = bdi_get(s->s_bdev->bd_bdi);
+ s->s_bdi = bdi_get(s->s_bdev->bd_disk->bdi);
+ if (bdev_stable_writes(s->s_bdev))
+ s->s_iflags |= SB_I_STABLE_WRITES;
return 0;
}
@@ -1266,7 +1246,7 @@ static int set_bdev_super_fc(struct super_block *s, struct fs_context *fc)
static int test_bdev_super_fc(struct super_block *s, struct fs_context *fc)
{
- return s->s_bdev == fc->sget_key;
+ return !(s->s_iflags & SB_I_RETIRED) && s->s_bdev == fc->sget_key;
}
/**
@@ -1302,8 +1282,8 @@ int get_tree_bdev(struct fs_context *fc,
mutex_lock(&bdev->bd_fsfreeze_mutex);
if (bdev->bd_fsfreeze_count > 0) {
mutex_unlock(&bdev->bd_fsfreeze_mutex);
- blkdev_put(bdev, mode);
warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
+ blkdev_put(bdev, mode);
return -EBUSY;
}
@@ -1326,9 +1306,9 @@ int get_tree_bdev(struct fs_context *fc,
}
/*
- * s_umount nests inside bd_mutex during
+ * s_umount nests inside open_mutex during
* __invalidate_device(). blkdev_put() acquires
- * bd_mutex and can't be called under s_umount. Drop
+ * open_mutex and can't be called under s_umount. Drop
* s_umount temporarily. This is safe as we're
* holding an active reference.
*/
@@ -1338,6 +1318,8 @@ int get_tree_bdev(struct fs_context *fc,
} else {
s->s_mode = mode;
snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
+ shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s",
+ fc->fs_type->name, s->s_id);
sb_set_blocksize(s, block_size(bdev));
error = fill_super(s, fc);
if (error) {
@@ -1357,7 +1339,7 @@ EXPORT_SYMBOL(get_tree_bdev);
static int test_bdev_super(struct super_block *s, void *data)
{
- return (void *)s->s_bdev == data;
+ return !(s->s_iflags & SB_I_RETIRED) && (void *)s->s_bdev == data;
}
struct dentry *mount_bdev(struct file_system_type *fs_type,
@@ -1401,9 +1383,9 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
}
/*
- * s_umount nests inside bd_mutex during
+ * s_umount nests inside open_mutex during
* __invalidate_device(). blkdev_put() acquires
- * bd_mutex and can't be called under s_umount. Drop
+ * open_mutex and can't be called under s_umount. Drop
* s_umount temporarily. This is safe as we're
* holding an active reference.
*/
@@ -1413,6 +1395,8 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
} else {
s->s_mode = mode;
snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
+ shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s",
+ fs_type->name, s->s_id);
sb_set_blocksize(s, block_size(bdev));
error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
if (error) {
@@ -1470,8 +1454,8 @@ struct dentry *mount_nodev(struct file_system_type *fs_type,
}
EXPORT_SYMBOL(mount_nodev);
-static int reconfigure_single(struct super_block *s,
- int flags, void *data)
+int reconfigure_single(struct super_block *s,
+ int flags, void *data)
{
struct fs_context *fc;
int ret;
@@ -1598,12 +1582,10 @@ int super_setup_bdi_name(struct super_block *sb, char *fmt, ...)
int err;
va_list args;
- bdi = bdi_alloc(GFP_KERNEL);
+ bdi = bdi_alloc(NUMA_NO_NODE);
if (!bdi)
return -ENOMEM;
- bdi->name = sb->s_type->name;
-
va_start(args, fmt);
err = bdi_register_va(bdi, fmt, args);
va_end(args);
@@ -1613,6 +1595,7 @@ int super_setup_bdi_name(struct super_block *sb, char *fmt, ...)
}
WARN_ON(sb->s_bdi != &noop_backing_dev_info);
sb->s_bdi = bdi;
+ sb->s_iflags |= SB_I_PERSB_BDI;
return 0;
}
@@ -1631,55 +1614,6 @@ int super_setup_bdi(struct super_block *sb)
}
EXPORT_SYMBOL(super_setup_bdi);
-/*
- * This is an internal function, please use sb_end_{write,pagefault,intwrite}
- * instead.
- */
-void __sb_end_write(struct super_block *sb, int level)
-{
- percpu_up_read(sb->s_writers.rw_sem + level-1);
-}
-EXPORT_SYMBOL(__sb_end_write);
-
-/*
- * This is an internal function, please use sb_start_{write,pagefault,intwrite}
- * instead.
- */
-int __sb_start_write(struct super_block *sb, int level, bool wait)
-{
- bool force_trylock = false;
- int ret = 1;
-
-#ifdef CONFIG_LOCKDEP
- /*
- * We want lockdep to tell us about possible deadlocks with freezing
- * but it's it bit tricky to properly instrument it. Getting a freeze
- * protection works as getting a read lock but there are subtle
- * problems. XFS for example gets freeze protection on internal level
- * twice in some cases, which is OK only because we already hold a
- * freeze protection also on higher level. Due to these cases we have
- * to use wait == F (trylock mode) which must not fail.
- */
- if (wait) {
- int i;
-
- for (i = 0; i < level - 1; i++)
- if (percpu_rwsem_is_held(sb->s_writers.rw_sem + i)) {
- force_trylock = true;
- break;
- }
- }
-#endif
- if (wait && !force_trylock)
- percpu_down_read(sb->s_writers.rw_sem + level-1);
- else
- ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1);
-
- WARN_ON(force_trylock && !ret);
- return ret;
-}
-EXPORT_SYMBOL(__sb_start_write);
-
/**
* sb_wait_write - wait until all writers to given file system finish
* @sb: the super for which we wait
@@ -1716,11 +1650,9 @@ static void lockdep_sb_freeze_acquire(struct super_block *sb)
percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
}
-static void sb_freeze_unlock(struct super_block *sb)
+static void sb_freeze_unlock(struct super_block *sb, int level)
{
- int level;
-
- for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
+ for (level--; level >= 0; level--)
percpu_up_write(sb->s_writers.rw_sem + level);
}
@@ -1791,7 +1723,14 @@ int freeze_super(struct super_block *sb)
sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
/* All writers are done so after syncing there won't be dirty data */
- sync_filesystem(sb);
+ ret = sync_filesystem(sb);
+ if (ret) {
+ sb->s_writers.frozen = SB_UNFROZEN;
+ sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT);
+ wake_up(&sb->s_writers.wait_unfrozen);
+ deactivate_locked_super(sb);
+ return ret;
+ }
/* Now wait for internal filesystem counter */
sb->s_writers.frozen = SB_FREEZE_FS;
@@ -1803,7 +1742,7 @@ int freeze_super(struct super_block *sb)
printk(KERN_ERR
"VFS:Filesystem freeze failed\n");
sb->s_writers.frozen = SB_UNFROZEN;
- sb_freeze_unlock(sb);
+ sb_freeze_unlock(sb, SB_FREEZE_FS);
wake_up(&sb->s_writers.wait_unfrozen);
deactivate_locked_super(sb);
return ret;
@@ -1820,12 +1759,6 @@ int freeze_super(struct super_block *sb)
}
EXPORT_SYMBOL(freeze_super);
-/**
- * thaw_super -- unlock filesystem
- * @sb: the super to thaw
- *
- * Unlocks the filesystem and marks it writeable again after freeze_super().
- */
static int thaw_super_locked(struct super_block *sb)
{
int error;
@@ -1854,13 +1787,19 @@ static int thaw_super_locked(struct super_block *sb)
}
sb->s_writers.frozen = SB_UNFROZEN;
- sb_freeze_unlock(sb);
+ sb_freeze_unlock(sb, SB_FREEZE_FS);
out:
wake_up(&sb->s_writers.wait_unfrozen);
deactivate_locked_super(sb);
return 0;
}
+/**
+ * thaw_super -- unlock filesystem
+ * @sb: the super to thaw
+ *
+ * Unlocks the filesystem and marks it writeable again after freeze_super().
+ */
int thaw_super(struct super_block *sb)
{
down_write(&sb->s_umount);