From a3b3c5627c8301ac850962b04f645dfab81e6a60 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 2 Apr 2015 20:33:53 -0500 Subject: mnt: Use hlist_move_list in namespace_unlock Small cleanup to make the code more readable and maintainable. Signed-off-by: Eric Biederman --- fs/namespace.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index 82ef1405260e..e1ee57206eef 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1298,17 +1298,15 @@ static HLIST_HEAD(unmounted); /* protected by namespace_sem */ static void namespace_unlock(void) { - struct hlist_head head = unmounted; + struct hlist_head head; - if (likely(hlist_empty(&head))) { - up_write(&namespace_sem); - return; - } + hlist_move_list(&unmounted, &head); - head.first->pprev = &head.first; - INIT_HLIST_HEAD(&unmounted); up_write(&namespace_sem); + if (likely(hlist_empty(&head))) + return; + synchronize_rcu(); group_pin_kill(&head); -- cgit v1.2.3-59-g8ed1b From e819f152104c9f7c9fe50e1aecce6f5d4bf06d65 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 24 Dec 2014 07:20:01 -0600 Subject: mnt: Improve the umount_tree flags - Remove the unneeded declaration from pnode.h - Mark umount_tree static as it has no callers outside of namespace.c - Define an enumeration of umount_tree's flags. - Pass umount_tree's flags in by name This removes the magic numbers 0, 1 and 2 making the code a little clearer and makes it possible for there to be lazy unmounts that don't propagate. Which is what __detach_mounts actually wants for example. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. 
Biederman" --- fs/namespace.c | 31 ++++++++++++++++--------------- fs/pnode.h | 1 - 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index e1ee57206eef..e06e36777b90 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1317,14 +1317,15 @@ static inline void namespace_lock(void) down_write(&namespace_sem); } +enum umount_tree_flags { + UMOUNT_SYNC = 1, + UMOUNT_PROPAGATE = 2, +}; /* * mount_lock must be held * namespace_sem must be held for write - * how = 0 => just this tree, don't propagate - * how = 1 => propagate; we know that nobody else has reference to any victims - * how = 2 => lazy umount */ -void umount_tree(struct mount *mnt, int how) +static void umount_tree(struct mount *mnt, enum umount_tree_flags how) { HLIST_HEAD(tmp_list); struct mount *p; @@ -1337,7 +1338,7 @@ void umount_tree(struct mount *mnt, int how) hlist_for_each_entry(p, &tmp_list, mnt_hash) list_del_init(&p->mnt_child); - if (how) + if (how & UMOUNT_PROPAGATE) propagate_umount(&tmp_list); while (!hlist_empty(&tmp_list)) { @@ -1347,7 +1348,7 @@ void umount_tree(struct mount *mnt, int how) list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); p->mnt_ns = NULL; - if (how < 2) + if (how & UMOUNT_SYNC) p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, &unmounted); @@ -1445,14 +1446,14 @@ static int do_umount(struct mount *mnt, int flags) if (flags & MNT_DETACH) { if (!list_empty(&mnt->mnt_list)) - umount_tree(mnt, 2); + umount_tree(mnt, UMOUNT_PROPAGATE); retval = 0; } else { shrink_submounts(mnt); retval = -EBUSY; if (!propagate_mount_busy(mnt, 2)) { if (!list_empty(&mnt->mnt_list)) - umount_tree(mnt, 1); + umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC); retval = 0; } } @@ -1484,7 +1485,7 @@ void __detach_mounts(struct dentry *dentry) lock_mount_hash(); while (!hlist_empty(&mp->m_list)) { mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); - umount_tree(mnt, 2); + 
umount_tree(mnt, UMOUNT_PROPAGATE); } unlock_mount_hash(); put_mountpoint(mp); @@ -1646,7 +1647,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, out: if (res) { lock_mount_hash(); - umount_tree(res, 0); + umount_tree(res, UMOUNT_SYNC); unlock_mount_hash(); } return q; @@ -1670,7 +1671,7 @@ void drop_collected_mounts(struct vfsmount *mnt) { namespace_lock(); lock_mount_hash(); - umount_tree(real_mount(mnt), 0); + umount_tree(real_mount(mnt), UMOUNT_SYNC); unlock_mount_hash(); namespace_unlock(); } @@ -1853,7 +1854,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, out_cleanup_ids: while (!hlist_empty(&tree_list)) { child = hlist_entry(tree_list.first, struct mount, mnt_hash); - umount_tree(child, 0); + umount_tree(child, UMOUNT_SYNC); } unlock_mount_hash(); cleanup_group_ids(source_mnt, NULL); @@ -2033,7 +2034,7 @@ static int do_loopback(struct path *path, const char *old_name, err = graft_tree(mnt, parent, mp); if (err) { lock_mount_hash(); - umount_tree(mnt, 0); + umount_tree(mnt, UMOUNT_SYNC); unlock_mount_hash(); } out2: @@ -2404,7 +2405,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) while (!list_empty(&graveyard)) { mnt = list_first_entry(&graveyard, struct mount, mnt_expire); touch_mnt_namespace(mnt->mnt_ns); - umount_tree(mnt, 1); + umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC); } unlock_mount_hash(); namespace_unlock(); @@ -2475,7 +2476,7 @@ static void shrink_submounts(struct mount *mnt) m = list_first_entry(&graveyard, struct mount, mnt_expire); touch_mnt_namespace(m->mnt_ns); - umount_tree(m, 1); + umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC); } } } diff --git a/fs/pnode.h b/fs/pnode.h index 4a246358b031..16afc3d6d2f2 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -47,7 +47,6 @@ int get_dominating_id(struct mount *mnt, const struct path *root); unsigned int mnt_get_count(struct mount *mnt); void mnt_set_mountpoint(struct mount *, struct mountpoint *, struct mount *); -void umount_tree(struct mount *, int); 
struct mount *copy_tree(struct mount *, struct dentry *, int); bool is_path_reachable(struct mount *, struct dentry *, const struct path *root); -- cgit v1.2.3-59-g8ed1b From 8318e667f176f7ea34451a1a530634e293f216ac Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 24 Dec 2014 07:35:10 -0600 Subject: mnt: Don't propagate umounts in __detach_mounts Invoking mount propagation from __detach_mounts is inefficient and wrong. It is inefficient because __detach_mounts already walks the list of mounts where something needs to be done, and mount propagation walks some subset of those mounts again. It is actively wrong because if the dentry that is passed to __detach_mounts is not part of the path to a mount that mount should not be affected. change_mnt_propagation(p,MS_PRIVATE) modifies the mount propagation tree of a master mount so its slaves are connected to another master if possible. Which means even removing a mount from the middle of a mount tree with __detach_mounts will not deprive any mount of propagated mount events. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index e06e36777b90..c68d9fc912e7 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1485,7 +1485,7 @@ void __detach_mounts(struct dentry *dentry) lock_mount_hash(); while (!hlist_empty(&mp->m_list)) { mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); - umount_tree(mnt, UMOUNT_PROPAGATE); + umount_tree(mnt, 0); } unlock_mount_hash(); put_mountpoint(mp); -- cgit v1.2.3-59-g8ed1b From c003b26ff98ca04a180ff34c38c007a3998d62f9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 18 Dec 2014 13:10:48 -0600 Subject: mnt: In umount_tree reuse mnt_list instead of mnt_hash umount_tree builds a list of mounts that need to be unmounted. Utilize mnt_list for this purpose instead of mnt_hash. 
This begins to allow keeping a mount on the mnt_hash after it is unmounted, which is necessary for a properly functioning MNT_LOCKED implementation. The fact that mnt_list is an ordinary list, which makes list_move available, is a nice bonus. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 20 +++++++++++--------- fs/pnode.c | 6 +++--- fs/pnode.h | 2 +- 3 files changed, 15 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index c68d9fc912e7..54cbef129f4a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1327,23 +1327,25 @@ enum umount_tree_flags { */ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) { - HLIST_HEAD(tmp_list); + LIST_HEAD(tmp_list); struct mount *p; - for (p = mnt; p; p = next_mnt(p, mnt)) { - hlist_del_init_rcu(&p->mnt_hash); - hlist_add_head(&p->mnt_hash, &tmp_list); - } + /* Gather the mounts to umount */ + for (p = mnt; p; p = next_mnt(p, mnt)) + list_move(&p->mnt_list, &tmp_list); - hlist_for_each_entry(p, &tmp_list, mnt_hash) + /* Hide the mounts from lookup_mnt and mnt_mounts */ + list_for_each_entry(p, &tmp_list, mnt_list) { + hlist_del_init_rcu(&p->mnt_hash); list_del_init(&p->mnt_child); + } + /* Add propogated mounts to the tmp_list */ if (how & UMOUNT_PROPAGATE) propagate_umount(&tmp_list); - while (!hlist_empty(&tmp_list)) { - p = hlist_entry(tmp_list.first, struct mount, mnt_hash); - hlist_del_init_rcu(&p->mnt_hash); + while (!list_empty(&tmp_list)) { + p = list_first_entry(&tmp_list, struct mount, mnt_list); list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); diff --git a/fs/pnode.c b/fs/pnode.c index 260ac8f898a4..bf012af709dd 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -384,7 +384,7 @@ static void __propagate_umount(struct mount *mnt) if (child && list_empty(&child->mnt_mounts)) { list_del_init(&child->mnt_child); hlist_del_init_rcu(&child->mnt_hash); - hlist_add_before_rcu(&child->mnt_hash, 
&mnt->mnt_hash); + list_move_tail(&child->mnt_list, &mnt->mnt_list); } } } @@ -396,11 +396,11 @@ static void __propagate_umount(struct mount *mnt) * * vfsmount lock must be held for write */ -int propagate_umount(struct hlist_head *list) +int propagate_umount(struct list_head *list) { struct mount *mnt; - hlist_for_each_entry(mnt, list, mnt_hash) + list_for_each_entry(mnt, list, mnt_list) __propagate_umount(mnt); return 0; } diff --git a/fs/pnode.h b/fs/pnode.h index 16afc3d6d2f2..aa6d65df7204 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -40,7 +40,7 @@ static inline void set_mnt_shared(struct mount *mnt) void change_mnt_propagation(struct mount *, int); int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, struct hlist_head *); -int propagate_umount(struct hlist_head *); +int propagate_umount(struct list_head *); int propagate_mount_busy(struct mount *, int); void mnt_release_group_id(struct mount *); int get_dominating_id(struct mount *mnt, const struct path *root); -- cgit v1.2.3-59-g8ed1b From 590ce4bcbfb4e0462a720a4ad901e84416080bba Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 22 Dec 2014 18:30:08 -0600 Subject: mnt: Add MNT_UMOUNT flag In some instances it is necessary to know if the unmounting process has begun on a mount. Add MNT_UMOUNT to make that reliably testable. This fix gets used in fixing locked mounts in MNT_DETACH Cc: stable@vger.kernel.org Signed-off-by: "Eric W. 
Biederman" --- fs/namespace.c | 4 +++- fs/pnode.c | 1 + include/linux/mount.h | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index 54cbef129f4a..d1708147eb45 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1331,8 +1331,10 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) struct mount *p; /* Gather the mounts to umount */ - for (p = mnt; p; p = next_mnt(p, mnt)) + for (p = mnt; p; p = next_mnt(p, mnt)) { + p->mnt.mnt_flags |= MNT_UMOUNT; list_move(&p->mnt_list, &tmp_list); + } /* Hide the mounts from lookup_mnt and mnt_mounts */ list_for_each_entry(p, &tmp_list, mnt_list) { diff --git a/fs/pnode.c b/fs/pnode.c index bf012af709dd..ac3aa0d43b90 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -384,6 +384,7 @@ static void __propagate_umount(struct mount *mnt) if (child && list_empty(&child->mnt_mounts)) { list_del_init(&child->mnt_child); hlist_del_init_rcu(&child->mnt_hash); + child->mnt.mnt_flags |= MNT_UMOUNT; list_move_tail(&child->mnt_list, &mnt->mnt_list); } } diff --git a/include/linux/mount.h b/include/linux/mount.h index c2c561dc0114..564beeec5d83 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -61,6 +61,7 @@ struct mnt_namespace; #define MNT_DOOMED 0x1000000 #define MNT_SYNC_UMOUNT 0x2000000 #define MNT_MARKED 0x4000000 +#define MNT_UMOUNT 0x8000000 struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ -- cgit v1.2.3-59-g8ed1b From 411a938b5abc9cb126c41cccf5975ae464fe0f3e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 22 Dec 2014 19:12:07 -0600 Subject: mnt: Delay removal from the mount hash. - Modify __lookup_mnt_last to ignore mounts that have MNT_UMOUNT set. - Don't remove mounts from the mount hash table in propagate_umount - Don't remove mounts from the mount hash table in umount_tree before the entire list of mounts to be umounted is selected. 
- Remove mounts from the mount hash table as the last thing that happens in the case where a mount has a parent in umount_tree. Mounts without parents are not hashed (by definition). This paves the way for delaying removal from the mount hash table even farther and fixing the MNT_LOCKED vs MNT_DETACH issue. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 13 ++++++++----- fs/pnode.c | 1 - 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index d1708147eb45..083e3401a808 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -632,14 +632,17 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) */ struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) { - struct mount *p, *res; - res = p = __lookup_mnt(mnt, dentry); + struct mount *p, *res = NULL; + p = __lookup_mnt(mnt, dentry); if (!p) goto out; + if (!(p->mnt.mnt_flags & MNT_UMOUNT)) + res = p; hlist_for_each_entry_continue(p, mnt_hash) { if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry) break; - res = p; + if (!(p->mnt.mnt_flags & MNT_UMOUNT)) + res = p; } out: return res; @@ -1336,9 +1339,8 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) list_move(&p->mnt_list, &tmp_list); } - /* Hide the mounts from lookup_mnt and mnt_mounts */ + /* Hide the mounts from mnt_mounts */ list_for_each_entry(p, &tmp_list, mnt_list) { - hlist_del_init_rcu(&p->mnt_hash); list_del_init(&p->mnt_child); } @@ -1365,6 +1367,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) p->mnt_mountpoint = p->mnt.mnt_root; p->mnt_parent = p; p->mnt_mp = NULL; + hlist_del_init_rcu(&p->mnt_hash); } change_mnt_propagation(p, MS_PRIVATE); } diff --git a/fs/pnode.c b/fs/pnode.c index ac3aa0d43b90..c27ae38ee250 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -383,7 +383,6 @@ static void __propagate_umount(struct mount *mnt) */ if (child && 
list_empty(&child->mnt_mounts)) { list_del_init(&child->mnt_child); - hlist_del_init_rcu(&child->mnt_hash); child->mnt.mnt_flags |= MNT_UMOUNT; list_move_tail(&child->mnt_list, &mnt->mnt_list); } -- cgit v1.2.3-59-g8ed1b From 5d88457eb5b86b475422dc882f089203faaeedb5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 3 Jan 2015 05:39:35 -0600 Subject: mnt: On an unmount propagate clearing of MNT_LOCKED A prerequisite of calling umount_tree is that the point where the tree is mounted at is valid to unmount. If we are propagating the effect of the unmount clear MNT_LOCKED in every instance where the same filesystem is mounted on the same mountpoint in the mount tree, as we know (by virtue of the fact that umount_tree was called) that it is safe to reveal what is at that mountpoint. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 3 +++ fs/pnode.c | 20 ++++++++++++++++++++ fs/pnode.h | 1 + 3 files changed, 24 insertions(+) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index 083e3401a808..2b12b7a9455d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1333,6 +1333,9 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) LIST_HEAD(tmp_list); struct mount *p; + if (how & UMOUNT_PROPAGATE) + propagate_mount_unlock(mnt); + /* Gather the mounts to umount */ for (p = mnt; p; p = next_mnt(p, mnt)) { p->mnt.mnt_flags |= MNT_UMOUNT; diff --git a/fs/pnode.c b/fs/pnode.c index c27ae38ee250..89890293dd0a 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -361,6 +361,26 @@ int propagate_mount_busy(struct mount *mnt, int refcnt) return ret; } +/* + * Clear MNT_LOCKED when it can be shown to be safe. 
+ * + * mount_lock lock must be held for write + */ +void propagate_mount_unlock(struct mount *mnt) +{ + struct mount *parent = mnt->mnt_parent; + struct mount *m, *child; + + BUG_ON(parent == mnt); + + for (m = propagation_next(parent, parent); m; + m = propagation_next(m, parent)) { + child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint); + if (child) + child->mnt.mnt_flags &= ~MNT_LOCKED; + } +} + /* * NOTE: unmounting 'mnt' naturally propagates to all other mounts its * parent propagates to. diff --git a/fs/pnode.h b/fs/pnode.h index aa6d65df7204..af47d4bd7b31 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -42,6 +42,7 @@ int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, struct hlist_head *); int propagate_umount(struct list_head *); int propagate_mount_busy(struct mount *, int); +void propagate_mount_unlock(struct mount *); void mnt_release_group_id(struct mount *); int get_dominating_id(struct mount *mnt, const struct path *root); unsigned int mnt_get_count(struct mount *mnt); -- cgit v1.2.3-59-g8ed1b From 0c56fe31420ca599c90240315f7959bf1b4eb6ce Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 5 Jan 2015 13:38:04 -0600 Subject: mnt: Don't propagate unmounts to locked mounts If the first mount in shared subtree is locked don't unmount the shared subtree. This is ensured by walking through the mounts parents before children and marking a mount as unmountable if it is not locked or it is locked but it's parent is marked. This allows recursive mount detach to propagate through a set of mounts when unmounting them would not reveal what is under any locked mount. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. 
Biederman" --- fs/pnode.c | 32 +++++++++++++++++++++++++++++--- fs/pnode.h | 1 + 2 files changed, 30 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/pnode.c b/fs/pnode.c index 89890293dd0a..6367e1e435c6 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -381,6 +381,26 @@ void propagate_mount_unlock(struct mount *mnt) } } +/* + * Mark all mounts that the MNT_LOCKED logic will allow to be unmounted. + */ +static void mark_umount_candidates(struct mount *mnt) +{ + struct mount *parent = mnt->mnt_parent; + struct mount *m; + + BUG_ON(parent == mnt); + + for (m = propagation_next(parent, parent); m; + m = propagation_next(m, parent)) { + struct mount *child = __lookup_mnt_last(&m->mnt, + mnt->mnt_mountpoint); + if (child && (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m))) { + SET_MNT_MARK(child); + } + } +} + /* * NOTE: unmounting 'mnt' naturally propagates to all other mounts its * parent propagates to. @@ -398,10 +418,13 @@ static void __propagate_umount(struct mount *mnt) struct mount *child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint); /* - * umount the child only if the child has no - * other children + * umount the child only if the child has no children + * and the child is marked safe to unmount. 
*/ - if (child && list_empty(&child->mnt_mounts)) { + if (!child || !IS_MNT_MARKED(child)) + continue; + CLEAR_MNT_MARK(child); + if (list_empty(&child->mnt_mounts)) { list_del_init(&child->mnt_child); child->mnt.mnt_flags |= MNT_UMOUNT; list_move_tail(&child->mnt_list, &mnt->mnt_list); @@ -420,6 +443,9 @@ int propagate_umount(struct list_head *list) { struct mount *mnt; + list_for_each_entry_reverse(mnt, list, mnt_list) + mark_umount_candidates(mnt); + list_for_each_entry(mnt, list, mnt_list) __propagate_umount(mnt); return 0; diff --git a/fs/pnode.h b/fs/pnode.h index af47d4bd7b31..0fcdbe7ca648 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -19,6 +19,7 @@ #define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED) #define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED) #define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED) +#define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED) #define CL_EXPIRE 0x01 #define CL_SLAVE 0x02 -- cgit v1.2.3-59-g8ed1b From cd4a40174b71acd021877341684d8bb1dc8ea4ae Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 7 Jan 2015 14:28:26 -0600 Subject: mnt: Fail collect_mounts when applied to unmounted mounts The only users of collect_mounts are in audit_tree.c In audit_trim_trees and audit_add_tree_rule the path passed into collect_mounts is generated from kern_path passed an audit_tree pathname which is guaranteed to be an absolute path. In those cases collect_mounts is obviously intended to work on mounted paths and if a race results in paths that are unmounted when collect_mounts it is reasonable to fail early. The paths passed into audit_tag_tree don't have the absolute path check. But are used to play with fsnotify and otherwise interact with the audit_trees, so again operating only on mounted paths appears reasonable. Avoid having to worry about what happens when we try and audit unmounted filesystems by restricting collect_mounts to mounts that appear in the mount tree. Signed-off-by: "Eric W. 
Biederman" --- fs/namespace.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index 2b12b7a9455d..acc5583764dc 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1669,8 +1669,11 @@ struct vfsmount *collect_mounts(struct path *path) { struct mount *tree; namespace_lock(); - tree = copy_tree(real_mount(path->mnt), path->dentry, - CL_COPY_ALL | CL_PRIVATE); + if (!check_mnt(real_mount(path->mnt))) + tree = ERR_PTR(-EINVAL); + else + tree = copy_tree(real_mount(path->mnt), path->dentry, + CL_COPY_ALL | CL_PRIVATE); namespace_unlock(); if (IS_ERR(tree)) return ERR_CAST(tree); -- cgit v1.2.3-59-g8ed1b From 7bdb11de8ee4f4ae195e2fa19efd304e0b36c63b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 29 Dec 2014 13:03:41 -0600 Subject: mnt: Factor out unhash_mnt from detach_mnt and umount_tree Create a function unhash_mnt that contains the common code between detach_mnt and umount_tree, and use unhash_mnt in place of the common code. This adds an unnecessary list_del_init(mnt->mnt_child) into umount_tree but given that mnt_child is already empty this extra line is a noop. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. 
Biederman" --- fs/namespace.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index acc5583764dc..e669a3bf86e7 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -798,10 +798,8 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns) /* * vfsmount lock must be held for write */ -static void detach_mnt(struct mount *mnt, struct path *old_path) +static void unhash_mnt(struct mount *mnt) { - old_path->dentry = mnt->mnt_mountpoint; - old_path->mnt = &mnt->mnt_parent->mnt; mnt->mnt_parent = mnt; mnt->mnt_mountpoint = mnt->mnt.mnt_root; list_del_init(&mnt->mnt_child); @@ -811,6 +809,16 @@ static void detach_mnt(struct mount *mnt, struct path *old_path) mnt->mnt_mp = NULL; } +/* + * vfsmount lock must be held for write + */ +static void detach_mnt(struct mount *mnt, struct path *old_path) +{ + old_path->dentry = mnt->mnt_mountpoint; + old_path->mnt = &mnt->mnt_parent->mnt; + unhash_mnt(mnt); +} + /* * vfsmount lock must be held for write */ @@ -1362,15 +1370,10 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, &unmounted); if (mnt_has_parent(p)) { - hlist_del_init(&p->mnt_mp_list); - put_mountpoint(p->mnt_mp); mnt_add_count(p->mnt_parent, -1); /* old mountpoint will be dropped when we can do that */ p->mnt_ex_mountpoint = p->mnt_mountpoint; - p->mnt_mountpoint = p->mnt.mnt_root; - p->mnt_parent = p; - p->mnt_mp = NULL; - hlist_del_init_rcu(&p->mnt_hash); + unhash_mnt(p); } change_mnt_propagation(p, MS_PRIVATE); } -- cgit v1.2.3-59-g8ed1b From 6a46c5735c29175da55b2fa9d53775182422cdd7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 15 Jan 2015 22:58:33 -0600 Subject: mnt: Factor umount_mnt from umount_tree For future use factor out a function umount_mnt from umount_tree. 
This function unhashes a mount and remembers where the mount was mounted so that eventually when the code makes it to a sleeping context the mountpoint can be dput. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index e669a3bf86e7..010d5bebcb7e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -819,6 +819,16 @@ static void detach_mnt(struct mount *mnt, struct path *old_path) unhash_mnt(mnt); } +/* + * vfsmount lock must be held for write + */ +static void umount_mnt(struct mount *mnt) +{ + /* old mountpoint will be dropped when we can do that */ + mnt->mnt_ex_mountpoint = mnt->mnt_mountpoint; + unhash_mnt(mnt); +} + /* * vfsmount lock must be held for write */ @@ -1371,9 +1381,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, &unmounted); if (mnt_has_parent(p)) { mnt_add_count(p->mnt_parent, -1); - /* old mountpoint will be dropped when we can do that */ - p->mnt_ex_mountpoint = p->mnt_mountpoint; - unhash_mnt(p); + umount_mnt(p); } change_mnt_propagation(p, MS_PRIVATE); } -- cgit v1.2.3-59-g8ed1b From 820f9f147dcce2602eefd9b575bbbd9ea14f0953 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 2 Apr 2015 16:35:48 -0500 Subject: fs_pin: Allow for the possibility that m_list or s_list go unused. This is needed to support lazily umounting locked mounts. Because the entire unmounted subtree needs to stay together until there are no users with references to any part of the subtree. To support this guarantee that the fs_pin m_list and s_list nodes are initialized by initializing them in init_fs_pin allowing for the possibility that pin_insert_group does not touch them. Further use hlist_del_init in pin_remove so that there is a hlist_unhashed test before the list we attempt to update the previous list item. 
Signed-off-by: "Eric W. Biederman" --- fs/fs_pin.c | 4 ++-- include/linux/fs_pin.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fs_pin.c b/fs/fs_pin.c index b06c98796afb..611b5408f6ec 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c @@ -9,8 +9,8 @@ static DEFINE_SPINLOCK(pin_lock); void pin_remove(struct fs_pin *pin) { spin_lock(&pin_lock); - hlist_del(&pin->m_list); - hlist_del(&pin->s_list); + hlist_del_init(&pin->m_list); + hlist_del_init(&pin->s_list); spin_unlock(&pin_lock); spin_lock_irq(&pin->wait.lock); pin->done = 1; diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h index 9dc4e0384bfb..3886b3bffd7f 100644 --- a/include/linux/fs_pin.h +++ b/include/linux/fs_pin.h @@ -13,6 +13,8 @@ struct vfsmount; static inline void init_fs_pin(struct fs_pin *p, void (*kill)(struct fs_pin *)) { init_waitqueue_head(&p->wait); + INIT_HLIST_NODE(&p->s_list); + INIT_HLIST_NODE(&p->m_list); p->kill = kill; } -- cgit v1.2.3-59-g8ed1b From ce07d891a0891d3c0d0c2d73d577490486b809e1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 23 Dec 2014 21:37:03 -0600 Subject: mnt: Honor MNT_LOCKED when detaching mounts Modify umount(MNT_DETACH) to keep mounts in the hash table that are locked to their parent mounts, when the parent is lazily unmounted. In mntput_no_expire detach the children from the hash table, depending on mnt_pin_kill in cleanup_mnt to decrement the mnt_count of the children. In __detach_mounts if there are any mounts that have been unmounted but still are on the list of mounts of a mountpoint, remove their children from the mount hash table and those children to the unmounted list so they won't linger potentially indefinitely waiting for their final mntput, now that the mounts serve no purpose. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. 
Biederman" --- fs/namespace.c | 29 ++++++++++++++++++++++++++--- fs/pnode.h | 2 ++ 2 files changed, 28 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index 010d5bebcb7e..1894d1878dbc 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1099,6 +1099,13 @@ static void mntput_no_expire(struct mount *mnt) rcu_read_unlock(); list_del(&mnt->mnt_instance); + + if (unlikely(!list_empty(&mnt->mnt_mounts))) { + struct mount *p, *tmp; + list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) { + umount_mnt(p); + } + } unlock_mount_hash(); if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) { @@ -1370,6 +1377,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) propagate_umount(&tmp_list); while (!list_empty(&tmp_list)) { + bool disconnect; p = list_first_entry(&tmp_list, struct mount, mnt_list); list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); @@ -1378,10 +1386,18 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) if (how & UMOUNT_SYNC) p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; - pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, &unmounted); + disconnect = !IS_MNT_LOCKED_AND_LAZY(p); + + pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, + disconnect ? 
&unmounted : NULL); if (mnt_has_parent(p)) { mnt_add_count(p->mnt_parent, -1); - umount_mnt(p); + if (!disconnect) { + /* Don't forget about p */ + list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts); + } else { + umount_mnt(p); + } } change_mnt_propagation(p, MS_PRIVATE); } @@ -1506,7 +1522,14 @@ void __detach_mounts(struct dentry *dentry) lock_mount_hash(); while (!hlist_empty(&mp->m_list)) { mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); - umount_tree(mnt, 0); + if (mnt->mnt.mnt_flags & MNT_UMOUNT) { + struct mount *p, *tmp; + list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) { + hlist_add_head(&p->mnt_umount.s_list, &unmounted); + umount_mnt(p); + } + } + else umount_tree(mnt, 0); } unlock_mount_hash(); put_mountpoint(mp); diff --git a/fs/pnode.h b/fs/pnode.h index 0fcdbe7ca648..7114ce6e6b9e 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -20,6 +20,8 @@ #define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED) #define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED) #define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED) +#define IS_MNT_LOCKED_AND_LAZY(m) \ + (((m)->mnt.mnt_flags & (MNT_LOCKED|MNT_SYNC_UMOUNT)) == MNT_LOCKED) #define CL_EXPIRE 0x01 #define CL_SLAVE 0x02 -- cgit v1.2.3-59-g8ed1b From f53e57975151f54ad8caa1b0ac8a78091cd5700a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 19 Jan 2015 11:48:45 -0600 Subject: mnt: Fix the error check in __detach_mounts lookup_mountpoint can return either NULL or an error value. Update the test in __detach_mounts to test for an error value to avoid pathological cases causing a NULL pointer dereferences. The callers of __detach_mounts should prevent it from ever being called on an unlinked dentry but don't take any chances. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. 
Biederman" --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index 1894d1878dbc..e8f7f8c58c3c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1516,7 +1516,7 @@ void __detach_mounts(struct dentry *dentry) namespace_lock(); mp = lookup_mountpoint(dentry); - if (!mp) + if (IS_ERR_OR_NULL(mp)) goto out_unlock; lock_mount_hash(); -- cgit v1.2.3-59-g8ed1b From e0c9c0afd2fc958ffa34b697972721d81df8a56f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Apr 2015 18:30:06 -0500 Subject: mnt: Update detach_mounts to leave mounts connected Now that it is possible to lazily unmount an entire mount tree and leave the individual mounts connected to each other add a new flag UMOUNT_CONNECTED to umount_tree to force this behavior and use this flag in detach_mounts. This closes a bug where the deletion of a file or directory could trigger an unmount and reveal data under a mount point. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index e8f7f8c58c3c..1f4f9dac6e5a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1348,6 +1348,7 @@ static inline void namespace_lock(void) enum umount_tree_flags { UMOUNT_SYNC = 1, UMOUNT_PROPAGATE = 2, + UMOUNT_CONNECTED = 4, }; /* * mount_lock must be held @@ -1386,7 +1387,10 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) if (how & UMOUNT_SYNC) p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; - disconnect = !IS_MNT_LOCKED_AND_LAZY(p); + disconnect = !(((how & UMOUNT_CONNECTED) && + mnt_has_parent(p) && + (p->mnt_parent->mnt.mnt_flags & MNT_UMOUNT)) || + IS_MNT_LOCKED_AND_LAZY(p)); pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, disconnect ? 
&unmounted : NULL); @@ -1529,7 +1533,7 @@ void __detach_mounts(struct dentry *dentry) umount_mnt(p); } } - else umount_tree(mnt, 0); + else umount_tree(mnt, UMOUNT_CONNECTED); } unlock_mount_hash(); put_mountpoint(mp); -- cgit v1.2.3-59-g8ed1b