aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorChristian Brauner <brauner@kernel.org>2024-12-22 11:22:18 +0100
committerChristian Brauner <brauner@kernel.org>2025-01-09 16:58:51 +0100
commitbd32073632008979e39ab063acc252a5ce49efe9 (patch)
tree353732425ba8a8dc46ae7e78f42a4aa49e78d74c
parentLinux 6.13-rc1 (diff)
parentfs: prepend statmount.mnt_opts string with security_sb_mnt_opts() (diff)
downloadwireguard-linux-bd32073632008979e39ab063acc252a5ce49efe9.tar.xz
wireguard-linux-bd32073632008979e39ab063acc252a5ce49efe9.zip
Merge patch series "fs: listmount()/statmount() fix and sample program"
Jeff Layton <jlayton@kernel.org> says: We had some recent queries internally asking how to use the new statmount() and listmount() interfaces. I was doing some other work in this area, so I whipped up this tool. My hope is that this will represent something of a "rosetta stone" for how to translate between mountinfo and statmount(), and an example for other people looking to use the new interfaces. It may also be possible to use this as the basis for a listmount() and statmount() testcase. We can call this program, and compare its output to the mountinfo file. The second patch adds security mount options to the existing mnt_opts in the statmount() interface, which I think is the final missing piece here. The alternative to doing that would be to add a new string field for that, but I'm not sure that's worthwhile. * patches from https://lore.kernel.org/r/20241115-statmount-v2-0-cd29aeff9cbb@kernel.org: fs: prepend statmount.mnt_opts string with security_sb_mnt_opts() samples: add a mountinfo program to demonstrate statmount()/listmount() Link: https://lore.kernel.org/r/20241115-statmount-v2-0-cd29aeff9cbb@kernel.org Signed-off-by: Christian Brauner <brauner@kernel.org>
-rw-r--r--fs/mount.h15
-rw-r--r--fs/namespace.c18
-rw-r--r--include/linux/mount.h3
-rw-r--r--samples/vfs/.gitignore1
-rw-r--r--samples/vfs/Makefile2
-rw-r--r--samples/vfs/mountinfo.c274
6 files changed, 296 insertions, 17 deletions
diff --git a/fs/mount.h b/fs/mount.h
index 185fc56afc13..179f690a0c72 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -38,6 +38,7 @@ struct mount {
struct dentry *mnt_mountpoint;
struct vfsmount mnt;
union {
+ struct rb_node mnt_node; /* node in the ns->mounts rbtree */
struct rcu_head mnt_rcu;
struct llist_node mnt_llist;
};
@@ -51,10 +52,7 @@ struct mount {
struct list_head mnt_child; /* and going through their mnt_child */
struct list_head mnt_instance; /* mount instance on sb->s_mounts */
const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */
- union {
- struct rb_node mnt_node; /* Under ns->mounts */
- struct list_head mnt_list;
- };
+ struct list_head mnt_list;
struct list_head mnt_expire; /* link in fs-specific expiry list */
struct list_head mnt_share; /* circular list of shared mounts */
struct list_head mnt_slave_list;/* list of slave mounts */
@@ -145,11 +143,16 @@ static inline bool is_anon_ns(struct mnt_namespace *ns)
return ns->seq == 0;
}
+static inline bool mnt_ns_attached(const struct mount *mnt) /* is mnt linked into a namespace's mounts rbtree? */
+{
+ return !RB_EMPTY_NODE(&mnt->mnt_node); /* mnt_node is cleared in alloc_vfsmnt() and again by move_from_ns() after rb_erase() */
+}
+
static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list)
{
- WARN_ON(!(mnt->mnt.mnt_flags & MNT_ONRB));
- mnt->mnt.mnt_flags &= ~MNT_ONRB;
+ WARN_ON(!mnt_ns_attached(mnt));
rb_erase(&mnt->mnt_node, &mnt->mnt_ns->mounts);
+ RB_CLEAR_NODE(&mnt->mnt_node);
list_add_tail(&mnt->mnt_list, dt_list);
}
diff --git a/fs/namespace.c b/fs/namespace.c
index 23e81c2a1e3f..1af8da8e1e97 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -344,6 +344,7 @@ static struct mount *alloc_vfsmnt(const char *name)
INIT_HLIST_NODE(&mnt->mnt_mp_list);
INIT_LIST_HEAD(&mnt->mnt_umounting);
INIT_HLIST_HEAD(&mnt->mnt_stuck_children);
+ RB_CLEAR_NODE(&mnt->mnt_node);
mnt->mnt.mnt_idmap = &nop_mnt_idmap;
}
return mnt;
@@ -1124,7 +1125,7 @@ static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt)
struct rb_node **link = &ns->mounts.rb_node;
struct rb_node *parent = NULL;
- WARN_ON(mnt->mnt.mnt_flags & MNT_ONRB);
+ WARN_ON(mnt_ns_attached(mnt));
mnt->mnt_ns = ns;
while (*link) {
parent = *link;
@@ -1135,7 +1136,6 @@ static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt)
}
rb_link_node(&mnt->mnt_node, parent, link);
rb_insert_color(&mnt->mnt_node, &ns->mounts);
- mnt->mnt.mnt_flags |= MNT_ONRB;
}
/*
@@ -1305,7 +1305,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
}
mnt->mnt.mnt_flags = old->mnt.mnt_flags;
- mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL|MNT_ONRB);
+ mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
atomic_inc(&sb->s_active);
mnt->mnt.mnt_idmap = mnt_idmap_get(mnt_idmap(&old->mnt));
@@ -1763,7 +1763,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
/* Gather the mounts to umount */
for (p = mnt; p; p = next_mnt(p, mnt)) {
p->mnt.mnt_flags |= MNT_UMOUNT;
- if (p->mnt.mnt_flags & MNT_ONRB)
+ if (mnt_ns_attached(p))
move_from_ns(p, &tmp_list);
else
list_move(&p->mnt_list, &tmp_list);
@@ -1912,16 +1912,14 @@ static int do_umount(struct mount *mnt, int flags)
event++;
if (flags & MNT_DETACH) {
- if (mnt->mnt.mnt_flags & MNT_ONRB ||
- !list_empty(&mnt->mnt_list))
+ if (mnt_ns_attached(mnt) || !list_empty(&mnt->mnt_list))
umount_tree(mnt, UMOUNT_PROPAGATE);
retval = 0;
} else {
shrink_submounts(mnt);
retval = -EBUSY;
if (!propagate_mount_busy(mnt, 2)) {
- if (mnt->mnt.mnt_flags & MNT_ONRB ||
- !list_empty(&mnt->mnt_list))
+ if (mnt_ns_attached(mnt) || !list_empty(&mnt->mnt_list))
umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
retval = 0;
}
@@ -5038,6 +5036,10 @@ static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq)
if (sb->s_op->show_options) {
size_t start = seq->count;
+ err = security_sb_show_options(seq, sb);
+ if (err)
+ return err;
+
err = sb->s_op->show_options(seq, mnt->mnt_root);
if (err)
return err;
diff --git a/include/linux/mount.h b/include/linux/mount.h
index c34c18b4e8f3..04213d8ef837 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -50,7 +50,7 @@ struct path;
#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME )
#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
- MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | MNT_ONRB)
+ MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED)
#define MNT_INTERNAL 0x4000
@@ -64,7 +64,6 @@ struct path;
#define MNT_SYNC_UMOUNT 0x2000000
#define MNT_MARKED 0x4000000
#define MNT_UMOUNT 0x8000000
-#define MNT_ONRB 0x10000000
struct vfsmount {
struct dentry *mnt_root; /* root of the mounted tree */
diff --git a/samples/vfs/.gitignore b/samples/vfs/.gitignore
index 79212d91285b..33a03cffe072 100644
--- a/samples/vfs/.gitignore
+++ b/samples/vfs/.gitignore
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
/test-fsmount
/test-statx
+/mountinfo
diff --git a/samples/vfs/Makefile b/samples/vfs/Makefile
index 6377a678134a..fb9bb33fdc75 100644
--- a/samples/vfs/Makefile
+++ b/samples/vfs/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-userprogs-always-y += test-fsmount test-statx
+userprogs-always-y += test-fsmount test-statx mountinfo
userccflags += -I usr/include
diff --git a/samples/vfs/mountinfo.c b/samples/vfs/mountinfo.c
new file mode 100644
index 000000000000..2b17d244d321
--- /dev/null
+++ b/samples/vfs/mountinfo.c
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/*
+ * Use pidfds, nsfds, listmount() and statmount() to mimic the
+ * contents of /proc/self/mountinfo.
+ */
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__
+#include <stdio.h>
+#include <stdint.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <linux/pidfd.h>
+#include <linux/mount.h>
+#include <linux/nsfs.h>
+#include <unistd.h>
+#include <alloca.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <errno.h>
+
+/* max mounts per listmount call */
+#define MAXMOUNTS 1024
+
+/* size of struct statmount (including trailing string buffer) */
+#define STATMOUNT_BUFSIZE 4096
+
+static bool ext_format; /* set by -e in main(); makes dump_mountinfo() prefix each line with the ns id, mount id and parent mount id in hex */
+
+/*
+ * There are no bindings in glibc for listmount() and statmount() (yet),
+ * make our own here.
+ */
+/*
+ * Minimal wrapper around the raw statmount system call.
+ *
+ * Fills in a struct mnt_id_req for @mnt_id with @mask as the request
+ * parameter. A non-zero @mnt_ns_id selects the VER1 request size and is
+ * carried in the request; zero keeps the VER0 size. @buf, @bufsize and
+ * @flags are handed to the syscall untouched, and its return value is
+ * returned as-is.
+ */
+static int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask,
+		     struct statmount *buf, size_t bufsize,
+		     unsigned int flags)
+{
+	const bool has_ns = mnt_ns_id != 0;
+	struct mnt_id_req req = {
+		.size = has_ns ? MNT_ID_REQ_SIZE_VER1 : MNT_ID_REQ_SIZE_VER0,
+		.mnt_id = mnt_id,
+		.param = mask,
+		.mnt_ns_id = mnt_ns_id,
+	};
+
+	return syscall(__NR_statmount, &req, buf, bufsize, flags);
+}
+
+/*
+ * Minimal wrapper around the raw listmount system call.
+ *
+ * Fills in a struct mnt_id_req for @mnt_id with @last_mnt_id as the
+ * request parameter. A non-zero @mnt_ns_id selects the VER1 request size
+ * and is carried in the request; zero keeps the VER0 size. @list, @num
+ * and @flags are handed to the syscall untouched, and its return value is
+ * returned as-is.
+ */
+static ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id,
+			 uint64_t last_mnt_id, uint64_t list[], size_t num,
+			 unsigned int flags)
+{
+	const bool has_ns = mnt_ns_id != 0;
+	struct mnt_id_req req = {
+		.size = has_ns ? MNT_ID_REQ_SIZE_VER1 : MNT_ID_REQ_SIZE_VER0,
+		.mnt_id = mnt_id,
+		.param = last_mnt_id,
+		.mnt_ns_id = mnt_ns_id,
+	};
+
+	return syscall(__NR_listmount, &req, list, num, flags);
+}
+
+/*
+ * Print the MOUNT_ATTR_* bits in @flags as a comma-separated option
+ * list on stdout. The list always starts with "ro" or "rw"; every other
+ * option is emitted only when its bit is set.
+ */
+static void show_mnt_attrs(uint64_t flags)
+{
+	const uint64_t atime_mode = flags & MOUNT_ATTR__ATIME;
+
+	fputs((flags & MOUNT_ATTR_RDONLY) ? "ro" : "rw", stdout);
+
+	if (flags & MOUNT_ATTR_NOSUID)
+		fputs(",nosuid", stdout);
+	if (flags & MOUNT_ATTR_NODEV)
+		fputs(",nodev", stdout);
+	if (flags & MOUNT_ATTR_NOEXEC)
+		fputs(",noexec", stdout);
+
+	/* At most one atime mode is printed; strictatime prints nothing. */
+	if (atime_mode == MOUNT_ATTR_RELATIME)
+		fputs(",relatime", stdout);
+	else if (atime_mode == MOUNT_ATTR_NOATIME)
+		fputs(",noatime", stdout);
+
+	if (flags & MOUNT_ATTR_NODIRATIME)
+		fputs(",nodiratime", stdout);
+	if (flags & MOUNT_ATTR_NOSYMFOLLOW)
+		fputs(",nosymfollow", stdout);
+	if (flags & MOUNT_ATTR_IDMAP)
+		fputs(",idmapped", stdout);
+}
+
+/*
+ * Print the propagation fields of @sm: " shared:<peer group>" for a
+ * shared mount, " master:<id>" for a slave mount (followed by
+ * " propagate_from:<id>" when that id is non-zero and differs from the
+ * master), and " unbindable" for an unbindable mount.
+ */
+static void show_propagation(struct statmount *sm)
+{
+	const bool is_shared = (sm->mnt_propagation & MS_SHARED) != 0;
+	const bool is_slave = (sm->mnt_propagation & MS_SLAVE) != 0;
+	const bool is_unbindable = (sm->mnt_propagation & MS_UNBINDABLE) != 0;
+
+	if (is_shared)
+		printf(" shared:%llu", sm->mnt_peer_group);
+
+	if (is_slave) {
+		const bool distinct_source = sm->propagate_from != 0 &&
+					     sm->propagate_from != sm->mnt_master;
+
+		printf(" master:%llu", sm->mnt_master);
+		if (distinct_source)
+			printf(" propagate_from:%llu", sm->propagate_from);
+	}
+
+	if (is_unbindable)
+		printf(" unbindable");
+}
+
+/*
+ * Print the superblock flags in @flags (MS_* bits) as a comma-separated
+ * option list on stdout, always starting with "ro" or "rw".
+ */
+static void show_sb_flags(uint64_t flags)
+{
+	/* Optional flags, in the order they are printed. */
+	static const struct {
+		uint64_t bit;
+		const char *opt;
+	} sb_opts[] = {
+		{ MS_SYNCHRONOUS, ",sync" },
+		{ MS_DIRSYNC, ",dirsync" },
+		{ MS_MANDLOCK, ",mand" },
+		{ MS_LAZYTIME, ",lazytime" },
+	};
+	size_t i;
+
+	printf("%s", (flags & MS_RDONLY) ? "ro" : "rw");
+
+	for (i = 0; i < sizeof(sb_opts) / sizeof(sb_opts[0]); i++) {
+		if (flags & sb_opts[i].bit)
+			printf("%s", sb_opts[i].opt);
+	}
+}
+
+/*
+ * Query one mount with statmount() and print it as a single
+ * mountinfo-style line on stdout.
+ *
+ * @mnt_id:    64-bit mount id to query.
+ * @mnt_ns_id: mount namespace id, forwarded to statmount().
+ *
+ * When ext_format is set, the line is prefixed with the namespace id,
+ * the mount id and the parent mount id in hex.
+ *
+ * Returns 0 on success, or 1 after reporting the error with perror()
+ * when statmount() fails.
+ */
+static int dump_mountinfo(uint64_t mnt_id, uint64_t mnt_ns_id)
+{
+	int ret;
+	struct statmount *buf = alloca(STATMOUNT_BUFSIZE);
+	const uint64_t mask = STATMOUNT_SB_BASIC | STATMOUNT_MNT_BASIC |
+			      STATMOUNT_PROPAGATE_FROM | STATMOUNT_FS_TYPE |
+			      STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT |
+			      STATMOUNT_MNT_OPTS | STATMOUNT_FS_SUBTYPE |
+			      STATMOUNT_SB_SOURCE;
+
+	ret = statmount(mnt_id, mnt_ns_id, mask, buf, STATMOUNT_BUFSIZE, 0);
+	if (ret < 0) {
+		perror("statmount");
+		return 1;
+	}
+
+	/*
+	 * uint64_t is not "unsigned long" on every ABI, so cast explicitly
+	 * and print all three ids with %llx.
+	 */
+	if (ext_format)
+		printf("0x%llx 0x%llx 0x%llx ",
+		       (unsigned long long)mnt_ns_id,
+		       (unsigned long long)mnt_id,
+		       (unsigned long long)buf->mnt_parent_id);
+
+	/* String fields are offsets into the trailing buf->str area. */
+	printf("%u %u %u:%u %s %s ", buf->mnt_id_old, buf->mnt_parent_id_old,
+	       buf->sb_dev_major, buf->sb_dev_minor,
+	       &buf->str[buf->mnt_root],
+	       &buf->str[buf->mnt_point]);
+	show_mnt_attrs(buf->mnt_attr);
+	show_propagation(buf);
+
+	printf(" - %s", &buf->str[buf->fs_type]);
+	/* Optional fields are printed only if flagged in the returned mask. */
+	if (buf->mask & STATMOUNT_FS_SUBTYPE)
+		printf(".%s", &buf->str[buf->fs_subtype]);
+	if (buf->mask & STATMOUNT_SB_SOURCE)
+		printf(" %s ", &buf->str[buf->sb_source]);
+	else
+		printf(" :none ");
+
+	show_sb_flags(buf->sb_flags);
+	if (buf->mask & STATMOUNT_MNT_OPTS)
+		printf(",%s", &buf->str[buf->mnt_opts]);
+	printf("\n");
+	return 0;
+}
+
+/*
+ * Print every mount in the mount namespace @mnt_ns_id.
+ *
+ * Mount ids are fetched with listmount() in batches of up to MAXMOUNTS
+ * and each one is passed to dump_mountinfo(). A full batch means there
+ * may be more, so the loop repeats, resuming after the last id seen.
+ *
+ * Returns 0 on success, 1 if listmount() fails or reports more entries
+ * than the buffer holds, or the non-zero result of dump_mountinfo().
+ */
+static int dump_mounts(uint64_t mnt_ns_id)
+{
+	uint64_t mntid[MAXMOUNTS];
+	uint64_t last_mnt_id = 0;
+	ssize_t count;
+	int i;
+
+	/*
+	 * Get a list of all mntids in mnt_ns_id. If it returns MAXMOUNTS
+	 * mounts, then go again until we get everything.
+	 */
+	do {
+		count = listmount(LSMT_ROOT, mnt_ns_id, last_mnt_id, mntid, MAXMOUNTS, 0);
+		if (count < 0 || count > MAXMOUNTS) {
+			/*
+			 * NOTE(review): for count > MAXMOUNTS the count itself
+			 * is stored in errno, so perror() shows an arbitrary
+			 * message; kept as in the original.
+			 */
+			errno = count < 0 ? errno : count;
+			perror("listmount");
+			return 1;
+		}
+
+		/* Walk the returned mntids and print info about each */
+		for (i = 0; i < count; ++i) {
+			int ret = dump_mountinfo(mntid[i], mnt_ns_id);
+
+			if (ret != 0)
+				return ret;
+		}
+
+		/*
+		 * Set up last_mnt_id to pick up where we left off. An empty
+		 * batch has no last element, so do not index mntid[-1].
+		 */
+		if (count > 0)
+			last_mnt_id = mntid[count - 1];
+	} while (count == MAXMOUNTS);
+	return 0;
+}
+
+/* Print the command-line synopsis and option summary for @prog to stdout. */
+static void usage(const char * const prog)
+{
+	printf("Usage:\n"
+	       "%s [-e] [-p pid] [-r] [-h]\n"
+	       " -e: extended format\n"
+	       " -h: print usage message\n"
+	       " -p: get mount namespace from given pid\n"
+	       " -r: recursively print all mounts in all child namespaces\n",
+	       prog);
+}
+
+/*
+ * Entry point: print the mounts of a mount namespace in mountinfo style.
+ *
+ * Options:
+ *   -e      extended format (sets ext_format)
+ *   -h      print usage and exit successfully
+ *   -p pid  take the mount namespace from @pid instead of this process
+ *   -r      after the first namespace, keep stepping with NS_MNT_GET_NEXT
+ *           and dump each namespace it yields
+ *
+ * Returns 0 on success and non-zero on a bad option, a bad pid, a failed
+ * syscall/ioctl, or a failure reported by dump_mounts().
+ */
+int main(int argc, char * const *argv)
+{
+	struct mnt_ns_info mni = { .size = MNT_NS_INFO_SIZE_VER0 };
+	int pidfd, mntns, ret, opt;
+	pid_t pid = getpid();
+	bool recursive = false;
+
+	while ((opt = getopt(argc, argv, "ehp:r")) != -1) {
+		switch (opt) {
+		case 'e':
+			ext_format = true;
+			break;
+		case 'h':
+			usage(argv[0]);
+			return 0;
+		case 'p': {
+			char *end;
+			long val;
+
+			/* Reject empty, non-numeric, non-positive or overflowing pids. */
+			errno = 0;
+			val = strtol(optarg, &end, 10);
+			pid = (pid_t)val;
+			if (errno || end == optarg || *end != '\0' ||
+			    val <= 0 || (long)pid != val) {
+				fprintf(stderr, "%s: invalid pid '%s'\n",
+					argv[0], optarg);
+				return 1;
+			}
+			break;
+		}
+		case 'r':
+			recursive = true;
+			break;
+		default:
+			/* getopt() has already printed a diagnostic */
+			usage(argv[0]);
+			return 1;
+		}
+	}
+
+	/* Get a pidfd for pid */
+	pidfd = syscall(SYS_pidfd_open, pid, 0);
+	if (pidfd < 0) {
+		perror("pidfd_open");
+		return 1;
+	}
+
+	/* Get the mnt namespace for pidfd */
+	mntns = ioctl(pidfd, PIDFD_GET_MNT_NAMESPACE, NULL);
+	if (mntns < 0) {
+		/* report first: close() may overwrite errno */
+		perror("PIDFD_GET_MNT_NAMESPACE");
+		close(pidfd);
+		return 1;
+	}
+	close(pidfd);
+
+	/* get info about mntns. In particular, the mnt_ns_id */
+	ret = ioctl(mntns, NS_MNT_GET_INFO, &mni);
+	if (ret < 0) {
+		perror("NS_MNT_GET_INFO");
+		close(mntns);
+		return 1;
+	}
+
+	do {
+		int next;
+
+		ret = dump_mounts(mni.mnt_ns_id);
+		if (ret || !recursive)
+			break;
+
+		/*
+		 * get the next mntns (and overwrite the old mount ns info).
+		 * Any failure here ends the walk and is treated as "no more
+		 * namespaces", as before.
+		 */
+		next = ioctl(mntns, NS_MNT_GET_NEXT, &mni);
+		close(mntns);
+		mntns = next;
+	} while (mntns >= 0);
+
+	/* Still holding a namespace fd if we left the loop via break. */
+	if (mntns >= 0)
+		close(mntns);
+
+	return ret;
+}