[this should fix userland regression from do_change_type() fix last cycle;
we have too little self-test coverage in the area, unfortunately. First
approximation for selftest in the followup to this posting. Review and
testing of both the patch and test would be very welcome]
do_change_type() and do_set_group() are operating on different
aspects of the same thing - propagation graph. The latter
asks for mounts involved to be mounted in namespace(s) the caller
has CAP_SYS_ADMIN for. The former is a mess - originally it
didn't even check that mount *is* mounted. That got fixed,
but the resulting check turns out to be too strict for userland -
in effect, we check that mount is in our namespace, having already
checked that we have CAP_SYS_ADMIN there.
What we really need (in both cases) is
* we only touch mounts that are mounted. Hard requirement,
data corruption if that's get violated.
* we don't allow to mess with a namespace unless you already
have enough permissions to do so (i.e. CAP_SYS_ADMIN in its userns).
That's an equivalent of what do_set_group() does; let's extract that
into a helper (may_change_propagation()) and use it in both
do_set_group() and do_change_type().
Fixes: 12f147ddd6de "do_change_type(): refuse to operate on unmounted/not ours mounts"
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
diff --git a/fs/namespace.c b/fs/namespace.c
index ddfd4457d338..a191c6519e36 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2862,6 +2862,19 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
return attach_recursive_mnt(mnt, p, mp);
}
+static int may_change_propagation(const struct mount *m)
+{
+ struct mnt_namespace *ns = m->mnt_ns;
+
+ // it must be mounted in some namespace
+ if (IS_ERR_OR_NULL(ns)) // is_mounted()
+ return -EINVAL;
+ // and the caller must be admin in userns of that namespace
+ if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
+ return -EPERM;
+ return 0;
+}
+
/*
* Sanity check the flags to change_mnt_propagation.
*/
@@ -2898,10 +2911,10 @@ static int do_change_type(struct path *path, int ms_flags)
return -EINVAL;
namespace_lock();
- if (!check_mnt(mnt)) {
- err = -EINVAL;
+ err = may_change_propagation(mnt);
+ if (err)
goto out_unlock;
- }
+
if (type == MS_SHARED) {
err = invent_group_ids(mnt, recurse);
if (err)
@@ -3347,18 +3360,11 @@ static int do_set_group(struct path *from_path, struct path *to_path)
namespace_lock();
- err = -EINVAL;
- /* To and From must be mounted */
- if (!is_mounted(&from->mnt))
- goto out;
- if (!is_mounted(&to->mnt))
- goto out;
-
- err = -EPERM;
- /* We should be allowed to modify mount namespaces of both mounts */
- if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ err = may_change_propagation(from);
+ if (err)
goto out;
- if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ err = may_change_propagation(to);
+ if (err)
goto out;
err = -EINVAL;
// link with -lcap, can run both as root and as regular user #define _GNU_SOURCE #include <sched.h> #include <sys/capability.h> #include <sys/mount.h> #include <sys/stat.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <errno.h> #include <stdbool.h> _Bool drop_caps(void) { cap_value_t cap_value[] = { CAP_SYS_ADMIN }; cap_t cap = cap_get_proc(); if (!cap) { perror("cap_get_proc"); return false; } return true; } void do_unshare(void) { FILE *f; uid_t uid = geteuid(); gid_t gid = getegid(); unshare(CLONE_NEWNS|CLONE_NEWUSER); f = fopen("/proc/self/uid_map", "w"); fprintf(f, "0 %d 1", uid); fclose(f); f = fopen("/proc/self/setgroups", "w"); fprintf(f, "deny"); fclose(f); f = fopen("/proc/self/gid_map", "w"); fprintf(f, "0 %d 1", gid); fclose(f); mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL); } void bind(char *p) { mount(p, p, NULL, MS_BIND, NULL); } void change_type(char *p, int type) { errno = 0; mount(NULL, p, NULL, type, NULL); } void set_group(int fd1, char *p1, int fd2, char *p2) { int flags = MOVE_MOUNT_SET_GROUP; int n; if (!p1 || !*p1) { p1 = ""; // be kind to old kernels flags |= MOVE_MOUNT_F_EMPTY_PATH; } if (!p2 || !*p2) { p2 = ""; // be kind to old kernels flags |= MOVE_MOUNT_T_EMPTY_PATH; } errno = 0; move_mount(fd1, p1, fd2, p2, flags); } _Bool result(int expected) { if (expected != errno) { printf(" failed: %d != %d\n", expected, errno); return false; } printf(" OK\n"); return true; } int fd1[2], fd2[2]; void in_child(void) { printf("from should be someplace we have permissions for"); set_group(AT_FDCWD, "mnt/a", AT_FDCWD, "/mnt/a/private"); result(EPERM); printf("to should be someplace we have permissions for"); set_group(AT_FDCWD, "/mnt/a", AT_FDCWD, "mnt/a/private"); result(EPERM); printf("change_type: should have permissions for target"); change_type("mnt/locked", MS_PRIVATE); result(EPERM); } void in_parent(void) { printf("from should be mounted (pipes are not)"); set_group(fd1[0], NULL, AT_FDCWD, "/mnt/a/private"); result(EINVAL); printf("to should be mounted (pipes are not)"); set_group(AT_FDCWD, "/mnt", fd1[0], NULL); result(EINVAL); printf("from should be someplace we have permissions for"); set_group(AT_FDCWD, "mnt/a", AT_FDCWD, "/mnt/a/private"); if (result(0)) change_type("/mnt/a/private", MS_PRIVATE); printf("to should be someplace we have permissions for"); set_group(AT_FDCWD, "/mnt", AT_FDCWD, "mnt/a/private"); if (result(0)) change_type("mnt/a/private", MS_PRIVATE); printf("from should be mountpoint"); set_group(AT_FDCWD, "/mnt/a", AT_FDCWD, "/mnt/a/private"); result(EINVAL); printf("to should be mountpoint"); set_group(AT_FDCWD, "/mnt/a", AT_FDCWD, "/mnt/a/private/b"); result(EINVAL); printf("to and from should be on the same filesystem"); mount("none", "/mnt/no-locked", "tmpfs", 0, NULL); set_group(AT_FDCWD, "/mnt/a/shared", AT_FDCWD, "/mnt/no-locked"); result(EINVAL); umount("/mnt/no-locked"); printf("from should contain the counterpart of to"); set_group(AT_FDCWD, "/mnt/a/shared", AT_FDCWD, "/mnt/no-locked"); result(EINVAL); printf("from should not have anything locked in counterpart of to"); set_group(AT_FDCWD, "mnt", AT_FDCWD, "/mnt/no-locked"); if (result(0)) change_type("/mnt/no-locked", MS_PRIVATE); printf("change_type: should have permissions for target"); change_type("mnt/a/private", MS_PRIVATE); result(0); printf("change_type: target should be a mountpoint"); change_type("/mnt/a", MS_PRIVATE); result(EINVAL); chdir("/mnt/a/private"); umount2("/mnt/a/private", MNT_DETACH); printf("change_type: target should be mounted"); change_type(".", MS_PRIVATE); result(EINVAL); } int main() { char path[40]; pid_t child; int root_fd; char c; if (pipe(fd1) < 0 || pipe(fd2) < 0) { perror("pipe"); return -1; } if (!drop_caps()) return -1; do_unshare(); root_fd = open("/", O_PATH); errno = 0; mount("none", "/mnt", "tmpfs", 0, NULL); mkdir("/mnt/a", 0777); mkdir("/mnt/a/private", 0777); mkdir("/mnt/a/private/b", 0777); mkdir("/mnt/a/shared", 0777); mkdir("/mnt/a/slave", 0777); mkdir("/mnt/a/shared-slave", 0777); mkdir("/mnt/locked", 0777); mkdir("/mnt/no-locked", 0777); bind("/mnt/locked"); child = fork(); if (child < 0) { perror("fork"); return -1; } else if (child == 0) { do_unshare(); change_type("/mnt/", MS_SHARED); bind("/mnt/a"); bind("/mnt/a/private"); change_type("/mnt/a/private", MS_PRIVATE); write(fd1[1], &c, 1); read(fd2[0], &c, 1); fchdir(root_fd); in_child(); write(fd1[1], &c, 1); return 0; } read(fd1[0], &c, 1); sprintf(path, "/proc/%d/root", child); chdir(path); change_type("/mnt", MS_SHARED); bind("/mnt/a/private"); bind("/mnt/a/shared"); bind("/mnt/a/slave"); bind("/mnt/a/slave-shared"); bind("/mnt/no-locked"); change_type("/mnt/a/private", MS_PRIVATE); change_type("/mnt/a/slave", MS_SLAVE); change_type("/mnt/a/shared-slave", MS_SLAVE); change_type("/mnt/a/shared-slave", MS_SHARED); change_type("/mnt/no-locked", MS_PRIVATE); in_parent(); fflush(stdout); write(fd2[1], &c, 1); read(fd1[0], &c, 1); return 0; }
> void do_unshare(void) > { > FILE *f; > uid_t uid = geteuid(); > gid_t gid = getegid(); > unshare(CLONE_NEWNS|CLONE_NEWUSER); > f = fopen("/proc/self/uid_map", "w"); > fprintf(f, "0 %d 1", uid); > fclose(f); > f = fopen("/proc/self/setgroups", "w"); > fprintf(f, "deny"); > fclose(f); > f = fopen("/proc/self/gid_map", "w"); > fprintf(f, "0 %d 1", gid); > fclose(f); > mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL); > } This obviously needs error checking - in this form it won't do anything good without userns enabled (coredump on the first fprintf() in there, since there won't be /proc/self/uid_map); should probably just report CLONE_NEWUSER failure, warn about skipped tests, fall back to unshare(CLONE_NEWNS) and skip everything in in_child()...
© 2016 - 2025 Red Hat, Inc.