[this should fix userland regression from do_change_type() fix last cycle;
we have too little self-test coverage in the area, unfortunately. First
approximation for selftest in the followup to this posting. Review and
testing of both the patch and test would be very welcome]
do_change_type() and do_set_group() are operating on different
aspects of the same thing - propagation graph. The latter
asks for mounts involved to be mounted in namespace(s) the caller
has CAP_SYS_ADMIN for. The former is a mess - originally it
didn't even check that mount *is* mounted. That got fixed,
but the resulting check turns out to be too strict for userland -
in effect, we check that mount is in our namespace, having already
checked that we have CAP_SYS_ADMIN there.
What we really need (in both cases) is
* we only touch mounts that are mounted. Hard requirement,
data corruption if that's get violated.
* we don't allow to mess with a namespace unless you already
have enough permissions to do so (i.e. CAP_SYS_ADMIN in its userns).
That's an equivalent of what do_set_group() does; let's extract that
into a helper (may_change_propagation()) and use it in both
do_set_group() and do_change_type().
Fixes: 12f147ddd6de "do_change_type(): refuse to operate on unmounted/not ours mounts"
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
diff --git a/fs/namespace.c b/fs/namespace.c
index ddfd4457d338..a191c6519e36 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2862,6 +2862,19 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
return attach_recursive_mnt(mnt, p, mp);
}
+static int may_change_propagation(const struct mount *m)
+{
+ struct mnt_namespace *ns = m->mnt_ns;
+
+ // it must be mounted in some namespace
+ if (IS_ERR_OR_NULL(ns)) // is_mounted()
+ return -EINVAL;
+ // and the caller must be admin in userns of that namespace
+ if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
+ return -EPERM;
+ return 0;
+}
+
/*
* Sanity check the flags to change_mnt_propagation.
*/
@@ -2898,10 +2911,10 @@ static int do_change_type(struct path *path, int ms_flags)
return -EINVAL;
namespace_lock();
- if (!check_mnt(mnt)) {
- err = -EINVAL;
+ err = may_change_propagation(mnt);
+ if (err)
goto out_unlock;
- }
+
if (type == MS_SHARED) {
err = invent_group_ids(mnt, recurse);
if (err)
@@ -3347,18 +3360,11 @@ static int do_set_group(struct path *from_path, struct path *to_path)
namespace_lock();
- err = -EINVAL;
- /* To and From must be mounted */
- if (!is_mounted(&from->mnt))
- goto out;
- if (!is_mounted(&to->mnt))
- goto out;
-
- err = -EPERM;
- /* We should be allowed to modify mount namespaces of both mounts */
- if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ err = may_change_propagation(from);
+ if (err)
goto out;
- if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ err = may_change_propagation(to);
+ if (err)
goto out;
err = -EINVAL;
// link with -lcap, can run both as root and as regular user
#define _GNU_SOURCE
#include <sched.h>
#include <sys/capability.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <stdbool.h>
_Bool drop_caps(void)
{
cap_value_t cap_value[] = { CAP_SYS_ADMIN };
cap_t cap = cap_get_proc();
if (!cap) {
perror("cap_get_proc");
return false;
}
return true;
}
void do_unshare(void)
{
FILE *f;
uid_t uid = geteuid();
gid_t gid = getegid();
unshare(CLONE_NEWNS|CLONE_NEWUSER);
f = fopen("/proc/self/uid_map", "w");
fprintf(f, "0 %d 1", uid);
fclose(f);
f = fopen("/proc/self/setgroups", "w");
fprintf(f, "deny");
fclose(f);
f = fopen("/proc/self/gid_map", "w");
fprintf(f, "0 %d 1", gid);
fclose(f);
mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL);
}
void bind(char *p)
{
mount(p, p, NULL, MS_BIND, NULL);
}
void change_type(char *p, int type)
{
errno = 0;
mount(NULL, p, NULL, type, NULL);
}
void set_group(int fd1, char *p1, int fd2, char *p2)
{
int flags = MOVE_MOUNT_SET_GROUP;
int n;
if (!p1 || !*p1) {
p1 = ""; // be kind to old kernels
flags |= MOVE_MOUNT_F_EMPTY_PATH;
}
if (!p2 || !*p2) {
p2 = ""; // be kind to old kernels
flags |= MOVE_MOUNT_T_EMPTY_PATH;
}
errno = 0;
move_mount(fd1, p1, fd2, p2, flags);
}
_Bool result(int expected)
{
if (expected != errno) {
printf(" failed: %d != %d\n", expected, errno);
return false;
}
printf(" OK\n");
return true;
}
int fd1[2], fd2[2];
void in_child(void)
{
printf("from should be someplace we have permissions for");
set_group(AT_FDCWD, "mnt/a", AT_FDCWD, "/mnt/a/private");
result(EPERM);
printf("to should be someplace we have permissions for");
set_group(AT_FDCWD, "/mnt/a", AT_FDCWD, "mnt/a/private");
result(EPERM);
printf("change_type: should have permissions for target");
change_type("mnt/locked", MS_PRIVATE);
result(EPERM);
}
void in_parent(void)
{
printf("from should be mounted (pipes are not)");
set_group(fd1[0], NULL, AT_FDCWD, "/mnt/a/private");
result(EINVAL);
printf("to should be mounted (pipes are not)");
set_group(AT_FDCWD, "/mnt", fd1[0], NULL);
result(EINVAL);
printf("from should be someplace we have permissions for");
set_group(AT_FDCWD, "mnt/a", AT_FDCWD, "/mnt/a/private");
if (result(0))
change_type("/mnt/a/private", MS_PRIVATE);
printf("to should be someplace we have permissions for");
set_group(AT_FDCWD, "/mnt", AT_FDCWD, "mnt/a/private");
if (result(0))
change_type("mnt/a/private", MS_PRIVATE);
printf("from should be mountpoint");
set_group(AT_FDCWD, "/mnt/a", AT_FDCWD, "/mnt/a/private");
result(EINVAL);
printf("to should be mountpoint");
set_group(AT_FDCWD, "/mnt/a", AT_FDCWD, "/mnt/a/private/b");
result(EINVAL);
printf("to and from should be on the same filesystem");
mount("none", "/mnt/no-locked", "tmpfs", 0, NULL);
set_group(AT_FDCWD, "/mnt/a/shared", AT_FDCWD, "/mnt/no-locked");
result(EINVAL);
umount("/mnt/no-locked");
printf("from should contain the counterpart of to");
set_group(AT_FDCWD, "/mnt/a/shared", AT_FDCWD, "/mnt/no-locked");
result(EINVAL);
printf("from should not have anything locked in counterpart of to");
set_group(AT_FDCWD, "mnt", AT_FDCWD, "/mnt/no-locked");
if (result(0))
change_type("/mnt/no-locked", MS_PRIVATE);
printf("change_type: should have permissions for target");
change_type("mnt/a/private", MS_PRIVATE);
result(0);
printf("change_type: target should be a mountpoint");
change_type("/mnt/a", MS_PRIVATE);
result(EINVAL);
chdir("/mnt/a/private");
umount2("/mnt/a/private", MNT_DETACH);
printf("change_type: target should be mounted");
change_type(".", MS_PRIVATE);
result(EINVAL);
}
int main()
{
char path[40];
pid_t child;
int root_fd;
char c;
if (pipe(fd1) < 0 || pipe(fd2) < 0) {
perror("pipe");
return -1;
}
if (!drop_caps())
return -1;
do_unshare();
root_fd = open("/", O_PATH);
errno = 0;
mount("none", "/mnt", "tmpfs", 0, NULL);
mkdir("/mnt/a", 0777);
mkdir("/mnt/a/private", 0777);
mkdir("/mnt/a/private/b", 0777);
mkdir("/mnt/a/shared", 0777);
mkdir("/mnt/a/slave", 0777);
mkdir("/mnt/a/shared-slave", 0777);
mkdir("/mnt/locked", 0777);
mkdir("/mnt/no-locked", 0777);
bind("/mnt/locked");
child = fork();
if (child < 0) {
perror("fork");
return -1;
} else if (child == 0) {
do_unshare();
change_type("/mnt/", MS_SHARED);
bind("/mnt/a");
bind("/mnt/a/private");
change_type("/mnt/a/private", MS_PRIVATE);
write(fd1[1], &c, 1);
read(fd2[0], &c, 1);
fchdir(root_fd);
in_child();
write(fd1[1], &c, 1);
return 0;
}
read(fd1[0], &c, 1);
sprintf(path, "/proc/%d/root", child);
chdir(path);
change_type("/mnt", MS_SHARED);
bind("/mnt/a/private");
bind("/mnt/a/shared");
bind("/mnt/a/slave");
bind("/mnt/a/slave-shared");
bind("/mnt/no-locked");
change_type("/mnt/a/private", MS_PRIVATE);
change_type("/mnt/a/slave", MS_SLAVE);
change_type("/mnt/a/shared-slave", MS_SLAVE);
change_type("/mnt/a/shared-slave", MS_SHARED);
change_type("/mnt/no-locked", MS_PRIVATE);
in_parent();
fflush(stdout);
write(fd2[1], &c, 1);
read(fd1[0], &c, 1);
return 0;
}
> void do_unshare(void)
> {
> FILE *f;
> uid_t uid = geteuid();
> gid_t gid = getegid();
> unshare(CLONE_NEWNS|CLONE_NEWUSER);
> f = fopen("/proc/self/uid_map", "w");
> fprintf(f, "0 %d 1", uid);
> fclose(f);
> f = fopen("/proc/self/setgroups", "w");
> fprintf(f, "deny");
> fclose(f);
> f = fopen("/proc/self/gid_map", "w");
> fprintf(f, "0 %d 1", gid);
> fclose(f);
> mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL);
> }
This obviously needs error checking - in this form it won't do
anything good without userns enabled (coredump on the first
fprintf() in there, since there won't be /proc/self/uid_map);
should probably just report CLONE_NEWUSER failure, warn about
skipped tests, fall back to unshare(CLONE_NEWNS) and skip
everything in in_child()...
© 2016 - 2026 Red Hat, Inc.