use uniform permission checks for all mount propagation changes

[PATCH][RFC][CFT] use uniform permission checks for all mount propagation changes

Posted by Al Viro 1 month, 3 weeks ago

[this should fix userland regression from do_change_type() fix last cycle;
we have too little self-test coverage in the area, unfortunately.  First
approximation for selftest in the followup to this posting.  Review and
testing of both the patch and test would be very welcome]

do_change_type() and do_set_group() are operating on different
aspects of the same thing - propagation graph.  The latter
asks for mounts involved to be mounted in namespace(s) the caller
has CAP_SYS_ADMIN for.  The former is a mess - originally it
didn't even check that mount *is* mounted.  That got fixed,
but the resulting check turns out to be too strict for userland -
in effect, we check that mount is in our namespace, having already
checked that we have CAP_SYS_ADMIN there.

What we really need (in both cases) is
	* we only touch mounts that are mounted.  Hard requirement,
data corruption if that's get violated.
	* we don't allow to mess with a namespace unless you already
have enough permissions to do so (i.e. CAP_SYS_ADMIN in its userns).

That's an equivalent of what do_set_group() does; let's extract that
into a helper (may_change_propagation()) and use it in both
do_set_group() and do_change_type().

Fixes: 12f147ddd6de "do_change_type(): refuse to operate on unmounted/not ours mounts"
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
diff --git a/fs/namespace.c b/fs/namespace.c
index ddfd4457d338..a191c6519e36 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2862,6 +2862,19 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
 	return attach_recursive_mnt(mnt, p, mp);
 }
 
+static int may_change_propagation(const struct mount *m)
+{
+        struct mnt_namespace *ns = m->mnt_ns;
+
+	 // it must be mounted in some namespace
+	 if (IS_ERR_OR_NULL(ns))         // is_mounted()
+		 return -EINVAL;
+	 // and the caller must be admin in userns of that namespace
+	 if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
+		 return -EPERM;
+	 return 0;
+}
+
 /*
  * Sanity check the flags to change_mnt_propagation.
  */
@@ -2898,10 +2911,10 @@ static int do_change_type(struct path *path, int ms_flags)
 		return -EINVAL;
 
 	namespace_lock();
-	if (!check_mnt(mnt)) {
-		err = -EINVAL;
+	err = may_change_propagation(mnt);
+	if (err)
 		goto out_unlock;
-	}
+
 	if (type == MS_SHARED) {
 		err = invent_group_ids(mnt, recurse);
 		if (err)
@@ -3347,18 +3360,11 @@ static int do_set_group(struct path *from_path, struct path *to_path)
 
 	namespace_lock();
 
-	err = -EINVAL;
-	/* To and From must be mounted */
-	if (!is_mounted(&from->mnt))
-		goto out;
-	if (!is_mounted(&to->mnt))
-		goto out;
-
-	err = -EPERM;
-	/* We should be allowed to modify mount namespaces of both mounts */
-	if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
+	err = may_change_propagation(from);
+	if (err)
 		goto out;
-	if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
+	err = may_change_propagation(to);
+	if (err)
 		goto out;
 
 	err = -EINVAL;

[RFC][CFT] selftest for permission checks in mount propagation changes

Posted by Al Viro 1 month, 3 weeks ago

// link with -lcap, can run both as root and as regular user
#define _GNU_SOURCE
#include <sched.h>
#include <sys/capability.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <stdbool.h>

_Bool drop_caps(void)
{
        cap_value_t cap_value[] = { CAP_SYS_ADMIN };
        cap_t cap = cap_get_proc();
        if (!cap) {
		perror("cap_get_proc");
		return false;
	}
	return true;
}

void do_unshare(void)
{
	FILE *f;
	uid_t uid = geteuid();
	gid_t gid = getegid();
	unshare(CLONE_NEWNS|CLONE_NEWUSER);
	f = fopen("/proc/self/uid_map", "w");
	fprintf(f, "0 %d 1", uid);
	fclose(f);
	f = fopen("/proc/self/setgroups", "w");
	fprintf(f, "deny");
	fclose(f);
	f = fopen("/proc/self/gid_map", "w");
	fprintf(f, "0 %d 1", gid);
	fclose(f);
	mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL);
}

void bind(char *p)
{
	mount(p, p, NULL, MS_BIND, NULL);
}

void change_type(char *p, int type)
{
	errno = 0;
	mount(NULL, p, NULL, type, NULL);
}

void set_group(int fd1, char *p1, int fd2, char *p2)
{
	int flags = MOVE_MOUNT_SET_GROUP;
	int n;

	if (!p1 || !*p1) {
		p1 = "";	// be kind to old kernels
		flags |= MOVE_MOUNT_F_EMPTY_PATH;
	}
	if (!p2 || !*p2) {
		p2 = "";	// be kind to old kernels
		flags |= MOVE_MOUNT_T_EMPTY_PATH;
	}
	errno = 0;
	move_mount(fd1, p1, fd2, p2, flags);
}

_Bool result(int expected)
{
	if (expected != errno) {
		printf(" failed: %d != %d\n", expected, errno);
		return false;
	}
	printf(" OK\n");
	return true;
}

int fd1[2], fd2[2];

void in_child(void)
{
	printf("from should be someplace we have permissions for");
	set_group(AT_FDCWD, "mnt/a", AT_FDCWD, "/mnt/a/private");
	result(EPERM);
	printf("to should be someplace we have permissions for");
	set_group(AT_FDCWD, "/mnt/a", AT_FDCWD, "mnt/a/private");
	result(EPERM);
	printf("change_type: should have permissions for target");
	change_type("mnt/locked", MS_PRIVATE);
	result(EPERM);
}

void in_parent(void)
{
	printf("from should be mounted (pipes are not)");
	set_group(fd1[0], NULL, AT_FDCWD, "/mnt/a/private");
	result(EINVAL);

	printf("to should be mounted (pipes are not)");
	set_group(AT_FDCWD, "/mnt", fd1[0], NULL);
	result(EINVAL);

	printf("from should be someplace we have permissions for");
	set_group(AT_FDCWD, "mnt/a", AT_FDCWD, "/mnt/a/private");
	if (result(0))
		change_type("/mnt/a/private", MS_PRIVATE);

	printf("to should be someplace we have permissions for");
	set_group(AT_FDCWD, "/mnt", AT_FDCWD, "mnt/a/private");
	if (result(0))
		change_type("mnt/a/private", MS_PRIVATE);

	printf("from should be mountpoint");
	set_group(AT_FDCWD, "/mnt/a", AT_FDCWD, "/mnt/a/private");
	result(EINVAL);

	printf("to should be mountpoint");
	set_group(AT_FDCWD, "/mnt/a", AT_FDCWD, "/mnt/a/private/b");
	result(EINVAL);

	printf("to and from should be on the same filesystem");
	mount("none", "/mnt/no-locked", "tmpfs", 0, NULL);
	set_group(AT_FDCWD, "/mnt/a/shared", AT_FDCWD, "/mnt/no-locked");
	result(EINVAL);
	umount("/mnt/no-locked");

	printf("from should contain the counterpart of to");
	set_group(AT_FDCWD, "/mnt/a/shared", AT_FDCWD, "/mnt/no-locked");
	result(EINVAL);

	printf("from should not have anything locked in counterpart of to");
	set_group(AT_FDCWD, "mnt", AT_FDCWD, "/mnt/no-locked");
	if (result(0))
		change_type("/mnt/no-locked", MS_PRIVATE);

	printf("change_type: should have permissions for target");
	change_type("mnt/a/private", MS_PRIVATE);
	result(0);

	printf("change_type: target should be a mountpoint");
	change_type("/mnt/a", MS_PRIVATE);
	result(EINVAL);

	chdir("/mnt/a/private");
	umount2("/mnt/a/private", MNT_DETACH);
	printf("change_type: target should be mounted");
	change_type(".", MS_PRIVATE);
	result(EINVAL);
}

int main()
{
	char path[40];
	pid_t child;
	int root_fd;
	char c;

	if (pipe(fd1) < 0 || pipe(fd2) < 0) {
		perror("pipe");
		return -1;
	}
	if (!drop_caps())
		return -1;
	do_unshare();

	root_fd = open("/", O_PATH);

	errno = 0;
	mount("none", "/mnt", "tmpfs", 0, NULL);
	mkdir("/mnt/a", 0777);
	mkdir("/mnt/a/private", 0777);
	mkdir("/mnt/a/private/b", 0777);
	mkdir("/mnt/a/shared", 0777);
	mkdir("/mnt/a/slave", 0777);
	mkdir("/mnt/a/shared-slave", 0777);
	mkdir("/mnt/locked", 0777);
	mkdir("/mnt/no-locked", 0777);
	bind("/mnt/locked");

	child = fork();
	if (child < 0) {
		perror("fork");
		return -1;
	} else if (child == 0) {
		do_unshare();
		change_type("/mnt/", MS_SHARED);
		bind("/mnt/a");
		bind("/mnt/a/private");
		change_type("/mnt/a/private", MS_PRIVATE);
		write(fd1[1], &c, 1);
		read(fd2[0], &c, 1);

		fchdir(root_fd);
		in_child();

		write(fd1[1], &c, 1);
		return 0;
	}
	read(fd1[0], &c, 1);
	sprintf(path, "/proc/%d/root", child);
	chdir(path);

	change_type("/mnt", MS_SHARED);
	bind("/mnt/a/private");
	bind("/mnt/a/shared");
	bind("/mnt/a/slave");
	bind("/mnt/a/slave-shared");
	bind("/mnt/no-locked");
	change_type("/mnt/a/private", MS_PRIVATE);
	change_type("/mnt/a/slave", MS_SLAVE);
	change_type("/mnt/a/shared-slave", MS_SLAVE);
	change_type("/mnt/a/shared-slave", MS_SHARED);
	change_type("/mnt/no-locked", MS_PRIVATE);

	in_parent();

	fflush(stdout);
	write(fd2[1], &c, 1);
	read(fd1[0], &c, 1);
	return 0;
}

Re: [RFC][CFT] selftest for permission checks in mount propagation changes

Posted by Al Viro 1 month, 3 weeks ago

> void do_unshare(void)
> {
> 	FILE *f;
> 	uid_t uid = geteuid();
> 	gid_t gid = getegid();
> 	unshare(CLONE_NEWNS|CLONE_NEWUSER);
> 	f = fopen("/proc/self/uid_map", "w");
> 	fprintf(f, "0 %d 1", uid);
> 	fclose(f);
> 	f = fopen("/proc/self/setgroups", "w");
> 	fprintf(f, "deny");
> 	fclose(f);
> 	f = fopen("/proc/self/gid_map", "w");
> 	fprintf(f, "0 %d 1", gid);
> 	fclose(f);
> 	mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL);
> }

This obviously needs error checking - in this form it won't do
anything good without userns enabled (coredump on the first
fprintf() in there, since there won't be /proc/self/uid_map);
should probably just report CLONE_NEWUSER failure, warn about
skipped tests, fall back to unshare(CLONE_NEWNS) and skip
everything in in_child()...