hw/9pfs/9p-local.c | 44 ++++++++++++++++++++++++++++---------------- hw/9pfs/9p-util.h | 10 +++++++--- 2 files changed, 35 insertions(+), 19 deletions(-)
This function has to ensure it doesn't follow a symlink that could be used
to escape the virtfs directory. This could be easily achieved if fchmodat()
on linux honored the AT_SYMLINK_NOFOLLOW flag as described in POSIX, but
it doesn't.
The current implementation covers most use-cases, but it notably fails if:
- the target path has access rights equal to 0000 (openat() returns EPERM),
=> once you've done chmod(0000) on a file, you can never chmod() again
- the target path is UNIX domain socket (openat() returns ENXIO)
=> bind() of UNIX domain sockets fails if the file is on 9pfs
The solution is to use O_PATH: openat() now succeeds in both cases, and we
can ensure the path isn't a symlink with fstat(). The associated entry in
"/proc/self/fd" can hence be safely passed to the regular chmod() syscall.
Signed-off-by: Greg Kurz <groug@kaod.org>
---
hw/9pfs/9p-local.c | 44 ++++++++++++++++++++++++++++----------------
hw/9pfs/9p-util.h | 10 +++++++---
2 files changed, 35 insertions(+), 19 deletions(-)
diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index 6e478f4765ef..b178d627c764 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -333,30 +333,42 @@ update_map_file:
static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode)
{
+ struct stat stbuf;
int fd, ret;
+ char *proc_path;
/* FIXME: this should be handled with fchmodat(AT_SYMLINK_NOFOLLOW).
- * Unfortunately, the linux kernel doesn't implement it yet. As an
- * alternative, let's open the file and use fchmod() instead. This
- * may fail depending on the permissions of the file, but it is the
- * best we can do to avoid TOCTTOU. We first try to open read-only
- * in case name points to a directory. If that fails, we try write-only
- * in case name doesn't point to a directory.
+ * Unfortunately, the linux kernel doesn't implement it yet.
*/
- fd = openat_file(dirfd, name, O_RDONLY, 0);
- if (fd == -1) {
- /* In case the file is writable-only and isn't a directory. */
- if (errno == EACCES) {
- fd = openat_file(dirfd, name, O_WRONLY, 0);
- }
- if (fd == -1 && errno == EISDIR) {
- errno = EACCES;
- }
+ if (fstatat(dirfd, name, &stbuf, AT_SYMLINK_NOFOLLOW)) {
+ return -1;
}
+
+ if (S_ISLNK(stbuf.st_mode)) {
+ errno = ELOOP;
+ return -1;
+ }
+
+ fd = openat_file(dirfd, name, O_RDONLY | O_PATH, 0);
if (fd == -1) {
return -1;
}
- ret = fchmod(fd, mode);
+
+ ret = fstat(fd, &stbuf);
+ if (ret) {
+ goto out;
+ }
+
+ if (S_ISLNK(stbuf.st_mode)) {
+ errno = ELOOP;
+ ret = -1;
+ goto out;
+ }
+
+ proc_path = g_strdup_printf("/proc/self/fd/%d", fd);
+ ret = chmod(proc_path, mode);
+ g_free(proc_path);
+out:
close_preserve_errno(fd);
return ret;
}
diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h
index 91299a24b8af..7c1c8fb1e35c 100644
--- a/hw/9pfs/9p-util.h
+++ b/hw/9pfs/9p-util.h
@@ -43,9 +43,13 @@ static inline int openat_file(int dirfd, const char *name, int flags,
}
serrno = errno;
- /* O_NONBLOCK was only needed to open the file. Let's drop it. */
- ret = fcntl(fd, F_SETFL, flags);
- assert(!ret);
+ /* O_NONBLOCK was only needed to open the file. Let's drop it. We don't
+ * do that with O_PATH since it ignores O_NONBLOCK.
+ */
+ if (!(flags & O_PATH)) {
+ ret = fcntl(fd, F_SETFL, flags);
+ assert(!ret);
+ }
errno = serrno;
return fd;
}
Hi Greg, is this also related to CVE-2016-9602? On 08/08/2017 02:28 PM, Greg Kurz wrote: > This function has to ensure it doesn't follow a symlink that could be used > to escape the virtfs directory. This could be easily achieved if fchmodat() > on linux honored the AT_SYMLINK_NOFOLLOW flag as described in POSIX, but > it doesn't. > > The current implementation covers most use-cases, but it notably fails if: > - the target path has access rights equal to 0000 (openat() returns EPERM), > => once you've done chmod(0000) on a file, you can never chmod() again > - the target path is UNIX domain socket (openat() returns ENXIO) > => bind() of UNIX domain sockets fails if the file is on 9pfs > > The solution is to use O_PATH: openat() now succeeds in both cases, and we > can ensure the path isn't a symlink with fstat(). The associated entry in > "/proc/self/fd" can hence be safely passed to the regular chmod() syscall. > > Signed-off-by: Greg Kurz <groug@kaod.org> > --- > hw/9pfs/9p-local.c | 44 ++++++++++++++++++++++++++++---------------- > hw/9pfs/9p-util.h | 10 +++++++--- > 2 files changed, 35 insertions(+), 19 deletions(-) > > diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c > index 6e478f4765ef..b178d627c764 100644 > --- a/hw/9pfs/9p-local.c > +++ b/hw/9pfs/9p-local.c > @@ -333,30 +333,42 @@ update_map_file: > > static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode) > { > + struct stat stbuf; > int fd, ret; > + char *proc_path; > > /* FIXME: this should be handled with fchmodat(AT_SYMLINK_NOFOLLOW). > - * Unfortunately, the linux kernel doesn't implement it yet. As an > - * alternative, let's open the file and use fchmod() instead. This > - * may fail depending on the permissions of the file, but it is the > - * best we can do to avoid TOCTTOU. We first try to open read-only > - * in case name points to a directory. If that fails, we try write-only > - * in case name doesn't point to a directory. > + * Unfortunately, the linux kernel doesn't implement it yet. > */ > - fd = openat_file(dirfd, name, O_RDONLY, 0); > - if (fd == -1) { > - /* In case the file is writable-only and isn't a directory. */ > - if (errno == EACCES) { > - fd = openat_file(dirfd, name, O_WRONLY, 0); > - } > - if (fd == -1 && errno == EISDIR) { > - errno = EACCES; > - } > + if (fstatat(dirfd, name, &stbuf, AT_SYMLINK_NOFOLLOW)) { > + return -1; > } > + > + if (S_ISLNK(stbuf.st_mode)) { > + errno = ELOOP; > + return -1; > + } > + > + fd = openat_file(dirfd, name, O_RDONLY | O_PATH, 0); since you use O_PATH, you can drop O_RDONLY. > if (fd == -1) { > return -1; > } > - ret = fchmod(fd, mode); > + > + ret = fstat(fd, &stbuf); > + if (ret) { > + goto out; > + } > + > + if (S_ISLNK(stbuf.st_mode)) { > + errno = ELOOP; > + ret = -1; > + goto out; > + } > + > + proc_path = g_strdup_printf("/proc/self/fd/%d", fd); > + ret = chmod(proc_path, mode); > + g_free(proc_path); > +out: > close_preserve_errno(fd); > return ret; > } > diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h > index 91299a24b8af..7c1c8fb1e35c 100644 > --- a/hw/9pfs/9p-util.h > +++ b/hw/9pfs/9p-util.h > @@ -43,9 +43,13 @@ static inline int openat_file(int dirfd, const char *name, int flags, > } > > serrno = errno; > - /* O_NONBLOCK was only needed to open the file. Let's drop it. */ > - ret = fcntl(fd, F_SETFL, flags); > - assert(!ret); > + /* O_NONBLOCK was only needed to open the file. Let's drop it. We don't > + * do that with O_PATH since it ignores O_NONBLOCK. > + */ thinking about if someone uses openat_file(d, O_PATH|O_CLOEXEC, ...), why not set the fd flags always? like: ret = fcntl(fd, F_SETFL, flags); assert((flags & O_PATH) || !ret); > errno = serrno; > return fd; > } > >
On 08/08/2017 01:48 PM, Philippe Mathieu-Daudé wrote: >> + fd = openat_file(dirfd, name, O_RDONLY | O_PATH, 0); > > since you use O_PATH, you can drop O_RDONLY. Technically, POSIX says (and 'man 2 open' agrees, modulo the fact that Linux still lacks O_SEARCH) that you MUST provide one of the 5 access modes (they are O_RDONLY, O_RDWR, O_WRONLY, O_EXEC, and O_SEARCH; although POSIX allows O_EXEC and O_SEARCH to be the same bit pattern), and then bitwise-or any additional flags. So the usage here is correct. Practically, though, this code is ONLY compilable on Linux (no one else has O_PATH yet, although it is a useful Linux invention), and on Linux, O_RDONLY is 0. So behaviorally, you are correct that open(O_PATH) rather than open(O_RDONLY | O_PATH) does the same thing, even if it sets a bad example. [Well, insofar as you don't have any other bugs by omitting O_NOFOLLOW, per my other thread...] >> +++ b/hw/9pfs/9p-util.h >> @@ -43,9 +43,13 @@ static inline int openat_file(int dirfd, const char >> *name, int flags, >> } >> serrno = errno; >> - /* O_NONBLOCK was only needed to open the file. Let's drop it. */ >> - ret = fcntl(fd, F_SETFL, flags); >> - assert(!ret); >> + /* O_NONBLOCK was only needed to open the file. Let's drop it. We >> don't >> + * do that with O_PATH since it ignores O_NONBLOCK. >> + */ > > thinking about if someone uses openat_file(d, O_PATH|O_CLOEXEC, ...), > why not set the fd flags always? like: > > ret = fcntl(fd, F_SETFL, flags); > assert((flags & O_PATH) || !ret); Syscalls are expensive. It's better to avoid fcntl() up front than it is to skip failure after the fact. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
On 08/08/2017 04:34 PM, Eric Blake wrote: > On 08/08/2017 01:48 PM, Philippe Mathieu-Daudé wrote: > >>> + fd = openat_file(dirfd, name, O_RDONLY | O_PATH, 0); >> >> since you use O_PATH, you can drop O_RDONLY. > > Technically, POSIX says (and 'man 2 open' agrees, modulo the fact that > Linux still lacks O_SEARCH) that you MUST provide one of the 5 access > modes (they are O_RDONLY, O_RDWR, O_WRONLY, O_EXEC, and O_SEARCH; > although POSIX allows O_EXEC and O_SEARCH to be the same bit pattern), > and then bitwise-or any additional flags. So the usage here is correct. > > Practically, though, this code is ONLY compilable on Linux (no one else > has O_PATH yet, although it is a useful Linux invention), and on Linux, > O_RDONLY is 0. So behaviorally, you are correct that open(O_PATH) > rather than open(O_RDONLY | O_PATH) does the same thing, even if it sets > a bad example. [Well, insofar as you don't have any other bugs by > omitting O_NOFOLLOW, per my other thread...] Oh ok. I didn't think of that, just checked Linux manpage: O_PATH (since Linux 2.6.39) When O_PATH is specified in flags, flag bits other than O_CLOEXEC, O_DIRECTORY, and O_NOFOLLOW are ignored. > >>> +++ b/hw/9pfs/9p-util.h >>> @@ -43,9 +43,13 @@ static inline int openat_file(int dirfd, const char >>> *name, int flags, >>> } >>> serrno = errno; >>> - /* O_NONBLOCK was only needed to open the file. Let's drop it. */ >>> - ret = fcntl(fd, F_SETFL, flags); >>> - assert(!ret); >>> + /* O_NONBLOCK was only needed to open the file. Let's drop it. We >>> don't >>> + * do that with O_PATH since it ignores O_NONBLOCK. >>> + */ >> >> thinking about if someone uses openat_file(d, O_PATH|O_CLOEXEC, ...), >> why not set the fd flags always? like: >> >> ret = fcntl(fd, F_SETFL, flags); >> assert((flags & O_PATH) || !ret); > > Syscalls are expensive. It's better to avoid fcntl() up front than it > is to skip failure after the fact. Ok, good to know! Thank to clarify those points, Phil.
On 08/08/2017 03:10 PM, Philippe Mathieu-Daudé wrote: >> Technically, POSIX says (and 'man 2 open' agrees, modulo the fact that >> Linux still lacks O_SEARCH) that you MUST provide one of the 5 access >> modes (they are O_RDONLY, O_RDWR, O_WRONLY, O_EXEC, and O_SEARCH; >> although POSIX allows O_EXEC and O_SEARCH to be the same bit pattern), >> and then bitwise-or any additional flags. So the usage here is correct. >> > Oh ok. I didn't think of that, just checked Linux manpage: > > O_PATH (since Linux 2.6.39) > > When O_PATH is specified in flags, flag bits other than > O_CLOEXEC, O_DIRECTORY, and O_NOFOLLOW are ignored. There are access modes (5 in POSIX), and then flag bits (O_NONBLOCK being one of the flag bits, and therefore ignored when O_PATH is true). Presumably, the author was being careful by mentioning "flag bits" (and thereby implicitly meaning that O_RDONLY is NOT ignored when using O_PATH). But I'm not _quite_ sure whether O_PATH should be considered a sixth access mode, or a flag bit, and the Linux man page doesn't help on that front ;) Hmm - if you treat O_PATH as an access mode rather than a flag bit, then O_RDONLY | O_PATH no longer makes sense at all (you can't mix two modes at once). Maybe we should file a bug report against the man page to get clarification. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
On 08/08/2017 03:24 PM, Eric Blake wrote: > On 08/08/2017 03:10 PM, Philippe Mathieu-Daudé wrote: >>> Technically, POSIX says (and 'man 2 open' agrees, modulo the fact that >>> Linux still lacks O_SEARCH) that you MUST provide one of the 5 access >>> modes (they are O_RDONLY, O_RDWR, O_WRONLY, O_EXEC, and O_SEARCH; >>> although POSIX allows O_EXEC and O_SEARCH to be the same bit pattern), >>> and then bitwise-or any additional flags. So the usage here is correct. >>> > >> Oh ok. I didn't think of that, just checked Linux manpage: >> >> O_PATH (since Linux 2.6.39) >> >> When O_PATH is specified in flags, flag bits other than >> O_CLOEXEC, O_DIRECTORY, and O_NOFOLLOW are ignored. > > There are access modes (5 in POSIX), and then flag bits (O_NONBLOCK > being one of the flag bits, and therefore ignored when O_PATH is true). > Presumably, the author was being careful by mentioning "flag bits" (and > thereby implicitly meaning that O_RDONLY is NOT ignored when using > O_PATH). But I'm not _quite_ sure whether O_PATH should be considered a > sixth access mode, or a flag bit, and the Linux man page doesn't help on > that front ;) Hmm - if you treat O_PATH as an access mode rather than a > flag bit, then O_RDONLY | O_PATH no longer makes sense at all (you can't > mix two modes at once). Maybe we should file a bug report against the > man page to get clarification. Quoting my version of 'man 2 open' The argument flags must include one of the following access modes: O_RDONLY, O_WRONLY, or O_RDWR. These request opening the file read- only, write-only, or read/write, respectively. In addition, zero or more file creation flags and file status flags can be bitwise-or'd in flags. The file creation flags are O_CLOEXEC, O_CREAT, O_DIRECTORY, O_EXCL, O_NOCTTY, O_NOFOLLOW, O_TMPFILE, and O_TRUNC. The file status flags are all of the remaining flags listed below. The distinction between these two groups of flags is that the file status flags can be retrieved and (in some cases) modified; see fcntl(2) for details. and fcntl() lets you see whether an fd was opened with O_PATH, which makes it a file status flag. Well, except that fcntl() also lets you see which mode an fd was opened with (such as O_WRONLY). Hmm - still fuzzy. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
On Tue, 8 Aug 2017 15:28:35 -0500 Eric Blake <eblake@redhat.com> wrote: > On 08/08/2017 03:24 PM, Eric Blake wrote: > > On 08/08/2017 03:10 PM, Philippe Mathieu-Daudé wrote: > >>> Technically, POSIX says (and 'man 2 open' agrees, modulo the fact that > >>> Linux still lacks O_SEARCH) that you MUST provide one of the 5 access > >>> modes (they are O_RDONLY, O_RDWR, O_WRONLY, O_EXEC, and O_SEARCH; > >>> although POSIX allows O_EXEC and O_SEARCH to be the same bit pattern), > >>> and then bitwise-or any additional flags. So the usage here is correct. > >>> > > > >> Oh ok. I didn't think of that, just checked Linux manpage: > >> > >> O_PATH (since Linux 2.6.39) > >> > >> When O_PATH is specified in flags, flag bits other than > >> O_CLOEXEC, O_DIRECTORY, and O_NOFOLLOW are ignored. > > > > There are access modes (5 in POSIX), and then flag bits (O_NONBLOCK > > being one of the flag bits, and therefore ignored when O_PATH is true). > > Presumably, the author was being careful by mentioning "flag bits" (and > > thereby implicitly meaning that O_RDONLY is NOT ignored when using > > O_PATH). But I'm not _quite_ sure whether O_PATH should be considered a > > sixth access mode, or a flag bit, and the Linux man page doesn't help on > > that front ;) Hmm - if you treat O_PATH as an access mode rather than a > > flag bit, then O_RDONLY | O_PATH no longer makes sense at all (you can't > > mix two modes at once). Maybe we should file a bug report against the > > man page to get clarification. > > Quoting my version of 'man 2 open' > > The argument flags must include one of the following access > modes: > O_RDONLY, O_WRONLY, or O_RDWR. These request opening the file > read- > only, write-only, or read/write, respectively. > > In addition, zero or more file creation flags and file status > flags can > be bitwise-or'd in flags. The file creation flags are > O_CLOEXEC, > O_CREAT, O_DIRECTORY, O_EXCL, O_NOCTTY, O_NOFOLLOW, > O_TMPFILE, and > O_TRUNC. The file status flags are all of the remaining flags > listed > below. The distinction between these two groups of flags is > that the > file status flags can be retrieved and (in some cases) > modified; see > fcntl(2) for details. > > and fcntl() lets you see whether an fd was opened with O_PATH, which > makes it a file status flag. Well, except that fcntl() also lets you > see which mode an fd was opened with (such as O_WRONLY). Hmm - still fuzzy. > I agree the documentation is unclear, but in linux/fs/open.c we have: static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) { int lookup_flags = 0; int acc_mode = ACC_MODE(flags); [...] } else if (flags & O_PATH) { /* * If we have O_PATH in the open flag. Then we * cannot have anything other than the below set of flags */ flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH; acc_mode = 0; } This seems to indicate that the access mode is simply ignored. So I guess it is ok to drop O_RDONLY, even if it isn't clearly documented in the manpage... I don't have a strong opinion anyway.
On 08/08/2017 12:28 PM, Greg Kurz wrote: > This function has to ensure it doesn't follow a symlink that could be used > to escape the virtfs directory. This could be easily achieved if fchmodat() > on linux honored the AT_SYMLINK_NOFOLLOW flag as described in POSIX, but > it doesn't. > > The current implementation covers most use-cases, but it notably fails if: > - the target path has access rights equal to 0000 (openat() returns EPERM), > => once you've done chmod(0000) on a file, you can never chmod() again > - the target path is UNIX domain socket (openat() returns ENXIO) > => bind() of UNIX domain sockets fails if the file is on 9pfs Did your attempt at a kernel patch for AT_SYMLINK_NOFOLLOW ever get anywhere? > > The solution is to use O_PATH: openat() now succeeds in both cases, and we > can ensure the path isn't a symlink with fstat(). The associated entry in > "/proc/self/fd" can hence be safely passed to the regular chmod() syscall. > > Signed-off-by: Greg Kurz <groug@kaod.org> > --- > hw/9pfs/9p-local.c | 44 ++++++++++++++++++++++++++++---------------- > hw/9pfs/9p-util.h | 10 +++++++--- > 2 files changed, 35 insertions(+), 19 deletions(-) > > diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c > index 6e478f4765ef..b178d627c764 100644 > --- a/hw/9pfs/9p-local.c > +++ b/hw/9pfs/9p-local.c > @@ -333,30 +333,42 @@ update_map_file: > > static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode) > { > + struct stat stbuf; > int fd, ret; > + char *proc_path; > > /* FIXME: this should be handled with fchmodat(AT_SYMLINK_NOFOLLOW). > - * Unfortunately, the linux kernel doesn't implement it yet. As an > - * alternative, let's open the file and use fchmod() instead. This > - * may fail depending on the permissions of the file, but it is the > - * best we can do to avoid TOCTTOU. We first try to open read-only > - * in case name points to a directory. If that fails, we try write-only > - * in case name doesn't point to a directory. > + * Unfortunately, the linux kernel doesn't implement it yet. > */ > - fd = openat_file(dirfd, name, O_RDONLY, 0); > - if (fd == -1) { > - /* In case the file is writable-only and isn't a directory. */ > - if (errno == EACCES) { > - fd = openat_file(dirfd, name, O_WRONLY, 0); > - } > - if (fd == -1 && errno == EISDIR) { > - errno = EACCES; > - } > + if (fstatat(dirfd, name, &stbuf, AT_SYMLINK_NOFOLLOW)) { > + return -1; > } Checking the file... > + > + if (S_ISLNK(stbuf.st_mode)) { > + errno = ELOOP; > + return -1; > + } > + > + fd = openat_file(dirfd, name, O_RDONLY | O_PATH, 0); ...and then opening the file is a TOCTTOU race (although it works most of the time and avoids the open where it is easy)... > if (fd == -1) { > return -1; > } > - ret = fchmod(fd, mode); > + > + ret = fstat(fd, &stbuf); > + if (ret) { > + goto out; > + } ...so you are double-checking that you got a non-symlink after all (your insurance against the race having done the wrong thing [but see below])... > + > + if (S_ISLNK(stbuf.st_mode)) { > + errno = ELOOP; > + ret = -1; > + goto out; > + } > + > + proc_path = g_strdup_printf("/proc/self/fd/%d", fd); > + ret = chmod(proc_path, mode); ...at which point you now have a valid file name that represents the file you wanted to chmod() in the first place (even if another rename occurs in the meantime, you are changing the mode tied to the non-symlink fd that you double-checked, which ends up behaving as if you had won the race and made the chmod() call before the rename). > + g_free(proc_path); > +out: > close_preserve_errno(fd); > return ret; Might be worth littering some comments in the code explaining why you have to call both fstatat() and stat(), or perhaps you could drop the first fstatat() and just always do the open(). Reading 'man open', it looks like O_PATH will chase symlinks UNLESS you also use O_NOFOLLOW. So I had to code up a simple test program to verify if things work... ============= #define _GNU_SOURCE 1 #include <sys/stat.h> #include <stdio.h> #include <errno.h> #include <unistd.h> #include <fcntl.h> int main(void) { struct stat st; int fd; remove("f"); remove("l"); if (creat("f", 0) < 0) return 1; if (symlink("f", "l") < 0) return 2; fd = open("l", O_RDONLY | O_PATH); if (fd < 0) return 3; if (fstat(fd, &st) < 0) return 4; if (S_ISLNK(st.st_mode)) { printf("got a link\n"); } else { printf("dereferenced\n"); } close(fd); fd = open("l", O_RDONLY | O_PATH | O_NOFOLLOW); if (fd < 0) return 5; if (fstat(fd, &st) < 0) return 6; if (S_ISLNK(st.st_mode)) { printf("got a link\n"); } else { printf("dereferenced\n"); } close(fd); remove("f"); remove("l"); return 0; } ============ $ ./foo dereferenced got a link Ouch - you need to fix your patch to use open(O_NOFOLLOW | O_PATH). Furthermore, I'm worried that your patch is regressing commit 918112c0, when compiling for older platforms where either O_PATH does not exist, or where libc exposes the constant but the kernel is too old to understand it. (I guess the latter problem is already existing in our code base, so we really only need to worry about the former of compiling when the constant does not exist). So if supporting old platforms is desired, you MUST keep BOTH approaches intact, gated by whether O_PATH is defined to a non-zero value, rather than just assuming O_PATH works. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
On Tue, 8 Aug 2017 14:14:18 -0500 Eric Blake <eblake@redhat.com> wrote: > On 08/08/2017 12:28 PM, Greg Kurz wrote: > > This function has to ensure it doesn't follow a symlink that could be used > > to escape the virtfs directory. This could be easily achieved if fchmodat() > > on linux honored the AT_SYMLINK_NOFOLLOW flag as described in POSIX, but > > it doesn't. > > > > The current implementation covers most use-cases, but it notably fails if: > > - the target path has access rights equal to 0000 (openat() returns EPERM), > > => once you've done chmod(0000) on a file, you can never chmod() again > > - the target path is UNIX domain socket (openat() returns ENXIO) > > => bind() of UNIX domain sockets fails if the file is on 9pfs > > Did your attempt at a kernel patch for AT_SYMLINK_NOFOLLOW ever get > anywhere? > No. > > > > The solution is to use O_PATH: openat() now succeeds in both cases, and we > > can ensure the path isn't a symlink with fstat(). The associated entry in > > "/proc/self/fd" can hence be safely passed to the regular chmod() syscall. > > > > Signed-off-by: Greg Kurz <groug@kaod.org> > > --- > > hw/9pfs/9p-local.c | 44 ++++++++++++++++++++++++++++---------------- > > hw/9pfs/9p-util.h | 10 +++++++--- > > 2 files changed, 35 insertions(+), 19 deletions(-) > > > > diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c > > index 6e478f4765ef..b178d627c764 100644 > > --- a/hw/9pfs/9p-local.c > > +++ b/hw/9pfs/9p-local.c > > @@ -333,30 +333,42 @@ update_map_file: > > > > static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode) > > { > > + struct stat stbuf; > > int fd, ret; > > + char *proc_path; > > > > /* FIXME: this should be handled with fchmodat(AT_SYMLINK_NOFOLLOW). > > - * Unfortunately, the linux kernel doesn't implement it yet. As an > > - * alternative, let's open the file and use fchmod() instead. This > > - * may fail depending on the permissions of the file, but it is the > > - * best we can do to avoid TOCTTOU. We first try to open read-only > > - * in case name points to a directory. If that fails, we try write-only > > - * in case name doesn't point to a directory. > > + * Unfortunately, the linux kernel doesn't implement it yet. > > */ > > - fd = openat_file(dirfd, name, O_RDONLY, 0); > > - if (fd == -1) { > > - /* In case the file is writable-only and isn't a directory. */ > > - if (errno == EACCES) { > > - fd = openat_file(dirfd, name, O_WRONLY, 0); > > - } > > - if (fd == -1 && errno == EISDIR) { > > - errno = EACCES; > > - } > > + if (fstatat(dirfd, name, &stbuf, AT_SYMLINK_NOFOLLOW)) { > > + return -1; > > } > > Checking the file... > > > + > > + if (S_ISLNK(stbuf.st_mode)) { > > + errno = ELOOP; > > + return -1; > > + } > > + > > + fd = openat_file(dirfd, name, O_RDONLY | O_PATH, 0); > > ...and then opening the file is a TOCTTOU race (although it works most > of the time and avoids the open where it is easy)... > Exactly. It is globally assumed in the 9p code that symbolic links are resolved by the client. The earlier we detect the file is a symbolic link, the earlier we can error out. The fstat()+S_ISLNK() is a deliberate fast error path actually :) > > if (fd == -1) { > > return -1; > > } > > - ret = fchmod(fd, mode); > > + > > + ret = fstat(fd, &stbuf); > > + if (ret) { > > + goto out; > > + } > > ...so you are double-checking that you got a non-symlink after all (your > insurance against the race having done the wrong thing [but see below])... > Yes, this is the *real* check. > > + > > + if (S_ISLNK(stbuf.st_mode)) { > > + errno = ELOOP; > > + ret = -1; > > + goto out; > > + } > > + > > + proc_path = g_strdup_printf("/proc/self/fd/%d", fd); > > + ret = chmod(proc_path, mode); > > ...at which point you now have a valid file name that represents the > file you wanted to chmod() in the first place (even if another rename > occurs in the meantime, you are changing the mode tied to the > non-symlink fd that you double-checked, which ends up behaving as if you > had won the race and made the chmod() call before the rename). > Yes. > > + g_free(proc_path); > > +out: > > close_preserve_errno(fd); > > return ret; > > Might be worth littering some comments in the code explaining why you > have to call both fstatat() and stat(), or perhaps you could drop the > first fstatat() and just always do the open(). > I like the idea of erroring out right away when a symbolic link is detected. I'll add comments. > Reading 'man open', it looks like O_PATH will chase symlinks UNLESS you > also use O_NOFOLLOW. So I had to code up a simple test program to > verify if things work... > > ============= > #define _GNU_SOURCE 1 > #include <sys/stat.h> > #include <stdio.h> > #include <errno.h> > #include <unistd.h> > #include <fcntl.h> > > int main(void) { > struct stat st; > int fd; > > remove("f"); > remove("l"); > if (creat("f", 0) < 0) > return 1; > if (symlink("f", "l") < 0) > return 2; > > fd = open("l", O_RDONLY | O_PATH); > if (fd < 0) > return 3; > if (fstat(fd, &st) < 0) > return 4; > if (S_ISLNK(st.st_mode)) { > printf("got a link\n"); > } else { > printf("dereferenced\n"); > } > close(fd); > > fd = open("l", O_RDONLY | O_PATH | O_NOFOLLOW); > if (fd < 0) > return 5; > if (fstat(fd, &st) < 0) > return 6; > if (S_ISLNK(st.st_mode)) { > printf("got a link\n"); > } else { > printf("dereferenced\n"); > } > close(fd); > > remove("f"); > remove("l"); > return 0; > } > ============ > $ ./foo > dereferenced > got a link > > Ouch - you need to fix your patch to use open(O_NOFOLLOW | O_PATH). > openat_file() adds O_NOFOLLOW... I'll rename it to openat_file_nofollow() in a separate patch for clarity. > Furthermore, I'm worried that your patch is regressing commit 918112c0, > when compiling for older platforms where either O_PATH does not exist, > or where libc exposes the constant but the kernel is too old to > understand it. (I guess the latter problem is already existing in our > code base, so we really only need to worry about the former of compiling > when the constant does not exist). So if supporting old platforms is > desired, you MUST keep BOTH approaches intact, gated by whether O_PATH > is defined to a non-zero value, rather than just assuming O_PATH works. > I don't necessarily want this to work for old platforms, but I'll fix the potential build break like in commit 918112c0.
© 2016 - 2024 Red Hat, Inc.