hw/9pfs/9p-local.c | 41 +++++++++++++++++++++++++++++++++-------- hw/9pfs/9p-util.h | 24 +++++++++++++++--------- 2 files changed, 48 insertions(+), 17 deletions(-)
This function has to ensure it doesn't follow a symlink that could be used
to escape the virtfs directory. This could be easily achieved if fchmodat()
on linux honored the AT_SYMLINK_NOFOLLOW flag as described in POSIX, but
it doesn't.
The current implementation covers most use-cases, but it notably fails if:
- the target path has access rights equal to 0000 (openat() returns EPERM),
=> once you've done chmod(0000) on a file, you can never chmod() again
- the target path is UNIX domain socket (openat() returns ENXIO)
=> bind() of UNIX domain sockets fails if the file is on 9pfs
The solution is to use O_PATH: openat() now succeeds in both cases, and we
can ensure the path isn't a symlink with fstat(). The associated entry in
"/proc/self/fd" can hence be safely passed to the regular chmod() syscall.
The previous behavior is kept for older systems that don't have O_PATH.
Signed-off-by: Greg Kurz <groug@kaod.org>
---
v2: - renamed OPENAT_DIR_O_PATH to O_PATH_9P_UTIL and use it as a replacement
for O_PATH to avoid build breaks on O_PATH-less systems
- keep current behavior for O_PATH-less systems
- added comments
- TODO in 2.11: add _nofollow suffix to openat_dir() and openat_file()
---
hw/9pfs/9p-local.c | 41 +++++++++++++++++++++++++++++++++--------
hw/9pfs/9p-util.h | 24 +++++++++++++++---------
2 files changed, 48 insertions(+), 17 deletions(-)
diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index 6e478f4765ef..da7cbadbb848 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -333,17 +333,27 @@ update_map_file:
static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode)
{
+ struct stat stbuf;
int fd, ret;
+ char *proc_path;
/* FIXME: this should be handled with fchmodat(AT_SYMLINK_NOFOLLOW).
- * Unfortunately, the linux kernel doesn't implement it yet. As an
- * alternative, let's open the file and use fchmod() instead. This
- * may fail depending on the permissions of the file, but it is the
- * best we can do to avoid TOCTTOU. We first try to open read-only
- * in case name points to a directory. If that fails, we try write-only
- * in case name doesn't point to a directory.
+ * Unfortunately, the linux kernel doesn't implement it yet.
+ */
+
+ /* First, we clear non-racing symlinks out of the way. */
+ if (fstatat(dirfd, name, &stbuf, AT_SYMLINK_NOFOLLOW)) {
+ return -1;
+ }
+ if (S_ISLNK(stbuf.st_mode)) {
+ errno = ELOOP;
+ return -1;
+ }
+
+ /* Access modes are ignored when O_PATH is supported. We try O_RDONLY and
+ * O_WRONLY for old-systems that don't support O_PATH.
*/
- fd = openat_file(dirfd, name, O_RDONLY, 0);
+ fd = openat_file(dirfd, name, O_RDONLY | O_PATH_9P_UTIL, 0);
if (fd == -1) {
/* In case the file is writable-only and isn't a directory. */
if (errno == EACCES) {
@@ -356,7 +366,22 @@ static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode)
if (fd == -1) {
return -1;
}
- ret = fchmod(fd, mode);
+
+ /* Now we handle racing symlinks. */
+ ret = fstat(fd, &stbuf);
+ if (ret) {
+ goto out;
+ }
+ if (S_ISLNK(stbuf.st_mode)) {
+ errno = ELOOP;
+ ret = -1;
+ goto out;
+ }
+
+ proc_path = g_strdup_printf("/proc/self/fd/%d", fd);
+ ret = chmod(proc_path, mode);
+ g_free(proc_path);
+out:
close_preserve_errno(fd);
return ret;
}
diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h
index 91299a24b8af..dc0d2e29aa3b 100644
--- a/hw/9pfs/9p-util.h
+++ b/hw/9pfs/9p-util.h
@@ -13,6 +13,12 @@
#ifndef QEMU_9P_UTIL_H
#define QEMU_9P_UTIL_H
+#ifdef O_PATH
+#define O_PATH_9P_UTIL O_PATH
+#else
+#define O_PATH_9P_UTIL 0
+#endif
+
static inline void close_preserve_errno(int fd)
{
int serrno = errno;
@@ -22,13 +28,8 @@ static inline void close_preserve_errno(int fd)
static inline int openat_dir(int dirfd, const char *name)
{
-#ifdef O_PATH
-#define OPENAT_DIR_O_PATH O_PATH
-#else
-#define OPENAT_DIR_O_PATH 0
-#endif
return openat(dirfd, name,
- O_DIRECTORY | O_RDONLY | O_NOFOLLOW | OPENAT_DIR_O_PATH);
+ O_DIRECTORY | O_RDONLY | O_NOFOLLOW | O_PATH_9P_UTIL);
}
static inline int openat_file(int dirfd, const char *name, int flags,
@@ -43,9 +44,14 @@ static inline int openat_file(int dirfd, const char *name, int flags,
}
serrno = errno;
- /* O_NONBLOCK was only needed to open the file. Let's drop it. */
- ret = fcntl(fd, F_SETFL, flags);
- assert(!ret);
+ /* O_NONBLOCK was only needed to open the file. Let's drop it. We don't
+ * do that with O_PATH since fcntl(F_SETFL) isn't supported, and openat()
+ * ignored it anyway.
+ */
+ if (!(flags & O_PATH_9P_UTIL)) {
+ ret = fcntl(fd, F_SETFL, flags);
+ assert(!ret);
+ }
errno = serrno;
return fd;
}
On 08/09/2017 09:23 AM, Greg Kurz wrote: > This function has to ensure it doesn't follow a symlink that could be used > to escape the virtfs directory. This could be easily achieved if fchmodat() > on linux honored the AT_SYMLINK_NOFOLLOW flag as described in POSIX, but > it doesn't. Might be worth including a URL of the LKML discussion on the last version of that patch attempt. > > The current implementation covers most use-cases, but it notably fails if: > - the target path has access rights equal to 0000 (openat() returns EPERM), > => once you've done chmod(0000) on a file, you can never chmod() again > - the target path is UNIX domain socket (openat() returns ENXIO) > => bind() of UNIX domain sockets fails if the file is on 9pfs > > The solution is to use O_PATH: openat() now succeeds in both cases, and we > can ensure the path isn't a symlink with fstat(). The associated entry in > "/proc/self/fd" can hence be safely passed to the regular chmod() syscall. Hey - should we point this out as a viable solution to the glibc folks, since their current user-space emulation of AT_SYMLINK_NOFOLLOW is broken? > > The previous behavior is kept for older systems that don't have O_PATH. > > Signed-off-by: Greg Kurz <groug@kaod.org> > --- > v2: - renamed OPENAT_DIR_O_PATH to O_PATH_9P_UTIL and use it as a replacement > for O_PATH to avoid build breaks on O_PATH-less systems > - keep current behavior for O_PATH-less systems > - added comments > - TODO in 2.11: add _nofollow suffix to openat_dir() and openat_file() > --- > hw/9pfs/9p-local.c | 41 +++++++++++++++++++++++++++++++++-------- > hw/9pfs/9p-util.h | 24 +++++++++++++++--------- > 2 files changed, 48 insertions(+), 17 deletions(-) > > + /* First, we clear non-racing symlinks out of the way. */ > + if (fstatat(dirfd, name, &stbuf, AT_SYMLINK_NOFOLLOW)) { > + return -1; > + } > + if (S_ISLNK(stbuf.st_mode)) { > + errno = ELOOP; I don't know if ELOOP is the best errno value to use here, but I don't have any better suggestions so I'm okay with it. > + return -1; > + } > + > + /* Access modes are ignored when O_PATH is supported. We try O_RDONLY and > + * O_WRONLY for old-systems that don't support O_PATH. > */ > - fd = openat_file(dirfd, name, O_RDONLY, 0); > + fd = openat_file(dirfd, name, O_RDONLY | O_PATH_9P_UTIL, 0); > if (fd == -1) { > /* In case the file is writable-only and isn't a directory. */ > if (errno == EACCES) { > @@ -356,7 +366,22 @@ static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode) > if (fd == -1) { > return -1; > } > - ret = fchmod(fd, mode); > + > + /* Now we handle racing symlinks. */ > + ret = fstat(fd, &stbuf); > + if (ret) { > + goto out; > + } > + if (S_ISLNK(stbuf.st_mode)) { > + errno = ELOOP; > + ret = -1; > + goto out; > + } > + > + proc_path = g_strdup_printf("/proc/self/fd/%d", fd); > + ret = chmod(proc_path, mode); Nope, unsafe when O_PATH_9P_UTIL is 0. This needs to be more like: /* Now we handle racing symlinks. On kernels without O_PATH, we will * fail on some corner cases, but that's better than dereferencing a * symlink that was injected during the TOCTTOU between our initial * fstatat() and openat_file(). */ if (O_PATH_9P_UTIL) { fstat, S_ISLINK, proc_path, chmod() } else { fchmod() } -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
On 08/09/2017 09:55 AM, Eric Blake wrote: > On 08/09/2017 09:23 AM, Greg Kurz wrote: >> This function has to ensure it doesn't follow a symlink that could be used >> to escape the virtfs directory. This could be easily achieved if fchmodat() >> on linux honored the AT_SYMLINK_NOFOLLOW flag as described in POSIX, but >> it doesn't. > > Might be worth including a URL of the LKML discussion on the last > version of that patch attempt. > >> >> The current implementation covers most use-cases, but it notably fails if: >> - the target path has access rights equal to 0000 (openat() returns EPERM), >> => once you've done chmod(0000) on a file, you can never chmod() again >> - the target path is UNIX domain socket (openat() returns ENXIO) >> => bind() of UNIX domain sockets fails if the file is on 9pfs >> >> The solution is to use O_PATH: openat() now succeeds in both cases, and we >> can ensure the path isn't a symlink with fstat(). The associated entry in >> "/proc/self/fd" can hence be safely passed to the regular chmod() syscall. > > Hey - should we point this out as a viable solution to the glibc folks, > since their current user-space emulation of AT_SYMLINK_NOFOLLOW is broken? > > Nope, unsafe when O_PATH_9P_UTIL is 0. This needs to be more like: > > /* Now we handle racing symlinks. On kernels without O_PATH, we will > * fail on some corner cases, but that's better than dereferencing a > * symlink that was injected during the TOCTTOU between our initial > * fstatat() and openat_file(). > */ > if (O_PATH_9P_UTIL) { > fstat, S_ISLINK, proc_path, chmod() > } else { > fchmod() > } For that matter, I think you also want to avoid the O_WRONLY fallback when O_PATH works, as in: >> - fd = openat_file(dirfd, name, O_RDONLY, 0); >> + fd = openat_file(dirfd, name, O_RDONLY | O_PATH_9P_UTIL, 0); >> if (fd == -1) { changing this to 'if (fd == -1 && !O_PATH_9P_UTIL)' -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
On Wed, 9 Aug 2017 10:01:14 -0500 Eric Blake <eblake@redhat.com> wrote: > On 08/09/2017 09:55 AM, Eric Blake wrote: > > On 08/09/2017 09:23 AM, Greg Kurz wrote: > >> This function has to ensure it doesn't follow a symlink that could be used > >> to escape the virtfs directory. This could be easily achieved if fchmodat() > >> on linux honored the AT_SYMLINK_NOFOLLOW flag as described in POSIX, but > >> it doesn't. > > > > Might be worth including a URL of the LKML discussion on the last > > version of that patch attempt. > > > >> > >> The current implementation covers most use-cases, but it notably fails if: > >> - the target path has access rights equal to 0000 (openat() returns EPERM), > >> => once you've done chmod(0000) on a file, you can never chmod() again > >> - the target path is UNIX domain socket (openat() returns ENXIO) > >> => bind() of UNIX domain sockets fails if the file is on 9pfs > >> > >> The solution is to use O_PATH: openat() now succeeds in both cases, and we > >> can ensure the path isn't a symlink with fstat(). The associated entry in > >> "/proc/self/fd" can hence be safely passed to the regular chmod() syscall. > > > > Hey - should we point this out as a viable solution to the glibc folks, > > since their current user-space emulation of AT_SYMLINK_NOFOLLOW is broken? > > > > > > Nope, unsafe when O_PATH_9P_UTIL is 0. This needs to be more like: > > > > /* Now we handle racing symlinks. On kernels without O_PATH, we will > > * fail on some corner cases, but that's better than dereferencing a > > * symlink that was injected during the TOCTTOU between our initial > > * fstatat() and openat_file(). > > */ > > if (O_PATH_9P_UTIL) { > > fstat, S_ISLINK, proc_path, chmod() > > } else { > > fchmod() > > } > > For that matter, I think you also want to avoid the O_WRONLY fallback > when O_PATH works, as in: > > >> - fd = openat_file(dirfd, name, O_RDONLY, 0); > >> + fd = openat_file(dirfd, name, O_RDONLY | O_PATH_9P_UTIL, 0); > >> if (fd == -1) { > > changing this to 'if (fd == -1 && !O_PATH_9P_UTIL)' > Yes, will do.
On Wed, 9 Aug 2017 09:55:32 -0500 Eric Blake <eblake@redhat.com> wrote: > On 08/09/2017 09:23 AM, Greg Kurz wrote: > > This function has to ensure it doesn't follow a symlink that could be used > > to escape the virtfs directory. This could be easily achieved if fchmodat() > > on linux honored the AT_SYMLINK_NOFOLLOW flag as described in POSIX, but > > it doesn't. > > Might be worth including a URL of the LKML discussion on the last > version of that patch attempt. > Ok. > > > > The current implementation covers most use-cases, but it notably fails if: > > - the target path has access rights equal to 0000 (openat() returns EPERM), > > => once you've done chmod(0000) on a file, you can never chmod() again > > - the target path is UNIX domain socket (openat() returns ENXIO) > > => bind() of UNIX domain sockets fails if the file is on 9pfs > > > > The solution is to use O_PATH: openat() now succeeds in both cases, and we > > can ensure the path isn't a symlink with fstat(). The associated entry in > > "/proc/self/fd" can hence be safely passed to the regular chmod() syscall. > > Hey - should we point this out as a viable solution to the glibc folks, > since their current user-space emulation of AT_SYMLINK_NOFOLLOW is broken? > Probably. What's the best way to do that ? > > > > The previous behavior is kept for older systems that don't have O_PATH. > > > > Signed-off-by: Greg Kurz <groug@kaod.org> > > --- > > v2: - renamed OPENAT_DIR_O_PATH to O_PATH_9P_UTIL and use it as a replacement > > for O_PATH to avoid build breaks on O_PATH-less systems > > - keep current behavior for O_PATH-less systems > > - added comments > > - TODO in 2.11: add _nofollow suffix to openat_dir() and openat_file() > > --- > > hw/9pfs/9p-local.c | 41 +++++++++++++++++++++++++++++++++-------- > > hw/9pfs/9p-util.h | 24 +++++++++++++++--------- > > 2 files changed, 48 insertions(+), 17 deletions(-) > > > > + /* First, we clear non-racing symlinks out of the way. */ > > + if (fstatat(dirfd, name, &stbuf, AT_SYMLINK_NOFOLLOW)) { > > + return -1; > > + } > > + if (S_ISLNK(stbuf.st_mode)) { > > + errno = ELOOP; > > I don't know if ELOOP is the best errno value to use here, but I don't > have any better suggestions so I'm okay with it. > > > + return -1; > > + } > > + > > + /* Access modes are ignored when O_PATH is supported. We try O_RDONLY and > > + * O_WRONLY for old-systems that don't support O_PATH. > > */ > > - fd = openat_file(dirfd, name, O_RDONLY, 0); > > + fd = openat_file(dirfd, name, O_RDONLY | O_PATH_9P_UTIL, 0); > > if (fd == -1) { > > /* In case the file is writable-only and isn't a directory. */ > > if (errno == EACCES) { > > @@ -356,7 +366,22 @@ static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode) > > if (fd == -1) { > > return -1; > > } > > - ret = fchmod(fd, mode); > > + > > + /* Now we handle racing symlinks. */ > > + ret = fstat(fd, &stbuf); > > + if (ret) { > > + goto out; > > + } > > + if (S_ISLNK(stbuf.st_mode)) { > > + errno = ELOOP; > > + ret = -1; > > + goto out; > > + } > > + > > + proc_path = g_strdup_printf("/proc/self/fd/%d", fd); > > + ret = chmod(proc_path, mode); > > Nope, unsafe when O_PATH_9P_UTIL is 0. This needs to be more like: > > /* Now we handle racing symlinks. On kernels without O_PATH, we will > * fail on some corner cases, but that's better than dereferencing a > * symlink that was injected during the TOCTTOU between our initial > * fstatat() and openat_file(). > */ > if (O_PATH_9P_UTIL) { > fstat, S_ISLINK, proc_path, chmod() > } else { > fchmod() > } > Oops, you're right. I'll fix that.
On 08/09/2017 10:22 AM, Greg Kurz wrote: >>> >>> The solution is to use O_PATH: openat() now succeeds in both cases, and we >>> can ensure the path isn't a symlink with fstat(). The associated entry in >>> "/proc/self/fd" can hence be safely passed to the regular chmod() syscall. >> >> Hey - should we point this out as a viable solution to the glibc folks, >> since their current user-space emulation of AT_SYMLINK_NOFOLLOW is broken? >> > > Probably. What's the best way to do that ? I've added a comment to https://sourceware.org/bugzilla/show_bug.cgi?id=14578; you'll also want to point to the lkml discussion in that bug. And reading that bug, it also looks like your hack with /proc/self/fd has been proposed by Rich Felker since 2013! (although fstat() didn't work until Linux 3.6, even though O_PATH predates that time) - so there is that one additional concern of whether we need to cater to the window of kernels where O_PATH exists but fstat() on that fd can't learn whether we opened a symlink. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
On Wed, 9 Aug 2017 10:59:46 -0500 Eric Blake <eblake@redhat.com> wrote: > On 08/09/2017 10:22 AM, Greg Kurz wrote: > > >>> > >>> The solution is to use O_PATH: openat() now succeeds in both cases, and we > >>> can ensure the path isn't a symlink with fstat(). The associated entry in > >>> "/proc/self/fd" can hence be safely passed to the regular chmod() syscall. > >> > >> Hey - should we point this out as a viable solution to the glibc folks, > >> since their current user-space emulation of AT_SYMLINK_NOFOLLOW is broken? > >> > > > > Probably. What's the best way to do that ? > > I've added a comment to > https://sourceware.org/bugzilla/show_bug.cgi?id=14578; you'll also want > to point to the lkml discussion in that bug. And reading that bug, it > also looks like your hack with /proc/self/fd has been proposed by Rich > Felker since 2013! (although fstat() didn't work until Linux 3.6, even > though O_PATH predates that time) - so there is that one additional > concern of whether we need to cater to the window of kernels where > O_PATH exists but fstat() on that fd can't learn whether we opened a > symlink. > BTW, what happens with fstat() and O_PATH before Linux 3.6 ? Does it fail or does it return something wrong in th stat buf ?
09.08.2017 17:23, Greg Kurz wrote: > This function has to ensure it doesn't follow a symlink that could be used > to escape the virtfs directory. This could be easily achieved if fchmodat() > on linux honored the AT_SYMLINK_NOFOLLOW flag as described in POSIX, but > it doesn't. > > The current implementation covers most use-cases, but it notably fails if: > - the target path has access rights equal to 0000 (openat() returns EPERM), > => once you've done chmod(0000) on a file, you can never chmod() again > - the target path is UNIX domain socket (openat() returns ENXIO) > => bind() of UNIX domain sockets fails if the file is on 9pfs > > The solution is to use O_PATH: openat() now succeeds in both cases, and we > can ensure the path isn't a symlink with fstat(). The associated entry in > "/proc/self/fd" can hence be safely passed to the regular chmod() syscall. How we can ensure the path isn't a symlink using fstat() ? As far as I understand, fstat NEVER, EVER will return S_ISLINK, because we can't actually "open" a symlink itsef, only the target of the symlink. /mjt
On Wed, 9 Aug 2017 18:11:51 +0300 Michael Tokarev <mjt@tls.msk.ru> wrote: > 09.08.2017 17:23, Greg Kurz wrote: > > This function has to ensure it doesn't follow a symlink that could be used > > to escape the virtfs directory. This could be easily achieved if fchmodat() > > on linux honored the AT_SYMLINK_NOFOLLOW flag as described in POSIX, but > > it doesn't. > > > > The current implementation covers most use-cases, but it notably fails if: > > - the target path has access rights equal to 0000 (openat() returns EPERM), > > => once you've done chmod(0000) on a file, you can never chmod() again > > - the target path is UNIX domain socket (openat() returns ENXIO) > > => bind() of UNIX domain sockets fails if the file is on 9pfs > > > > The solution is to use O_PATH: openat() now succeeds in both cases, and we > > can ensure the path isn't a symlink with fstat(). The associated entry in > > "/proc/self/fd" can hence be safely passed to the regular chmod() syscall. > > How we can ensure the path isn't a symlink using fstat() ? > > As far as I understand, fstat NEVER, EVER will return S_ISLINK, because > we can't actually "open" a symlink itsef, only the target of the symlink. > Except when O_PATH is passed, as stated in open(2): If pathname is a symbolic link and the O_NOFOLLOW flag is also specified, then the call returns a file descriptor referring to the symbolic link. See Eric's program that proves it at: https://lists.gnu.org/archive/html/qemu-devel/2017-08/msg01600.html > /mjt
© 2016 - 2024 Red Hat, Inc.