[PATCH] virtiofsd: jail lo->proc_self_fd

Miklos Szeredi posted 1 patch 4 years ago
Test docker-mingw@fedora passed
Test checkpatch passed
Test asan passed
Test docker-quick@centos7 passed
Test FreeBSD passed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20200429124733.22488-1-mszeredi@redhat.com
Maintainers: "Dr. David Alan Gilbert" <dgilbert@redhat.com>, Stefan Hajnoczi <stefanha@redhat.com>
tools/virtiofsd/passthrough_ll.c | 27 +++++++++++++++++++++++++--
1 file changed, 25 insertions(+), 2 deletions(-)
[PATCH] virtiofsd: jail lo->proc_self_fd
Posted by Miklos Szeredi 4 years ago
While it's not possible to escape the proc filesystem through
lo->proc_self_fd, it is possible to escape to the root of the proc
filesystem itself through "../..".

Use a temporary mount for opening lo->proc_self_fd, that has it's root at
/proc/self/fd/, preventing access to the ancestor directories.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 tools/virtiofsd/passthrough_ll.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 4c35c95b256c..bc9c44c760f4 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -2536,6 +2536,8 @@ static void print_capabilities(void)
 static void setup_namespaces(struct lo_data *lo, struct fuse_session *se)
 {
     pid_t child;
+    char template[] = "virtiofsd-XXXXXX";
+    char *tmpdir;
 
     /*
      * Create a new pid namespace for *child* processes.  We'll have to
@@ -2597,12 +2599,33 @@ static void setup_namespaces(struct lo_data *lo, struct fuse_session *se)
         exit(1);
     }
 
+    tmpdir = mkdtemp(template);
+    if (!tmpdir) {
+        fuse_log(FUSE_LOG_ERR, "tmpdir(%s): %m\n", template);
+        exit(1);
+    }
+
+    if (mount("/proc/self/fd", tmpdir, NULL, MS_BIND, NULL) < 0) {
+        fuse_log(FUSE_LOG_ERR, "mount(/proc/self/fd, %s, MS_BIND): %m\n",
+                 tmpdir);
+        exit(1);
+    }
+
     /* Now we can get our /proc/self/fd directory file descriptor */
-    lo->proc_self_fd = open("/proc/self/fd", O_PATH);
+    lo->proc_self_fd = open(tmpdir, O_PATH);
     if (lo->proc_self_fd == -1) {
-        fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n");
+        fuse_log(FUSE_LOG_ERR, "open(%s, O_PATH): %m\n", tmpdir);
         exit(1);
     }
+
+    if (umount2(tmpdir, MNT_DETACH) < 0) {
+        fuse_log(FUSE_LOG_ERR, "umount2(%s, MNT_DETACH): %m\n", tmpdir);
+        exit(1);
+    }
+
+    if (rmdir(tmpdir) < 0) {
+        fuse_log(FUSE_LOG_ERR, "rmdir(%s): %m\n", tmpdir);
+    }
 }
 
 /*
-- 
2.21.1


Re: [Virtio-fs] [PATCH] virtiofsd: jail lo->proc_self_fd
Posted by Vivek Goyal 4 years ago
On Wed, Apr 29, 2020 at 02:47:33PM +0200, Miklos Szeredi wrote:
> While it's not possible to escape the proc filesystem through
> lo->proc_self_fd, it is possible to escape to the root of the proc
> filesystem itself through "../..".

Hi Miklos,

So this attack will work with some form of *at(lo->proc_self_fd, "../..")
call?

> 
> Use a temporary mount for opening lo->proc_self_fd, that has it's root at
> /proc/self/fd/, preventing access to the ancestor directories.

Does this mean that now similar attack can happen using "../.." on tmpdir
fd instead and be able to look at peers of tmpdir. Or it is blocked
due to mount point or something else. 

Thanks
Vivek

> 
> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> ---
>  tools/virtiofsd/passthrough_ll.c | 27 +++++++++++++++++++++++++--
>  1 file changed, 25 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> index 4c35c95b256c..bc9c44c760f4 100644
> --- a/tools/virtiofsd/passthrough_ll.c
> +++ b/tools/virtiofsd/passthrough_ll.c
> @@ -2536,6 +2536,8 @@ static void print_capabilities(void)
>  static void setup_namespaces(struct lo_data *lo, struct fuse_session *se)
>  {
>      pid_t child;
> +    char template[] = "virtiofsd-XXXXXX";
> +    char *tmpdir;
>  
>      /*
>       * Create a new pid namespace for *child* processes.  We'll have to
> @@ -2597,12 +2599,33 @@ static void setup_namespaces(struct lo_data *lo, struct fuse_session *se)
>          exit(1);
>      }
>  
> +    tmpdir = mkdtemp(template);
> +    if (!tmpdir) {
> +        fuse_log(FUSE_LOG_ERR, "tmpdir(%s): %m\n", template);
> +        exit(1);
> +    }
> +
> +    if (mount("/proc/self/fd", tmpdir, NULL, MS_BIND, NULL) < 0) {
> +        fuse_log(FUSE_LOG_ERR, "mount(/proc/self/fd, %s, MS_BIND): %m\n",
> +                 tmpdir);
> +        exit(1);
> +    }
> +
>      /* Now we can get our /proc/self/fd directory file descriptor */
> -    lo->proc_self_fd = open("/proc/self/fd", O_PATH);
> +    lo->proc_self_fd = open(tmpdir, O_PATH);
>      if (lo->proc_self_fd == -1) {
> -        fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n");
> +        fuse_log(FUSE_LOG_ERR, "open(%s, O_PATH): %m\n", tmpdir);
>          exit(1);
>      }
> +
> +    if (umount2(tmpdir, MNT_DETACH) < 0) {
> +        fuse_log(FUSE_LOG_ERR, "umount2(%s, MNT_DETACH): %m\n", tmpdir);
> +        exit(1);
> +    }
> +
> +    if (rmdir(tmpdir) < 0) {
> +        fuse_log(FUSE_LOG_ERR, "rmdir(%s): %m\n", tmpdir);
> +    }
>  }
>  
>  /*
> -- 
> 2.21.1
> 
> 
> _______________________________________________
> Virtio-fs mailing list
> Virtio-fs@redhat.com
> https://www.redhat.com/mailman/listinfo/virtio-fs


Re: [Virtio-fs] [PATCH] virtiofsd: jail lo->proc_self_fd
Posted by Miklos Szeredi 4 years ago
On Wed, Apr 29, 2020 at 4:36 PM Vivek Goyal <vgoyal@redhat.com> wrote:
>
> On Wed, Apr 29, 2020 at 02:47:33PM +0200, Miklos Szeredi wrote:
> > While it's not possible to escape the proc filesystem through
> > lo->proc_self_fd, it is possible to escape to the root of the proc
> > filesystem itself through "../..".
>
> Hi Miklos,
>
> So this attack will work with some form of *at(lo->proc_self_fd, "../..")
> call?

Right.

>
> >
> > Use a temporary mount for opening lo->proc_self_fd, that has it's root at
> > /proc/self/fd/, preventing access to the ancestor directories.
>
> Does this mean that now similar attack can happen using "../.." on tmpdir
> fd instead and be able to look at peers of tmpdir. Or it is blocked
> due to mount point or something else.

No, because tmpdir is detached, the root of that tree will be the
directory pointed to by the fd.  ".." will just lead to the same
directory.

Thanks,
Miklos


Re: [Virtio-fs] [PATCH] virtiofsd: jail lo->proc_self_fd
Posted by Miklos Szeredi 4 years ago
On Wed, Apr 29, 2020 at 4:47 PM Miklos Szeredi <mszeredi@redhat.com> wrote:
>
> On Wed, Apr 29, 2020 at 4:36 PM Vivek Goyal <vgoyal@redhat.com> wrote:
> >
> > On Wed, Apr 29, 2020 at 02:47:33PM +0200, Miklos Szeredi wrote:
> > > While it's not possible to escape the proc filesystem through
> > > lo->proc_self_fd, it is possible to escape to the root of the proc
> > > filesystem itself through "../..".
> >
> > Hi Miklos,
> >
> > So this attack will work with some form of *at(lo->proc_self_fd, "../..")
> > call?
>
> Right.
>
> >
> > >
> > > Use a temporary mount for opening lo->proc_self_fd, that has it's root at
> > > /proc/self/fd/, preventing access to the ancestor directories.
> >
> > Does this mean that now similar attack can happen using "../.." on tmpdir
> > fd instead and be able to look at peers of tmpdir. Or it is blocked
> > due to mount point or something else.
>
> No, because tmpdir is detached, the root of that tree will be the
> directory pointed to by the fd.  ".." will just lead to the same
> directory.

BTW, I would have liked to do this without a temp directory, but
apparently the fancy new mount stuff isn't up to this task, or at
least I haven't figured out yet.

Thanks,
Miklos


Re: [Virtio-fs] [PATCH] virtiofsd: jail lo->proc_self_fd
Posted by Vivek Goyal 4 years ago
On Wed, Apr 29, 2020 at 04:47:19PM +0200, Miklos Szeredi wrote:
> On Wed, Apr 29, 2020 at 4:36 PM Vivek Goyal <vgoyal@redhat.com> wrote:
> >
> > On Wed, Apr 29, 2020 at 02:47:33PM +0200, Miklos Szeredi wrote:
> > > While it's not possible to escape the proc filesystem through
> > > lo->proc_self_fd, it is possible to escape to the root of the proc
> > > filesystem itself through "../..".
> >
> > Hi Miklos,
> >
> > So this attack will work with some form of *at(lo->proc_self_fd, "../..")
> > call?
> 
> Right.
> 
> >
> > >
> > > Use a temporary mount for opening lo->proc_self_fd, that has it's root at
> > > /proc/self/fd/, preventing access to the ancestor directories.
> >
> > Does this mean that now similar attack can happen using "../.." on tmpdir
> > fd instead and be able to look at peers of tmpdir. Or it is blocked
> > due to mount point or something else.
> 
> No, because tmpdir is detached, the root of that tree will be the
> directory pointed to by the fd.  ".." will just lead to the same
> directory.

Aha.. got it. Thanks.

One more question. We seem to be using PID namespaces. Can't we mount
fresh instance of proc so that we don't see other processes apart from
virtiofd. May be we are already doing it. I have not checked it yet. If
yes, that should mitigate this concern?

Also noticed private proc patches upstream which should allow mounting
private instance of proc even without the need of separate PID namespace.

Thanks
Vivek


Re: [Virtio-fs] [PATCH] virtiofsd: jail lo->proc_self_fd
Posted by Miklos Szeredi 4 years ago
On Wed, Apr 29, 2020 at 5:00 PM Vivek Goyal <vgoyal@redhat.com> wrote:
>
> On Wed, Apr 29, 2020 at 04:47:19PM +0200, Miklos Szeredi wrote:
> > On Wed, Apr 29, 2020 at 4:36 PM Vivek Goyal <vgoyal@redhat.com> wrote:
> > >
> > > On Wed, Apr 29, 2020 at 02:47:33PM +0200, Miklos Szeredi wrote:
> > > > While it's not possible to escape the proc filesystem through
> > > > lo->proc_self_fd, it is possible to escape to the root of the proc
> > > > filesystem itself through "../..".
> > >
> > > Hi Miklos,
> > >
> > > So this attack will work with some form of *at(lo->proc_self_fd, "../..")
> > > call?
> >
> > Right.
> >
> > >
> > > >
> > > > Use a temporary mount for opening lo->proc_self_fd, that has it's root at
> > > > /proc/self/fd/, preventing access to the ancestor directories.
> > >
> > > Does this mean that now similar attack can happen using "../.." on tmpdir
> > > fd instead and be able to look at peers of tmpdir. Or it is blocked
> > > due to mount point or something else.
> >
> > No, because tmpdir is detached, the root of that tree will be the
> > directory pointed to by the fd.  ".." will just lead to the same
> > directory.
>
> Aha.. got it. Thanks.
>
> One more question. We seem to be using PID namespaces. Can't we mount
> fresh instance of proc so that we don't see other processes apart from
> virtiofd. May be we are already doing it. I have not checked it yet. If
> yes, that should mitigate this concern?

Yes, it's doing that already just above this patch:

    /* The child must remount /proc to use the new pid namespace */
    if (mount("proc", "/proc", "proc",
              MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RELATIME, NULL) < 0) {
        fuse_log(FUSE_LOG_ERR, "mount(/proc): %m\n");
        exit(1);
    }

Thanks,
Miklos


Re: [Virtio-fs] [PATCH] virtiofsd: jail lo->proc_self_fd
Posted by Stefan Hajnoczi 4 years ago
On Wed, Apr 29, 2020 at 02:47:33PM +0200, Miklos Szeredi wrote:
> While it's not possible to escape the proc filesystem through
> lo->proc_self_fd, it is possible to escape to the root of the proc
> filesystem itself through "../..".
> 
> Use a temporary mount for opening lo->proc_self_fd, that has it's root at
> /proc/self/fd/, preventing access to the ancestor directories.
> 
> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> ---
>  tools/virtiofsd/passthrough_ll.c | 27 +++++++++++++++++++++++++--
>  1 file changed, 25 insertions(+), 2 deletions(-)

Good idea!

It's important to note that the proc file system is already mounted
within a new pid namespace.  Therefore the only process visible is our
own process and we don't need to worry about /proc/$PID.  However, there
are a bunch of other files in /proc.  Some of them are protected by
capability checks like /proc/kcore and virtiofsd is unable to access
them, but it's hard to guarantee that they are all off limits.  Better
safe than sorry!

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Re: [Virtio-fs] [PATCH] virtiofsd: jail lo->proc_self_fd
Posted by Dr. David Alan Gilbert 4 years ago
* Stefan Hajnoczi (stefanha@redhat.com) wrote:
> On Wed, Apr 29, 2020 at 02:47:33PM +0200, Miklos Szeredi wrote:
> > While it's not possible to escape the proc filesystem through
> > lo->proc_self_fd, it is possible to escape to the root of the proc
> > filesystem itself through "../..".
> > 
> > Use a temporary mount for opening lo->proc_self_fd, that has it's root at
> > /proc/self/fd/, preventing access to the ancestor directories.
> > 
> > Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> > ---
> >  tools/virtiofsd/passthrough_ll.c | 27 +++++++++++++++++++++++++--
> >  1 file changed, 25 insertions(+), 2 deletions(-)
> 
> Good idea!
> 
> It's important to note that the proc file system is already mounted
> within a new pid namespace.  Therefore the only process visible is our
> own process and we don't need to worry about /proc/$PID.  However, there
> are a bunch of other files in /proc.  Some of them are protected by
> capability checks like /proc/kcore and virtiofsd is unable to access
> them, but it's hard to guarantee that they are all off limits.  Better
> safe than sorry!
> 
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

Thanks; I've picked this up.

Dave

> _______________________________________________
> Virtio-fs mailing list
> Virtio-fs@redhat.com
> https://www.redhat.com/mailman/listinfo/virtio-fs

--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK


Re: [PATCH] virtiofsd: jail lo->proc_self_fd
Posted by Dr. David Alan Gilbert 4 years ago
* Miklos Szeredi (mszeredi@redhat.com) wrote:
> While it's not possible to escape the proc filesystem through
> lo->proc_self_fd, it is possible to escape to the root of the proc
> filesystem itself through "../..".
> 
> Use a temporary mount for opening lo->proc_self_fd, that has it's root at
> /proc/self/fd/, preventing access to the ancestor directories.
> 
> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>

Queued

> ---
>  tools/virtiofsd/passthrough_ll.c | 27 +++++++++++++++++++++++++--
>  1 file changed, 25 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> index 4c35c95b256c..bc9c44c760f4 100644
> --- a/tools/virtiofsd/passthrough_ll.c
> +++ b/tools/virtiofsd/passthrough_ll.c
> @@ -2536,6 +2536,8 @@ static void print_capabilities(void)
>  static void setup_namespaces(struct lo_data *lo, struct fuse_session *se)
>  {
>      pid_t child;
> +    char template[] = "virtiofsd-XXXXXX";
> +    char *tmpdir;
>  
>      /*
>       * Create a new pid namespace for *child* processes.  We'll have to
> @@ -2597,12 +2599,33 @@ static void setup_namespaces(struct lo_data *lo, struct fuse_session *se)
>          exit(1);
>      }
>  
> +    tmpdir = mkdtemp(template);
> +    if (!tmpdir) {
> +        fuse_log(FUSE_LOG_ERR, "tmpdir(%s): %m\n", template);
> +        exit(1);
> +    }
> +
> +    if (mount("/proc/self/fd", tmpdir, NULL, MS_BIND, NULL) < 0) {
> +        fuse_log(FUSE_LOG_ERR, "mount(/proc/self/fd, %s, MS_BIND): %m\n",
> +                 tmpdir);
> +        exit(1);
> +    }
> +
>      /* Now we can get our /proc/self/fd directory file descriptor */
> -    lo->proc_self_fd = open("/proc/self/fd", O_PATH);
> +    lo->proc_self_fd = open(tmpdir, O_PATH);
>      if (lo->proc_self_fd == -1) {
> -        fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n");
> +        fuse_log(FUSE_LOG_ERR, "open(%s, O_PATH): %m\n", tmpdir);
>          exit(1);
>      }
> +
> +    if (umount2(tmpdir, MNT_DETACH) < 0) {
> +        fuse_log(FUSE_LOG_ERR, "umount2(%s, MNT_DETACH): %m\n", tmpdir);
> +        exit(1);
> +    }
> +
> +    if (rmdir(tmpdir) < 0) {
> +        fuse_log(FUSE_LOG_ERR, "rmdir(%s): %m\n", tmpdir);
> +    }
>  }
>  
>  /*
> -- 
> 2.21.1
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK