Set up a handler to run the vfio-user context. The context is driven by
messages to the file descriptor associated with it - get the fd for
the context and hook up the handler with it
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
hw/remote/vfio-user-obj.c | 80 ++++++++++++++++++++++++++++++++++++++-
1 file changed, 79 insertions(+), 1 deletion(-)
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
index bcbea59bf1..a01a0ad185 100644
--- a/hw/remote/vfio-user-obj.c
+++ b/hw/remote/vfio-user-obj.c
@@ -42,10 +42,12 @@
#include "qapi/error.h"
#include "qapi/qapi-visit-sockets.h"
#include "qemu/notify.h"
+#include "qemu/thread.h"
#include "sysemu/sysemu.h"
#include "libvfio-user.h"
#include "hw/qdev-core.h"
#include "hw/pci/pci.h"
+#include "qemu/timer.h"
#define TYPE_VFU_OBJECT "x-vfio-user-server"
OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
@@ -73,6 +75,8 @@ struct VfuObject {
vfu_ctx_t *vfu_ctx;
PCIDevice *pci_dev;
+
+ int vfu_poll_fd;
};
static void vfu_object_init_ctx(VfuObject *o, Error **errp);
@@ -114,6 +118,62 @@ static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
vfu_object_init_ctx(o, errp);
}
+static void vfu_object_ctx_run(void *opaque)
+{
+ VfuObject *o = opaque;
+ int ret = -1;
+
+ while (ret != 0) {
+ ret = vfu_run_ctx(o->vfu_ctx);
+ if (ret < 0) {
+ if (errno == EINTR) {
+ continue;
+ } else if (errno == ENOTCONN) {
+ qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
+ o->vfu_poll_fd = -1;
+ object_unparent(OBJECT(o));
+ break;
+ } else {
+ error_setg(&error_abort, "vfu: Failed to run device %s - %s",
+ o->device, strerror(errno));
+ break;
+ }
+ }
+ }
+}
+
+static void vfu_object_attach_ctx(void *opaque)
+{
+ VfuObject *o = opaque;
+ GPollFD pfds[1];
+ int ret;
+
+ qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
+
+ pfds[0].fd = o->vfu_poll_fd;
+ pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
+
+retry_attach:
+ ret = vfu_attach_ctx(o->vfu_ctx);
+ if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
+ qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS);
+ goto retry_attach;
+ } else if (ret < 0) {
+ error_setg(&error_abort,
+ "vfu: Failed to attach device %s to context - %s",
+ o->device, strerror(errno));
+ return;
+ }
+
+ o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
+ if (o->vfu_poll_fd < 0) {
+ error_setg(&error_abort, "vfu: Failed to get poll fd %s", o->device);
+ return;
+ }
+
+ qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_ctx_run, NULL, o);
+}
+
/*
* TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
* properties. It also depends on devices instantiated in QEMU. These
@@ -151,7 +211,8 @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp)
return;
}
- o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path, 0,
+ o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path,
+ LIBVFIO_USER_FLAG_ATTACH_NB,
o, VFU_DEV_TYPE_PCI);
if (o->vfu_ctx == NULL) {
error_setg(errp, "vfu: Failed to create context - %s", strerror(errno));
@@ -183,6 +244,21 @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp)
goto fail;
}
+ ret = vfu_realize_ctx(o->vfu_ctx);
+ if (ret < 0) {
+ error_setg(errp, "vfu: Failed to realize device %s- %s",
+ o->device, strerror(errno));
+ goto fail;
+ }
+
+ o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
+ if (o->vfu_poll_fd < 0) {
+ error_setg(errp, "vfu: Failed to get poll fd %s", o->device);
+ goto fail;
+ }
+
+ qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_attach_ctx, NULL, o);
+
return;
fail:
@@ -208,6 +284,8 @@ static void vfu_object_init(Object *obj)
TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE);
return;
}
+
+ o->vfu_poll_fd = -1;
}
static void vfu_object_finalize(Object *obj)
--
2.20.1
On Wed, Dec 15, 2021 at 10:35:31AM -0500, Jagannathan Raman wrote:
> @@ -114,6 +118,62 @@ static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
> vfu_object_init_ctx(o, errp);
> }
>
> +static void vfu_object_ctx_run(void *opaque)
> +{
> + VfuObject *o = opaque;
> + int ret = -1;
> +
> + while (ret != 0) {
> + ret = vfu_run_ctx(o->vfu_ctx);
> + if (ret < 0) {
> + if (errno == EINTR) {
> + continue;
> + } else if (errno == ENOTCONN) {
> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
> + o->vfu_poll_fd = -1;
> + object_unparent(OBJECT(o));
> + break;
If nothing else logs a message then I think that should be done here so
users know why their vfio-user server object disappeared.
> + } else {
> + error_setg(&error_abort, "vfu: Failed to run device %s - %s",
> + o->device, strerror(errno));
error_abort is equivalent to assuming !o->daemon. In the case where the
user doesn't want to automatically shut down the process we need to log
a message without aborting.
> + break;
Indentation is off.
> + }
> + }
> + }
> +}
> +
> +static void vfu_object_attach_ctx(void *opaque)
> +{
> + VfuObject *o = opaque;
> + GPollFD pfds[1];
> + int ret;
> +
> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
> +
> + pfds[0].fd = o->vfu_poll_fd;
> + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> +
> +retry_attach:
> + ret = vfu_attach_ctx(o->vfu_ctx);
> + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
> + qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS);
> + goto retry_attach;
This can block the thread indefinitely. Other events like monitor
commands are not handled in this loop. Please make this asynchronous
(set an fd handler and return from this function so we can try again
later).
The vfu_attach_ctx() implementation synchronously negotiates the
vfio-user connection :(. That's a shame because even if accept(2) is
handled asynchronously, the negotiation can still block. It would be
cleanest to have a fully async libvfio-user's vfu_attach_ctx() API to
avoid blocking. Is that possible?
If libvfio-user can't make vfu_attach_ctx() fully async then it may be
possible to spawn a thread just for the blocking vfu_attach_ctx() call
and report the result back to the event loop (e.g. using EventNotifier).
That adds a bunch of code to work around a blocking API though, so I
guess we can leave the blocking part if necessary.
At the very minimum, please make EAGAIN/EWOULDBLOCK async as mentioned
above and add a comment explaining the situation with the
partially-async vfu_attach_ctx() API so it's clear that this can block
(that way it's clear that you're aware of the issue and this isn't by
accident).
> + } else if (ret < 0) {
> + error_setg(&error_abort,
> + "vfu: Failed to attach device %s to context - %s",
> + o->device, strerror(errno));
error_abort assumes !o->daemon. Please handle the o->daemon == true
case by logging an error without aborting.
> + return;
> + }
> +
> + o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
> + if (o->vfu_poll_fd < 0) {
> + error_setg(&error_abort, "vfu: Failed to get poll fd %s", o->device);
Same here.
> @@ -208,6 +284,8 @@ static void vfu_object_init(Object *obj)
> TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE);
> return;
> }
> +
> + o->vfu_poll_fd = -1;
> }
This must call qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL)
when o->vfu_poll_fd != -1 to avoid leaving a dangling fd handler
callback registered.
> On Dec 16, 2021, at 6:17 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>
> On Wed, Dec 15, 2021 at 10:35:31AM -0500, Jagannathan Raman wrote:
>> @@ -114,6 +118,62 @@ static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
>> vfu_object_init_ctx(o, errp);
>> }
>>
>> +static void vfu_object_ctx_run(void *opaque)
>> +{
>> + VfuObject *o = opaque;
>> + int ret = -1;
>> +
>> + while (ret != 0) {
>> + ret = vfu_run_ctx(o->vfu_ctx);
>> + if (ret < 0) {
>> + if (errno == EINTR) {
>> + continue;
>> + } else if (errno == ENOTCONN) {
>> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
>> + o->vfu_poll_fd = -1;
>> + object_unparent(OBJECT(o));
>> + break;
>
> If nothing else logs a message then I think that should be done here so
> users know why their vfio-user server object disappeared.
Sure will do.
Do you prefer a trace, or a message to the console? Trace makes sense to me.
Presently, the client could unplug the vfio-user device which would trigger the
deletion of this object. This process could happen quietly.
>
>> + } else {
>> + error_setg(&error_abort, "vfu: Failed to run device %s - %s",
>> + o->device, strerror(errno));
>
> error_abort is equivalent to assuming !o->daemon. In the case where the
> user doesn't want to automatically shut down the process we need to log
> a message without aborting.
OK, makes sense.
>
>> + break;
>
> Indentation is off.
>
>> + }
>> + }
>> + }
>> +}
>> +
>> +static void vfu_object_attach_ctx(void *opaque)
>> +{
>> + VfuObject *o = opaque;
>> + GPollFD pfds[1];
>> + int ret;
>> +
>> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
>> +
>> + pfds[0].fd = o->vfu_poll_fd;
>> + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
>> +
>> +retry_attach:
>> + ret = vfu_attach_ctx(o->vfu_ctx);
>> + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
>> + qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS);
>> + goto retry_attach;
>
> This can block the thread indefinitely. Other events like monitor
> commands are not handled in this loop. Please make this asynchronous
> (set an fd handler and return from this function so we can try again
> later).
>
> The vfu_attach_ctx() implementation synchronously negotiates the
> vfio-user connection :(. That's a shame because even if accept(2) is
> handled asynchronously, the negotiation can still block. It would be
> cleanest to have a fully async libvfio-user's vfu_attach_ctx() API to
> avoid blocking. Is that possible?
Thanos / John,
Any thoughts on this?
>
> If libvfio-user can't make vfu_attach_ctx() fully async then it may be
> possible to spawn a thread just for the blocking vfu_attach_ctx() call
> and report the result back to the event loop (e.g. using EventNotifier).
> That adds a bunch of code to work around a blocking API though, so I
> guess we can leave the blocking part if necessary.
>
> At the very minimum, please make EAGAIN/EWOULDBLOCK async as mentioned
> above and add a comment explaining the situation with the
> partially-async vfu_attach_ctx() API so it's clear that this can block
> (that way it's clear that you're aware of the issue and this isn't by
> accident).
Sure, we could make the attach async at QEMU depending on how the
library prefers to do this.
>
>> + } else if (ret < 0) {
>> + error_setg(&error_abort,
>> + "vfu: Failed to attach device %s to context - %s",
>> + o->device, strerror(errno));
>
> error_abort assumes !o->daemon. Please handle the o->daemon == true
> case by logging an error without aborting.
>
>> + return;
>> + }
>> +
>> + o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
>> + if (o->vfu_poll_fd < 0) {
>> + error_setg(&error_abort, "vfu: Failed to get poll fd %s", o->device);
>
> Same here.
>
>> @@ -208,6 +284,8 @@ static void vfu_object_init(Object *obj)
>> TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE);
>> return;
>> }
>> +
>> + o->vfu_poll_fd = -1;
>> }
>
> This must call qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL)
> when o->vfu_poll_fd != -1 to avoid leaving a dangling fd handler
> callback registered.
This is during the init phase, and the FD handlers are not set. Do you mean
to add this at finalize?
I agree it’s good to explicitly add this at finalize. But vfu_destroy_ctx() should
trigger an ENOTCONN, which would do it anyway.
Thank you!
--
Jag
> -----Original Message-----
> From: Jag Raman <jag.raman@oracle.com>
> Sent: 17 December 2021 18:00
> To: Stefan Hajnoczi <stefanha@redhat.com>; John Levon
> <john.levon@nutanix.com>; Thanos Makatos <thanos.makatos@nutanix.com>
> Cc: qemu-devel <qemu-devel@nongnu.org>; Alex Williamson
> <alex.williamson@redhat.com>; Marc-André Lureau
> <marcandre.lureau@gmail.com>; Philippe Mathieu-Daudé
> <philmd@redhat.com>; pbonzini@redhat.com; alex.bennee@linaro.org;
> thuth@redhat.com; crosa@redhat.com; wainersm@redhat.com;
> bleal@redhat.com; Elena Ufimtseva <elena.ufimtseva@oracle.com>; John
> Levon <john.levon@nutanix.com>; John Johnson
> <john.g.johnson@oracle.com>; Thanos Makatos
> <thanos.makatos@nutanix.com>; Swapnil Ingle <swapnil.ingle@nutanix.com>
> Subject: Re: [PATCH v4 07/14] vfio-user: run vfio-user context
>
>
>
> > On Dec 16, 2021, at 6:17 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> >
> > On Wed, Dec 15, 2021 at 10:35:31AM -0500, Jagannathan Raman wrote:
> >> @@ -114,6 +118,62 @@ static void vfu_object_set_device(Object *obj,
> const char *str, Error **errp)
> >> vfu_object_init_ctx(o, errp);
> >> }
> >>
> >> +static void vfu_object_ctx_run(void *opaque)
> >> +{
> >> + VfuObject *o = opaque;
> >> + int ret = -1;
> >> +
> >> + while (ret != 0) {
> >> + ret = vfu_run_ctx(o->vfu_ctx);
> >> + if (ret < 0) {
> >> + if (errno == EINTR) {
> >> + continue;
> >> + } else if (errno == ENOTCONN) {
> >> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
> >> + o->vfu_poll_fd = -1;
> >> + object_unparent(OBJECT(o));
> >> + break;
> >
> > If nothing else logs a message then I think that should be done here so
> > users know why their vfio-user server object disappeared.
>
> Sure will do.
>
> Do you prefer a trace, or a message to the console? Trace makes sense to me.
> Presently, the client could unplug the vfio-user device which would trigger the
> deletion of this object. This process could happen quietly.
>
> >
> >> + } else {
> >> + error_setg(&error_abort, "vfu: Failed to run device %s - %s",
> >> + o->device, strerror(errno));
> >
> > error_abort is equivalent to assuming !o->daemon. In the case where the
> > user doesn't want to automatically shut down the process we need to log
> > a message without aborting.
>
> OK, makes sense.
>
> >
> >> + break;
> >
> > Indentation is off.
> >
> >> + }
> >> + }
> >> + }
> >> +}
> >> +
> >> +static void vfu_object_attach_ctx(void *opaque)
> >> +{
> >> + VfuObject *o = opaque;
> >> + GPollFD pfds[1];
> >> + int ret;
> >> +
> >> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
> >> +
> >> + pfds[0].fd = o->vfu_poll_fd;
> >> + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> >> +
> >> +retry_attach:
> >> + ret = vfu_attach_ctx(o->vfu_ctx);
> >> + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
> >> + qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS);
> >> + goto retry_attach;
> >
> > This can block the thread indefinitely. Other events like monitor
> > commands are not handled in this loop. Please make this asynchronous
> > (set an fd handler and return from this function so we can try again
> > later).
> >
> > The vfu_attach_ctx() implementation synchronously negotiates the
> > vfio-user connection :(. That's a shame because even if accept(2) is
> > handled asynchronously, the negotiation can still block. It would be
> > cleanest to have a fully async libvfio-user's vfu_attach_ctx() API to
> > avoid blocking. Is that possible?
>
> Thanos / John,
>
> Any thoughts on this?
I'm discussing this with John and FYI there are other places where libvfio-user can block, e.g. sending a response or receiving a command. Is it just the negotiation you want it to be asynchronous or _all_ libvfio-user operations? Making libvfio-user fully asynchronous might require a substantial API rewrite.
>
> >
> > If libvfio-user can't make vfu_attach_ctx() fully async then it may be
> > possible to spawn a thread just for the blocking vfu_attach_ctx() call
> > and report the result back to the event loop (e.g. using EventNotifier).
> > That adds a bunch of code to work around a blocking API though, so I
> > guess we can leave the blocking part if necessary.
> >
> > At the very minimum, please make EAGAIN/EWOULDBLOCK async as
> mentioned
> > above and add a comment explaining the situation with the
> > partially-async vfu_attach_ctx() API so it's clear that this can block
> > (that way it's clear that you're aware of the issue and this isn't by
> > accident).
>
> Sure, we could make the attach async at QEMU depending on how the
> library prefers to do this.
>
> >
> >> + } else if (ret < 0) {
> >> + error_setg(&error_abort,
> >> + "vfu: Failed to attach device %s to context - %s",
> >> + o->device, strerror(errno));
> >
> > error_abort assumes !o->daemon. Please handle the o->daemon == true
> > case by logging an error without aborting.
> >
> >> + return;
> >> + }
> >> +
> >> + o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
> >> + if (o->vfu_poll_fd < 0) {
> >> + error_setg(&error_abort, "vfu: Failed to get poll fd %s", o->device);
> >
> > Same here.
> >
> >> @@ -208,6 +284,8 @@ static void vfu_object_init(Object *obj)
> >> TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE);
> >> return;
> >> }
> >> +
> >> + o->vfu_poll_fd = -1;
> >> }
> >
> > This must call qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL)
> > when o->vfu_poll_fd != -1 to avoid leaving a dangling fd handler
> > callback registered.
>
> This is during the init phase, and the FD handlers are not set. Do you mean
> to add this at finalize?
>
> I agree it’s good to explicitly add this at finalize. But vfu_destroy_ctx() should
> trigger a ENOTCONN, which would do it anyway.
>
> Thank you!
> --
> Jag
On Wed, Jan 05, 2022 at 10:38:10AM +0000, Thanos Makatos wrote:
>
>
> > -----Original Message-----
> > From: Jag Raman <jag.raman@oracle.com>
> > Sent: 17 December 2021 18:00
> > To: Stefan Hajnoczi <stefanha@redhat.com>; John Levon
> > <john.levon@nutanix.com>; Thanos Makatos <thanos.makatos@nutanix.com>
> > Cc: qemu-devel <qemu-devel@nongnu.org>; Alex Williamson
> > <alex.williamson@redhat.com>; Marc-André Lureau
> > <marcandre.lureau@gmail.com>; Philippe Mathieu-Daudé
> > <philmd@redhat.com>; pbonzini@redhat.com; alex.bennee@linaro.org;
> > thuth@redhat.com; crosa@redhat.com; wainersm@redhat.com;
> > bleal@redhat.com; Elena Ufimtseva <elena.ufimtseva@oracle.com>; John
> > Levon <john.levon@nutanix.com>; John Johnson
> > <john.g.johnson@oracle.com>; Thanos Makatos
> > <thanos.makatos@nutanix.com>; Swapnil Ingle <swapnil.ingle@nutanix.com>
> > Subject: Re: [PATCH v4 07/14] vfio-user: run vfio-user context
> >
> >
> >
> > > On Dec 16, 2021, at 6:17 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> > >
> > > On Wed, Dec 15, 2021 at 10:35:31AM -0500, Jagannathan Raman wrote:
> > >> @@ -114,6 +118,62 @@ static void vfu_object_set_device(Object *obj,
> > const char *str, Error **errp)
> > >> vfu_object_init_ctx(o, errp);
> > >> }
> > >>
> > >> +static void vfu_object_ctx_run(void *opaque)
> > >> +{
> > >> + VfuObject *o = opaque;
> > >> + int ret = -1;
> > >> +
> > >> + while (ret != 0) {
> > >> + ret = vfu_run_ctx(o->vfu_ctx);
> > >> + if (ret < 0) {
> > >> + if (errno == EINTR) {
> > >> + continue;
> > >> + } else if (errno == ENOTCONN) {
> > >> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
> > >> + o->vfu_poll_fd = -1;
> > >> + object_unparent(OBJECT(o));
> > >> + break;
> > >
> > > If nothing else logs a message then I think that should be done here so
> > > users know why their vfio-user server object disappeared.
> >
> > Sure will do.
> >
> > Do you prefer a trace, or a message to the console? Trace makes sense to me.
> > Presently, the client could unplug the vfio-user device which would trigger the
> > deletion of this object. This process could happen quietly.
> >
> > >
> > >> + } else {
> > >> + error_setg(&error_abort, "vfu: Failed to run device %s - %s",
> > >> + o->device, strerror(errno));
> > >
> > > error_abort is equivalent to assuming !o->daemon. In the case where the
> > > user doesn't want to automatically shut down the process we need to log
> > > a message without aborting.
> >
> > OK, makes sense.
> >
> > >
> > >> + break;
> > >
> > > Indentation is off.
> > >
> > >> + }
> > >> + }
> > >> + }
> > >> +}
> > >> +
> > >> +static void vfu_object_attach_ctx(void *opaque)
> > >> +{
> > >> + VfuObject *o = opaque;
> > >> + GPollFD pfds[1];
> > >> + int ret;
> > >> +
> > >> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
> > >> +
> > >> + pfds[0].fd = o->vfu_poll_fd;
> > >> + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> > >> +
> > >> +retry_attach:
> > >> + ret = vfu_attach_ctx(o->vfu_ctx);
> > >> + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
> > >> + qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS);
> > >> + goto retry_attach;
> > >
> > > This can block the thread indefinitely. Other events like monitor
> > > commands are not handled in this loop. Please make this asynchronous
> > > (set an fd handler and return from this function so we can try again
> > > later).
> > >
> > > The vfu_attach_ctx() implementation synchronously negotiates the
> > > vfio-user connection :(. That's a shame because even if accept(2) is
> > > handled asynchronously, the negotiation can still block. It would be
> > > cleanest to have a fully async libvfio-user's vfu_attach_ctx() API to
> > > avoid blocking. Is that possible?
> >
> > Thanos / John,
> >
> > Any thoughts on this?
>
> I'm discussing this with John and FYI there are other places where libvfio-user can block, e.g. sending a response or receiving a command. Is it just the negotiation you want it to be asynchronous or _all_ libvfio-user operations? Making libvfio-user fully asynchronous might require a substantial API rewrite.
I see at least two reasons for a fully async API:
1. The program wants to handle other events (e.g. a management REST API)
from the same event loop thread that invokes libvfio-user. If
libvfio-user blocks then the other events cannot be handled within a
reasonable time frame.
The workaround for this is to use multi-threading and ignore the
event-driven architecture implied by vfu_get_poll_fd().
2. The program handles multiple clients that do not trust each other.
This could be a software-defined network switch or storage appliance.
A malicious client can cause a denial-of-service by making a
libvfio-user call block.
Again, the program needs separate threads instead of an event loop to
work around this.
The downside to a sync approach is that programs that already have an
event loop require extra code to set up dedicated threads for
libvfio-user. That's a library integration/usability issue.
In some cases it's okay to block: when the program doesn't need to
handle other events. If most users of libvfio-user are expected to fall
into this category then there's no need to change the API.
Either way, the doc comments in libvfio-user.h aren't very clear.
Someone integrating this library may think vfu_get_poll_fd() allows for
fully async operation.
Stefan
On Thu, Jan 06, 2022 at 01:35:32PM +0000, Stefan Hajnoczi wrote:
> > > >> +static void vfu_object_attach_ctx(void *opaque)
> > > >> +{
> > > >> + VfuObject *o = opaque;
> > > >> + GPollFD pfds[1];
> > > >> + int ret;
> > > >> +
> > > >> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
> > > >> +
> > > >> + pfds[0].fd = o->vfu_poll_fd;
> > > >> + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> > > >> +
> > > >> +retry_attach:
> > > >> + ret = vfu_attach_ctx(o->vfu_ctx);
> > > >> + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
> > > >> + qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS);
> > > >> + goto retry_attach;
> > > >
> > > > This can block the thread indefinitely. Other events like monitor
> > > > commands are not handled in this loop. Please make this asynchronous
> > > > (set an fd handler and return from this function so we can try again
> > > > later).
> > > >
> > > > The vfu_attach_ctx() implementation synchronously negotiates the
> > > > vfio-user connection :(. That's a shame because even if accept(2) is
> > > > handled asynchronously, the negotiation can still block. It would be
> > > > cleanest to have a fully async libvfio-user's vfu_attach_ctx() API to
> > > > avoid blocking. Is that possible?
> > >
> > > Thanos / John,
> > >
> > > Any thoughts on this?
> >
> > I'm discussing this with John and FYI there are other places where libvfio-user can block, e.g. sending a response or receiving a command. Is it just the negotiation you want it to be asynchronous or _all_ libvfio-user operations? Making libvfio-user fully asynchronous might require a substantial API rewrite.
>
> I see at least two reasons for a fully async API:
>
> 1. The program wants to handle other events (e.g. a management REST API)
> from the same event loop thread that invokes libvfio-user. If
> libvfio-user blocks then the other events cannot be handled within a
> reasonable time frame.
>
> The workaround for this is to use multi-threading and ignore the
> event-driven architecture implied by vfu_get_poll_fd().
>
> 2. The program handles multiple clients that do not trust each other.
> This could be a software-defined network switch or storage appliance.
> A malicious client can cause a denial-of-service by making a
> libvfio-user call block.
>
> Again, the program needs separate threads instead of an event loop to
> work around this.
Hi Stefan, we're certainly aware of the rationale. Ultimately we just haven't
yet found resources to fix this: in practice, we don't really hit problems from
the parts that are still synchronous. Of course, that's not a good argument when
it comes to your second issue, and indeed the library is not currently suitable
for multiple untrusted clients.
For Jag, though: patches are welcome. But it's not just negotiate(): all sorts of
things are potentially synchronous - for example, it's expected that the region
read/write callbacks are done synchronously. Any other client reply, or
server->client message, is also synchronous.
It's partly why we haven't yet stabilised the API.
regards
john
On Mon, Jan 10, 2022 at 05:56:25PM +0000, John Levon wrote:
> On Thu, Jan 06, 2022 at 01:35:32PM +0000, Stefan Hajnoczi wrote:
>
> > > > >> +static void vfu_object_attach_ctx(void *opaque)
> > > > >> +{
> > > > >> + VfuObject *o = opaque;
> > > > >> + GPollFD pfds[1];
> > > > >> + int ret;
> > > > >> +
> > > > >> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
> > > > >> +
> > > > >> + pfds[0].fd = o->vfu_poll_fd;
> > > > >> + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> > > > >> +
> > > > >> +retry_attach:
> > > > >> + ret = vfu_attach_ctx(o->vfu_ctx);
> > > > >> + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
> > > > >> + qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS);
> > > > >> + goto retry_attach;
> > > > >
> > > > > This can block the thread indefinitely. Other events like monitor
> > > > > commands are not handled in this loop. Please make this asynchronous
> > > > > (set an fd handler and return from this function so we can try again
> > > > > later).
> > > > >
> > > > > The vfu_attach_ctx() implementation synchronously negotiates the
> > > > > vfio-user connection :(. That's a shame because even if accept(2) is
> > > > > handled asynchronously, the negotiation can still block. It would be
> > > > > cleanest to have a fully async libvfio-user's vfu_attach_ctx() API to
> > > > > avoid blocking. Is that possible?
> > > >
> > > > Thanos / John,
> > > >
> > > > Any thoughts on this?
> > >
> > > I'm discussing this with John and FYI there are other places where libvfio-user can block, e.g. sending a response or receiving a command. Is it just the negotiation you want it to be asynchronous or _all_ libvfio-user operations? Making libvfio-user fully asynchronous might require a substantial API rewrite.
> >
> > I see at least two reasons for a fully async API:
> >
> > 1. The program wants to handle other events (e.g. a management REST API)
> > from the same event loop thread that invokes libvfio-user. If
> > libvfio-user blocks then the other events cannot be handled within a
> > reasonable time frame.
> >
> > The workaround for this is to use multi-threading and ignore the
> > event-driven architecture implied by vfu_get_poll_fd().
> >
> > 2. The program handles multiple clients that do not trust each other.
> > This could be a software-defined network switch or storage appliance.
> > A malicious client can cause a denial-of-service by making a
> > libvfio-user call block.
> >
> > Again, the program needs separate threads instead of an event loop to
> > work around this.
>
> Hi Stefan, we're certainly aware of the rationale. Ultimately we just haven't
> yet found resources to fix this: in practice, we don't really hit problems from
> the parts that are still synchronous. Of course, that's not a good argument when
> it comes to your second issue, and indeed the library is not currently suitable
> for multiple untrusted clients.
>
> For Jag but: patches are welcome. But it's not just negotiate(): all sorts of
> things are potentially synchronous - for example, it's expected that the region
> read/write callbacks are done synchronously. Any other client reply, or
> server->client message, is also synchronous.
>
> It's partly why we haven't yet stabilised the API.
From the QEMU side it's okay to have blocking code in this experimental
feature if users are aware of the limitation (e.g. the monitor is
unresponsive if vfio-user blocks) and multiple untrusted clients are not
supported. The QEMU code should also make its limitations clear so
readers are aware of them and know the author chose this approach
intentionally rather than due to an oversight.
Jag: Please mention this in the documentation and add a comment to
vfu_object_attach_ctx() explaining that this code currently blocks.
I think in the long run it will be necessary to address it, e.g. in the
multi-client case. That can either be done with threads or by making
libvfio-user fully async.
Stefan
> On Jan 11, 2022, at 4:36 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>
> On Mon, Jan 10, 2022 at 05:56:25PM +0000, John Levon wrote:
>> On Thu, Jan 06, 2022 at 01:35:32PM +0000, Stefan Hajnoczi wrote:
>>
>>>>>>> +static void vfu_object_attach_ctx(void *opaque)
>>>>>>> +{
>>>>>>> + VfuObject *o = opaque;
>>>>>>> + GPollFD pfds[1];
>>>>>>> + int ret;
>>>>>>> +
>>>>>>> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
>>>>>>> +
>>>>>>> + pfds[0].fd = o->vfu_poll_fd;
>>>>>>> + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
>>>>>>> +
>>>>>>> +retry_attach:
>>>>>>> + ret = vfu_attach_ctx(o->vfu_ctx);
>>>>>>> + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
>>>>>>> + qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS);
>>>>>>> + goto retry_attach;
>>>>>>
>>>>>> This can block the thread indefinitely. Other events like monitor
>>>>>> commands are not handled in this loop. Please make this asynchronous
>>>>>> (set an fd handler and return from this function so we can try again
>>>>>> later).
>>>>>>
>>>>>> The vfu_attach_ctx() implementation synchronously negotiates the
>>>>>> vfio-user connection :(. That's a shame because even if accept(2) is
>>>>>> handled asynchronously, the negotiation can still block. It would be
>>>>>> cleanest to have a fully async libvfio-user's vfu_attach_ctx() API to
>>>>>> avoid blocking. Is that possible?
>>>>>
>>>>> Thanos / John,
>>>>>
>>>>> Any thoughts on this?
>>>>
>>>> I'm discussing this with John and FYI there are other places where libvfio-user can block, e.g. sending a response or receiving a command. Is it just the negotiation you want it to be asynchronous or _all_ libvfio-user operations? Making libvfio-user fully asynchronous might require a substantial API rewrite.
>>>
>>> I see at least two reasons for a fully async API:
>>>
>>> 1. The program wants to handle other events (e.g. a management REST API)
>>> from the same event loop thread that invokes libvfio-user. If
>>> libvfio-user blocks then the other events cannot be handled within a
>>> reasonable time frame.
>>>
>>> The workaround for this is to use multi-threading and ignore the
>>> event-driven architecture implied by vfu_get_poll_fd().
>>>
>>> 2. The program handles multiple clients that do not trust each other.
>>> This could be a software-defined network switch or storage appliance.
>>> A malicious client can cause a denial-of-service by making a
>>> libvfio-user call block.
>>>
>>> Again, the program needs separate threads instead of an event loop to
>>> work around this.
>>
>> Hi Stefan, we're certainly aware of the rationale. Ultimately we just haven't
>> yet found resources to fix this: in practice, we don't really hit problems from
>> the parts that are still synchronous. Of course, that's not a good argument when
>> it comes to your second issue, and indeed the library is not currently suitable
>> for multiple untrusted clients.
>>
>> For Jag but: patches are welcome. But it's not just negotiate(): all sorts of
>> things are potentially synchronous - for example, it's expected that the region
>> read/write callbacks are done synchronously. Any other client reply, or
>> server->client message, is also synchronous.
>>
>> It's partly why we haven't yet stabilised the API.
>
> From the QEMU side it's okay to have blocking code in this experimental
> feature if users are aware of the limitation (e.g. the monitor is
> unresponsive if vfio-user blocks) and multiple untrusted clients are not
> supported. The QEMU code should also make its limitations clear so
> readers are aware of them and know the author chose this approach
> intentionally rather than due to an oversight.
>
> Jag: Please mention this in the documentation and add a comment to
> vfu_object_attach_ctx() explaining that this code currently blocks.
Sure, will do.
Thank you!
--
Jag
>
> I think in the long run it will be necessary to address it, e.g. in the
> multi-client case. That can either be done with threads or by making
> libvfio-user fully async.
>
> Stefan
© 2016 - 2026 Red Hat, Inc.