[RFC 1/3] hmp: avoid the nested event loop in handle_hmp_command()

Stefan Hajnoczi posted 3 patches 1 year, 2 months ago
Maintainers: Paolo Bonzini <pbonzini@redhat.com>, "Dr. David Alan Gilbert" <dave@treblig.org>, "Daniel P. Berrangé" <berrange@redhat.com>, Eduardo Habkost <eduardo@habkost.net>, Markus Armbruster <armbru@redhat.com>, Eric Blake <eblake@redhat.com>
[RFC 1/3] hmp: avoid the nested event loop in handle_hmp_command()
Posted by Stefan Hajnoczi 1 year, 2 months ago
Coroutine HMP commands currently run to completion in a nested event
loop with the Big QEMU Lock (BQL) held. The call_rcu thread also uses
the BQL and cannot process work while the coroutine monitor command is
running. A deadlock occurs when monitor commands attempt to wait for
call_rcu work to finish.

This patch refactors the HMP monitor to use the existing event loop
instead of creating a nested event loop. This will allow the next
patches to rely on draining call_rcu work.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 monitor/hmp.c | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/monitor/hmp.c b/monitor/hmp.c
index 69c1b7e98a..6cff2810aa 100644
--- a/monitor/hmp.c
+++ b/monitor/hmp.c
@@ -1111,15 +1111,17 @@ typedef struct HandleHmpCommandCo {
     Monitor *mon;
     const HMPCommand *cmd;
     QDict *qdict;
-    bool done;
 } HandleHmpCommandCo;
 
-static void handle_hmp_command_co(void *opaque)
+static void coroutine_fn handle_hmp_command_co(void *opaque)
 {
     HandleHmpCommandCo *data = opaque;
+
     handle_hmp_command_exec(data->mon, data->cmd, data->qdict);
     monitor_set_cur(qemu_coroutine_self(), NULL);
-    data->done = true;
+    qobject_unref(data->qdict);
+    monitor_resume(data->mon);
+    g_free(data);
 }
 
 void handle_hmp_command(MonitorHMP *mon, const char *cmdline)
@@ -1157,20 +1159,20 @@ void handle_hmp_command(MonitorHMP *mon, const char *cmdline)
         Monitor *old_mon = monitor_set_cur(qemu_coroutine_self(), &mon->common);
         handle_hmp_command_exec(&mon->common, cmd, qdict);
         monitor_set_cur(qemu_coroutine_self(), old_mon);
+        qobject_unref(qdict);
     } else {
-        HandleHmpCommandCo data = {
-            .mon = &mon->common,
-            .cmd = cmd,
-            .qdict = qdict,
-            .done = false,
-        };
-        Coroutine *co = qemu_coroutine_create(handle_hmp_command_co, &data);
+        HandleHmpCommandCo *data; /* freed by handle_hmp_command_co() */
+
+        data = g_new(HandleHmpCommandCo, 1);
+        data->mon = &mon->common;
+        data->cmd = cmd;
+        data->qdict = qdict; /* freed by handle_hmp_command_co() */
+
+        Coroutine *co = qemu_coroutine_create(handle_hmp_command_co, data);
+        monitor_suspend(&mon->common); /* resumed by handle_hmp_command_co() */
         monitor_set_cur(co, &mon->common);
         aio_co_enter(qemu_get_aio_context(), co);
-        AIO_WAIT_WHILE_UNLOCKED(NULL, !data.done);
     }
-
-    qobject_unref(qdict);
 }
 
 static void cmd_completion(MonitorHMP *mon, const char *name, const char *list)
-- 
2.41.0
Re: [RFC 1/3] hmp: avoid the nested event loop in handle_hmp_command()
Posted by Dr. David Alan Gilbert 1 year, 2 months ago
* Stefan Hajnoczi (stefanha@redhat.com) wrote:
> Coroutine HMP commands currently run to completion in a nested event
> loop with the Big QEMU Lock (BQL) held. The call_rcu thread also uses
> the BQL and cannot process work while the coroutine monitor command is
> running. A deadlock occurs when monitor commands attempt to wait for
> call_rcu work to finish.

I hate to think if there's anywhere else that ends up doing that
other than the monitors.

But, not knowing the semantics of the rcu code, it looks kind of OK to
me from the monitor.

(Do you ever get anything like qemu quitting from one of the other
monitors while this coroutine hasn't been run?)

Dave

> This patch refactors the HMP monitor to use the existing event loop
> instead of creating a nested event loop. This will allow the next
> patches to rely on draining call_rcu work.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  monitor/hmp.c | 28 +++++++++++++++-------------
>  1 file changed, 15 insertions(+), 13 deletions(-)
> 
> diff --git a/monitor/hmp.c b/monitor/hmp.c
> index 69c1b7e98a..6cff2810aa 100644
> --- a/monitor/hmp.c
> +++ b/monitor/hmp.c
> @@ -1111,15 +1111,17 @@ typedef struct HandleHmpCommandCo {
>      Monitor *mon;
>      const HMPCommand *cmd;
>      QDict *qdict;
> -    bool done;
>  } HandleHmpCommandCo;
>  
> -static void handle_hmp_command_co(void *opaque)
> +static void coroutine_fn handle_hmp_command_co(void *opaque)
>  {
>      HandleHmpCommandCo *data = opaque;
> +
>      handle_hmp_command_exec(data->mon, data->cmd, data->qdict);
>      monitor_set_cur(qemu_coroutine_self(), NULL);
> -    data->done = true;
> +    qobject_unref(data->qdict);
> +    monitor_resume(data->mon);
> +    g_free(data);
>  }
>  
>  void handle_hmp_command(MonitorHMP *mon, const char *cmdline)
> @@ -1157,20 +1159,20 @@ void handle_hmp_command(MonitorHMP *mon, const char *cmdline)
>          Monitor *old_mon = monitor_set_cur(qemu_coroutine_self(), &mon->common);
>          handle_hmp_command_exec(&mon->common, cmd, qdict);
>          monitor_set_cur(qemu_coroutine_self(), old_mon);
> +        qobject_unref(qdict);
>      } else {
> -        HandleHmpCommandCo data = {
> -            .mon = &mon->common,
> -            .cmd = cmd,
> -            .qdict = qdict,
> -            .done = false,
> -        };
> -        Coroutine *co = qemu_coroutine_create(handle_hmp_command_co, &data);
> +        HandleHmpCommandCo *data; /* freed by handle_hmp_command_co() */
> +
> +        data = g_new(HandleHmpCommandCo, 1);
> +        data->mon = &mon->common;
> +        data->cmd = cmd;
> +        data->qdict = qdict; /* freed by handle_hmp_command_co() */
> +
> +        Coroutine *co = qemu_coroutine_create(handle_hmp_command_co, data);
> +        monitor_suspend(&mon->common); /* resumed by handle_hmp_command_co() */
>          monitor_set_cur(co, &mon->common);
>          aio_co_enter(qemu_get_aio_context(), co);
> -        AIO_WAIT_WHILE_UNLOCKED(NULL, !data.done);
>      }
> -
> -    qobject_unref(qdict);
>  }
>  
>  static void cmd_completion(MonitorHMP *mon, const char *name, const char *list)
> -- 
> 2.41.0
> 
> 
-- 
 -----Open up your eyes, open up your mind, open up your code -------   
/ Dr. David Alan Gilbert    |       Running GNU/Linux       | Happy  \ 
\        dave @ treblig.org |                               | In Hex /
 \ _________________________|_____ http://www.treblig.org   |_______/
Re: [RFC 1/3] hmp: avoid the nested event loop in handle_hmp_command()
Posted by Stefan Hajnoczi 1 year, 2 months ago
On Thu, Sep 07, 2023 at 01:06:39AM +0000, Dr. David Alan Gilbert wrote:
> * Stefan Hajnoczi (stefanha@redhat.com) wrote:
> > Coroutine HMP commands currently run to completion in a nested event
> > loop with the Big QEMU Lock (BQL) held. The call_rcu thread also uses
> > the BQL and cannot process work while the coroutine monitor command is
> > running. A deadlock occurs when monitor commands attempt to wait for
> > call_rcu work to finish.
> 
> I hate to think if there's anywhere else that ends up doing that
> other than the monitors.

Luckily drain_call_rcu() has few callers: just
xen_block_device_destroy() and qmp_device_add(). We only need to worry
about their call stacks.

I haven't looked at the Xen code.

> 
> But, not knowing the semantics of the rcu code, it looks kind of OK to
> me from the monitor.
> 
> (Do you ever get anything like qemu quitting from one of the other
> monitors while this coroutine hasn't been run?)

Not sure what you mean?

Stefan

> 
> Dave
> 
> > This patch refactors the HMP monitor to use the existing event loop
> > instead of creating a nested event loop. This will allow the next
> > patches to rely on draining call_rcu work.
> > 
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> >  monitor/hmp.c | 28 +++++++++++++++-------------
> >  1 file changed, 15 insertions(+), 13 deletions(-)
> > 
> > diff --git a/monitor/hmp.c b/monitor/hmp.c
> > index 69c1b7e98a..6cff2810aa 100644
> > --- a/monitor/hmp.c
> > +++ b/monitor/hmp.c
> > @@ -1111,15 +1111,17 @@ typedef struct HandleHmpCommandCo {
> >      Monitor *mon;
> >      const HMPCommand *cmd;
> >      QDict *qdict;
> > -    bool done;
> >  } HandleHmpCommandCo;
> >  
> > -static void handle_hmp_command_co(void *opaque)
> > +static void coroutine_fn handle_hmp_command_co(void *opaque)
> >  {
> >      HandleHmpCommandCo *data = opaque;
> > +
> >      handle_hmp_command_exec(data->mon, data->cmd, data->qdict);
> >      monitor_set_cur(qemu_coroutine_self(), NULL);
> > -    data->done = true;
> > +    qobject_unref(data->qdict);
> > +    monitor_resume(data->mon);
> > +    g_free(data);
> >  }
> >  
> >  void handle_hmp_command(MonitorHMP *mon, const char *cmdline)
> > @@ -1157,20 +1159,20 @@ void handle_hmp_command(MonitorHMP *mon, const char *cmdline)
> >          Monitor *old_mon = monitor_set_cur(qemu_coroutine_self(), &mon->common);
> >          handle_hmp_command_exec(&mon->common, cmd, qdict);
> >          monitor_set_cur(qemu_coroutine_self(), old_mon);
> > +        qobject_unref(qdict);
> >      } else {
> > -        HandleHmpCommandCo data = {
> > -            .mon = &mon->common,
> > -            .cmd = cmd,
> > -            .qdict = qdict,
> > -            .done = false,
> > -        };
> > -        Coroutine *co = qemu_coroutine_create(handle_hmp_command_co, &data);
> > +        HandleHmpCommandCo *data; /* freed by handle_hmp_command_co() */
> > +
> > +        data = g_new(HandleHmpCommandCo, 1);
> > +        data->mon = &mon->common;
> > +        data->cmd = cmd;
> > +        data->qdict = qdict; /* freed by handle_hmp_command_co() */
> > +
> > +        Coroutine *co = qemu_coroutine_create(handle_hmp_command_co, data);
> > +        monitor_suspend(&mon->common); /* resumed by handle_hmp_command_co() */
> >          monitor_set_cur(co, &mon->common);
> >          aio_co_enter(qemu_get_aio_context(), co);
> > -        AIO_WAIT_WHILE_UNLOCKED(NULL, !data.done);
> >      }
> > -
> > -    qobject_unref(qdict);
> >  }
> >  
> >  static void cmd_completion(MonitorHMP *mon, const char *name, const char *list)
> > -- 
> > 2.41.0
> > 
> > 
> -- 
>  -----Open up your eyes, open up your mind, open up your code -------   
> / Dr. David Alan Gilbert    |       Running GNU/Linux       | Happy  \ 
> \        dave @ treblig.org |                               | In Hex /
>  \ _________________________|_____ http://www.treblig.org   |_______/
> 
Re: [RFC 1/3] hmp: avoid the nested event loop in handle_hmp_command()
Posted by Dr. David Alan Gilbert 1 year, 2 months ago
* Stefan Hajnoczi (stefanha@redhat.com) wrote:
> On Thu, Sep 07, 2023 at 01:06:39AM +0000, Dr. David Alan Gilbert wrote:
> > * Stefan Hajnoczi (stefanha@redhat.com) wrote:
> > > Coroutine HMP commands currently run to completion in a nested event
> > > loop with the Big QEMU Lock (BQL) held. The call_rcu thread also uses
> > > the BQL and cannot process work while the coroutine monitor command is
> > > running. A deadlock occurs when monitor commands attempt to wait for
> > > call_rcu work to finish.
> > 
> > I hate to think if there's anywhere else that ends up doing that
> > other than the monitors.
> 
> Luckily drain_call_rcu() has few callers: just
> xen_block_device_destroy() and qmp_device_add(). We only need to worry
> about their call stacks.
> 
> I haven't looked at the Xen code.
> 
> > 
> > But, not knowing the semantics of the rcu code, it looks kind of OK to
> > me from the monitor.
> > 
> > (Do you ever get anything like qemu quitting from one of the other
> > monitors while this coroutine hasn't been run?)
> 
> Not sure what you mean?

Imagine that just after you create your coroutine, a vCPU does a
shutdown and qemu is configured to quit, or on another monitor someone
does a quit;  does your coroutine get executed or not?

Dave

> Stefan
> 
> > 
> > Dave
> > 
> > > This patch refactors the HMP monitor to use the existing event loop
> > > instead of creating a nested event loop. This will allow the next
> > > patches to rely on draining call_rcu work.
> > > 
> > > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > > ---
> > >  monitor/hmp.c | 28 +++++++++++++++-------------
> > >  1 file changed, 15 insertions(+), 13 deletions(-)
> > > 
> > > diff --git a/monitor/hmp.c b/monitor/hmp.c
> > > index 69c1b7e98a..6cff2810aa 100644
> > > --- a/monitor/hmp.c
> > > +++ b/monitor/hmp.c
> > > @@ -1111,15 +1111,17 @@ typedef struct HandleHmpCommandCo {
> > >      Monitor *mon;
> > >      const HMPCommand *cmd;
> > >      QDict *qdict;
> > > -    bool done;
> > >  } HandleHmpCommandCo;
> > >  
> > > -static void handle_hmp_command_co(void *opaque)
> > > +static void coroutine_fn handle_hmp_command_co(void *opaque)
> > >  {
> > >      HandleHmpCommandCo *data = opaque;
> > > +
> > >      handle_hmp_command_exec(data->mon, data->cmd, data->qdict);
> > >      monitor_set_cur(qemu_coroutine_self(), NULL);
> > > -    data->done = true;
> > > +    qobject_unref(data->qdict);
> > > +    monitor_resume(data->mon);
> > > +    g_free(data);
> > >  }
> > >  
> > >  void handle_hmp_command(MonitorHMP *mon, const char *cmdline)
> > > @@ -1157,20 +1159,20 @@ void handle_hmp_command(MonitorHMP *mon, const char *cmdline)
> > >          Monitor *old_mon = monitor_set_cur(qemu_coroutine_self(), &mon->common);
> > >          handle_hmp_command_exec(&mon->common, cmd, qdict);
> > >          monitor_set_cur(qemu_coroutine_self(), old_mon);
> > > +        qobject_unref(qdict);
> > >      } else {
> > > -        HandleHmpCommandCo data = {
> > > -            .mon = &mon->common,
> > > -            .cmd = cmd,
> > > -            .qdict = qdict,
> > > -            .done = false,
> > > -        };
> > > -        Coroutine *co = qemu_coroutine_create(handle_hmp_command_co, &data);
> > > +        HandleHmpCommandCo *data; /* freed by handle_hmp_command_co() */
> > > +
> > > +        data = g_new(HandleHmpCommandCo, 1);
> > > +        data->mon = &mon->common;
> > > +        data->cmd = cmd;
> > > +        data->qdict = qdict; /* freed by handle_hmp_command_co() */
> > > +
> > > +        Coroutine *co = qemu_coroutine_create(handle_hmp_command_co, data);
> > > +        monitor_suspend(&mon->common); /* resumed by handle_hmp_command_co() */
> > >          monitor_set_cur(co, &mon->common);
> > >          aio_co_enter(qemu_get_aio_context(), co);
> > > -        AIO_WAIT_WHILE_UNLOCKED(NULL, !data.done);
> > >      }
> > > -
> > > -    qobject_unref(qdict);
> > >  }
> > >  
> > >  static void cmd_completion(MonitorHMP *mon, const char *name, const char *list)
> > > -- 
> > > 2.41.0
> > > 
> > > 
> > -- 
> >  -----Open up your eyes, open up your mind, open up your code -------   
> > / Dr. David Alan Gilbert    |       Running GNU/Linux       | Happy  \ 
> > \        dave @ treblig.org |                               | In Hex /
> >  \ _________________________|_____ http://www.treblig.org   |_______/
> > 


-- 
 -----Open up your eyes, open up your mind, open up your code -------   
/ Dr. David Alan Gilbert    |       Running GNU/Linux       | Happy  \ 
\        dave @ treblig.org |                               | In Hex /
 \ _________________________|_____ http://www.treblig.org   |_______/
Re: [RFC 1/3] hmp: avoid the nested event loop in handle_hmp_command()
Posted by Stefan Hajnoczi 1 year, 2 months ago
On Thu, 7 Sept 2023 at 10:07, Dr. David Alan Gilbert <dave@treblig.org> wrote:
>
> * Stefan Hajnoczi (stefanha@redhat.com) wrote:
> > On Thu, Sep 07, 2023 at 01:06:39AM +0000, Dr. David Alan Gilbert wrote:
> > > * Stefan Hajnoczi (stefanha@redhat.com) wrote:
> > > > Coroutine HMP commands currently run to completion in a nested event
> > > > loop with the Big QEMU Lock (BQL) held. The call_rcu thread also uses
> > > > the BQL and cannot process work while the coroutine monitor command is
> > > > running. A deadlock occurs when monitor commands attempt to wait for
> > > > call_rcu work to finish.
> > >
> > > I hate to think if there's anywhere else that ends up doing that
> > > other than the monitors.
> >
> > Luckily drain_call_rcu() has few callers: just
> > xen_block_device_destroy() and qmp_device_add(). We only need to worry
> > about their call stacks.
> >
> > I haven't looked at the Xen code.
> >
> > >
> > > But, not knowing the semantics of the rcu code, it looks kind of OK to
> > > me from the monitor.
> > >
> > > (Do you ever get anything like qemu quitting from one of the other
> > > monitors while this coroutine hasn't been run?)
> >
> > Not sure what you mean?
>
> Imagine that just after you create your coroutine, a vCPU does a
> shutdown and qemu is configured to quit, or on another monitor someone
> does a quit;  does your coroutine get executed or not?

I think the answer is that it depends.

A coroutine can run for a while and then yield while waiting for a
timer, BH, fd handler, etc. If the coroutine has yielded then I think
QEMU could terminate.

The behavior of entering a coroutine for the first time depends on the
API that is used (e.g. qemu_coroutine_enter()/aio_co_enter()/etc).
qemu_coroutine_enter() is immediate but aio_co_enter() contains
indirect code paths like scheduling a BH.

To summarize: ¯\_(ツ)_/¯

Stefan

>
> Dave
>
> > Stefan
> >
> > >
> > > Dave
> > >
> > > > This patch refactors the HMP monitor to use the existing event loop
> > > > instead of creating a nested event loop. This will allow the next
> > > > patches to rely on draining call_rcu work.
> > > >
> > > > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > > > ---
> > > >  monitor/hmp.c | 28 +++++++++++++++-------------
> > > >  1 file changed, 15 insertions(+), 13 deletions(-)
> > > >
> > > > diff --git a/monitor/hmp.c b/monitor/hmp.c
> > > > index 69c1b7e98a..6cff2810aa 100644
> > > > --- a/monitor/hmp.c
> > > > +++ b/monitor/hmp.c
> > > > @@ -1111,15 +1111,17 @@ typedef struct HandleHmpCommandCo {
> > > >      Monitor *mon;
> > > >      const HMPCommand *cmd;
> > > >      QDict *qdict;
> > > > -    bool done;
> > > >  } HandleHmpCommandCo;
> > > >
> > > > -static void handle_hmp_command_co(void *opaque)
> > > > +static void coroutine_fn handle_hmp_command_co(void *opaque)
> > > >  {
> > > >      HandleHmpCommandCo *data = opaque;
> > > > +
> > > >      handle_hmp_command_exec(data->mon, data->cmd, data->qdict);
> > > >      monitor_set_cur(qemu_coroutine_self(), NULL);
> > > > -    data->done = true;
> > > > +    qobject_unref(data->qdict);
> > > > +    monitor_resume(data->mon);
> > > > +    g_free(data);
> > > >  }
> > > >
> > > >  void handle_hmp_command(MonitorHMP *mon, const char *cmdline)
> > > > @@ -1157,20 +1159,20 @@ void handle_hmp_command(MonitorHMP *mon, const char *cmdline)
> > > >          Monitor *old_mon = monitor_set_cur(qemu_coroutine_self(), &mon->common);
> > > >          handle_hmp_command_exec(&mon->common, cmd, qdict);
> > > >          monitor_set_cur(qemu_coroutine_self(), old_mon);
> > > > +        qobject_unref(qdict);
> > > >      } else {
> > > > -        HandleHmpCommandCo data = {
> > > > -            .mon = &mon->common,
> > > > -            .cmd = cmd,
> > > > -            .qdict = qdict,
> > > > -            .done = false,
> > > > -        };
> > > > -        Coroutine *co = qemu_coroutine_create(handle_hmp_command_co, &data);
> > > > +        HandleHmpCommandCo *data; /* freed by handle_hmp_command_co() */
> > > > +
> > > > +        data = g_new(HandleHmpCommandCo, 1);
> > > > +        data->mon = &mon->common;
> > > > +        data->cmd = cmd;
> > > > +        data->qdict = qdict; /* freed by handle_hmp_command_co() */
> > > > +
> > > > +        Coroutine *co = qemu_coroutine_create(handle_hmp_command_co, data);
> > > > +        monitor_suspend(&mon->common); /* resumed by handle_hmp_command_co() */
> > > >          monitor_set_cur(co, &mon->common);
> > > >          aio_co_enter(qemu_get_aio_context(), co);
> > > > -        AIO_WAIT_WHILE_UNLOCKED(NULL, !data.done);
> > > >      }
> > > > -
> > > > -    qobject_unref(qdict);
> > > >  }
> > > >
> > > >  static void cmd_completion(MonitorHMP *mon, const char *name, const char *list)
> > > > --
> > > > 2.41.0
> > > >
> > > >
> > > --
> > >  -----Open up your eyes, open up your mind, open up your code -------
> > > / Dr. David Alan Gilbert    |       Running GNU/Linux       | Happy  \
> > > \        dave @ treblig.org |                               | In Hex /
> > >  \ _________________________|_____ http://www.treblig.org   |_______/
> > >
>
>
> --
>  -----Open up your eyes, open up your mind, open up your code -------
> / Dr. David Alan Gilbert    |       Running GNU/Linux       | Happy  \
> \        dave @ treblig.org |                               | In Hex /
>  \ _________________________|_____ http://www.treblig.org   |_______/
>
Re: [RFC 1/3] hmp: avoid the nested event loop in handle_hmp_command()
Posted by Dr. David Alan Gilbert 1 year, 2 months ago
* Stefan Hajnoczi (stefanha@gmail.com) wrote:
> On Thu, 7 Sept 2023 at 10:07, Dr. David Alan Gilbert <dave@treblig.org> wrote:
> >
> > * Stefan Hajnoczi (stefanha@redhat.com) wrote:
> > > On Thu, Sep 07, 2023 at 01:06:39AM +0000, Dr. David Alan Gilbert wrote:
> > > > * Stefan Hajnoczi (stefanha@redhat.com) wrote:
> > > > > Coroutine HMP commands currently run to completion in a nested event
> > > > > loop with the Big QEMU Lock (BQL) held. The call_rcu thread also uses
> > > > > the BQL and cannot process work while the coroutine monitor command is
> > > > > running. A deadlock occurs when monitor commands attempt to wait for
> > > > > call_rcu work to finish.
> > > >
> > > > I hate to think if there's anywhere else that ends up doing that
> > > > other than the monitors.
> > >
> > > Luckily drain_call_rcu() has few callers: just
> > > xen_block_device_destroy() and qmp_device_add(). We only need to worry
> > > about their call stacks.
> > >
> > > I haven't looked at the Xen code.
> > >
> > > >
> > > > But, not knowing the semantics of the rcu code, it looks kind of OK to
> > > > me from the monitor.
> > > >
> > > > (Do you ever get anything like qemu quitting from one of the other
> > > > monitors while this coroutine hasn't been run?)
> > >
> > > Not sure what you mean?
> >
> > Imagine that just after you create your coroutine, a vCPU does a
> > shutdown and qemu is configured to quit, or on another monitor someone
> > does a quit;  does your coroutine get executed or not?
> 
> I think the answer is that it depends.
> 
> A coroutine can run for a while and then yield while waiting for a
> timer, BH, fd handler, etc. If the coroutine has yielded then I think
> QEMU could terminate.
> 
> The behavior of entering a coroutine for the first time depends on the
> API that is used (e.g. qemu_coroutine_enter()/aio_co_enter()/etc).
> qemu_coroutine_enter() is immediate but aio_co_enter() contains
> indirect code paths like scheduling a BH.
> 
> To summarize: ¯\_(ツ)_/¯

That does mean you leave your g_new'd data and qdict allocated at
exit - meh

I'm not sure if it means you're making any other assumptions about what
happens if the coroutine gets run during the exit path; although I guess
there are plenty of other cases like that.

Dave

> Stefan
> 
> >
> > Dave
> >
> > > Stefan
> > >
> > > >
> > > > Dave
> > > >
> > > > > This patch refactors the HMP monitor to use the existing event loop
> > > > > instead of creating a nested event loop. This will allow the next
> > > > > patches to rely on draining call_rcu work.
> > > > >
> > > > > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > > > > ---
> > > > >  monitor/hmp.c | 28 +++++++++++++++-------------
> > > > >  1 file changed, 15 insertions(+), 13 deletions(-)
> > > > >
> > > > > diff --git a/monitor/hmp.c b/monitor/hmp.c
> > > > > index 69c1b7e98a..6cff2810aa 100644
> > > > > --- a/monitor/hmp.c
> > > > > +++ b/monitor/hmp.c
> > > > > @@ -1111,15 +1111,17 @@ typedef struct HandleHmpCommandCo {
> > > > >      Monitor *mon;
> > > > >      const HMPCommand *cmd;
> > > > >      QDict *qdict;
> > > > > -    bool done;
> > > > >  } HandleHmpCommandCo;
> > > > >
> > > > > -static void handle_hmp_command_co(void *opaque)
> > > > > +static void coroutine_fn handle_hmp_command_co(void *opaque)
> > > > >  {
> > > > >      HandleHmpCommandCo *data = opaque;
> > > > > +
> > > > >      handle_hmp_command_exec(data->mon, data->cmd, data->qdict);
> > > > >      monitor_set_cur(qemu_coroutine_self(), NULL);
> > > > > -    data->done = true;
> > > > > +    qobject_unref(data->qdict);
> > > > > +    monitor_resume(data->mon);
> > > > > +    g_free(data);
> > > > >  }
> > > > >
> > > > >  void handle_hmp_command(MonitorHMP *mon, const char *cmdline)
> > > > > @@ -1157,20 +1159,20 @@ void handle_hmp_command(MonitorHMP *mon, const char *cmdline)
> > > > >          Monitor *old_mon = monitor_set_cur(qemu_coroutine_self(), &mon->common);
> > > > >          handle_hmp_command_exec(&mon->common, cmd, qdict);
> > > > >          monitor_set_cur(qemu_coroutine_self(), old_mon);
> > > > > +        qobject_unref(qdict);
> > > > >      } else {
> > > > > -        HandleHmpCommandCo data = {
> > > > > -            .mon = &mon->common,
> > > > > -            .cmd = cmd,
> > > > > -            .qdict = qdict,
> > > > > -            .done = false,
> > > > > -        };
> > > > > -        Coroutine *co = qemu_coroutine_create(handle_hmp_command_co, &data);
> > > > > +        HandleHmpCommandCo *data; /* freed by handle_hmp_command_co() */
> > > > > +
> > > > > +        data = g_new(HandleHmpCommandCo, 1);
> > > > > +        data->mon = &mon->common;
> > > > > +        data->cmd = cmd;
> > > > > +        data->qdict = qdict; /* freed by handle_hmp_command_co() */
> > > > > +
> > > > > +        Coroutine *co = qemu_coroutine_create(handle_hmp_command_co, data);
> > > > > +        monitor_suspend(&mon->common); /* resumed by handle_hmp_command_co() */
> > > > >          monitor_set_cur(co, &mon->common);
> > > > >          aio_co_enter(qemu_get_aio_context(), co);
> > > > > -        AIO_WAIT_WHILE_UNLOCKED(NULL, !data.done);
> > > > >      }
> > > > > -
> > > > > -    qobject_unref(qdict);
> > > > >  }
> > > > >
> > > > >  static void cmd_completion(MonitorHMP *mon, const char *name, const char *list)
> > > > > --
> > > > > 2.41.0
> > > > >
> > > > >
> > > > --
> > > >  -----Open up your eyes, open up your mind, open up your code -------
> > > > / Dr. David Alan Gilbert    |       Running GNU/Linux       | Happy  \
> > > > \        dave @ treblig.org |                               | In Hex /
> > > >  \ _________________________|_____ http://www.treblig.org   |_______/
> > > >
> >
> >
> > --
> >  -----Open up your eyes, open up your mind, open up your code -------
> > / Dr. David Alan Gilbert    |       Running GNU/Linux       | Happy  \
> > \        dave @ treblig.org |                               | In Hex /
> >  \ _________________________|_____ http://www.treblig.org   |_______/
> >
-- 
 -----Open up your eyes, open up your mind, open up your code -------   
/ Dr. David Alan Gilbert    |       Running GNU/Linux       | Happy  \ 
\        dave @ treblig.org |                               | In Hex /
 \ _________________________|_____ http://www.treblig.org   |_______/

Re: [RFC 1/3] hmp: avoid the nested event loop in handle_hmp_command()
Posted by Stefan Hajnoczi 1 year, 2 months ago
On Thu, 7 Sept 2023 at 16:53, Dr. David Alan Gilbert <dave@treblig.org> wrote:
>
> * Stefan Hajnoczi (stefanha@gmail.com) wrote:
> > On Thu, 7 Sept 2023 at 10:07, Dr. David Alan Gilbert <dave@treblig.org> wrote:
> > >
> > > * Stefan Hajnoczi (stefanha@redhat.com) wrote:
> > > > On Thu, Sep 07, 2023 at 01:06:39AM +0000, Dr. David Alan Gilbert wrote:
> > > > > * Stefan Hajnoczi (stefanha@redhat.com) wrote:
> > > > > > Coroutine HMP commands currently run to completion in a nested event
> > > > > > loop with the Big QEMU Lock (BQL) held. The call_rcu thread also uses
> > > > > > the BQL and cannot process work while the coroutine monitor command is
> > > > > > running. A deadlock occurs when monitor commands attempt to wait for
> > > > > > call_rcu work to finish.
> > > > >
> > > > > I hate to think if there's anywhere else that ends up doing that
> > > > > other than the monitors.
> > > >
> > > > Luckily drain_call_rcu() has few callers: just
> > > > xen_block_device_destroy() and qmp_device_add(). We only need to worry
> > > > about their call stacks.
> > > >
> > > > I haven't looked at the Xen code.
> > > >
> > > > >
> > > > > But, not knowing the semantics of the rcu code, it looks kind of OK to
> > > > > me from the monitor.
> > > > >
> > > > > (Do you ever get anything like qemu quitting from one of the other
> > > > > monitors while this coroutine hasn't been run?)
> > > >
> > > > Not sure what you mean?
> > >
> > > Imagine that just after you create your coroutine, a vCPU does a
> > > shutdown and qemu is configured to quit, or on another monitor someone
> > > does a quit;  does your coroutine get executed or not?
> >
> > I think the answer is that it depends.
> >
> > A coroutine can run for a while and then yield while waiting for a
> > timer, BH, fd handler, etc. If the coroutine has yielded then I think
> > QEMU could terminate.
> >
> > The behavior of entering a coroutine for the first time depends on the
> > API that is used (e.g. qemu_coroutine_enter()/aio_co_enter()/etc).
> > qemu_coroutine_enter() is immediate but aio_co_enter() contains
> > indirect code paths like scheduling a BH.
> >
> > To summarize: ¯\_(ツ)_/¯
>
> That does mean you leave your g_new'd data and qdict allocated at
> exit - meh
>
> I'm not sure if it means you're making any other assumptions about what
> happens if the coroutine gets run during the exit path; although I guess
> there are plenty of other cases like that.

Yes, I think QEMU has some resources (memory, file descriptors, etc)
that are not freed on exit.

Stefan

>
> Dave
>
> > Stefan
> >
> > >
> > > Dave
> > >
> > > > Stefan
> > > >
> > > > >
> > > > > Dave
> > > > >
> > > > > > This patch refactors the HMP monitor to use the existing event loop
> > > > > > instead of creating a nested event loop. This will allow the next
> > > > > > patches to rely on draining call_rcu work.
> > > > > >
> > > > > > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > > > > > ---
> > > > > >  monitor/hmp.c | 28 +++++++++++++++-------------
> > > > > >  1 file changed, 15 insertions(+), 13 deletions(-)
> > > > > >
> > > > > > diff --git a/monitor/hmp.c b/monitor/hmp.c
> > > > > > index 69c1b7e98a..6cff2810aa 100644
> > > > > > --- a/monitor/hmp.c
> > > > > > +++ b/monitor/hmp.c
> > > > > > @@ -1111,15 +1111,17 @@ typedef struct HandleHmpCommandCo {
> > > > > >      Monitor *mon;
> > > > > >      const HMPCommand *cmd;
> > > > > >      QDict *qdict;
> > > > > > -    bool done;
> > > > > >  } HandleHmpCommandCo;
> > > > > >
> > > > > > -static void handle_hmp_command_co(void *opaque)
> > > > > > +static void coroutine_fn handle_hmp_command_co(void *opaque)
> > > > > >  {
> > > > > >      HandleHmpCommandCo *data = opaque;
> > > > > > +
> > > > > >      handle_hmp_command_exec(data->mon, data->cmd, data->qdict);
> > > > > >      monitor_set_cur(qemu_coroutine_self(), NULL);
> > > > > > -    data->done = true;
> > > > > > +    qobject_unref(data->qdict);
> > > > > > +    monitor_resume(data->mon);
> > > > > > +    g_free(data);
> > > > > >  }
> > > > > >
> > > > > >  void handle_hmp_command(MonitorHMP *mon, const char *cmdline)
> > > > > > @@ -1157,20 +1159,20 @@ void handle_hmp_command(MonitorHMP *mon, const char *cmdline)
> > > > > >          Monitor *old_mon = monitor_set_cur(qemu_coroutine_self(), &mon->common);
> > > > > >          handle_hmp_command_exec(&mon->common, cmd, qdict);
> > > > > >          monitor_set_cur(qemu_coroutine_self(), old_mon);
> > > > > > +        qobject_unref(qdict);
> > > > > >      } else {
> > > > > > -        HandleHmpCommandCo data = {
> > > > > > -            .mon = &mon->common,
> > > > > > -            .cmd = cmd,
> > > > > > -            .qdict = qdict,
> > > > > > -            .done = false,
> > > > > > -        };
> > > > > > -        Coroutine *co = qemu_coroutine_create(handle_hmp_command_co, &data);
> > > > > > +        HandleHmpCommandCo *data; /* freed by handle_hmp_command_co() */
> > > > > > +
> > > > > > +        data = g_new(HandleHmpCommandCo, 1);
> > > > > > +        data->mon = &mon->common;
> > > > > > +        data->cmd = cmd;
> > > > > > +        data->qdict = qdict; /* freed by handle_hmp_command_co() */
> > > > > > +
> > > > > > +        Coroutine *co = qemu_coroutine_create(handle_hmp_command_co, data);
> > > > > > +        monitor_suspend(&mon->common); /* resumed by handle_hmp_command_co() */
> > > > > >          monitor_set_cur(co, &mon->common);
> > > > > >          aio_co_enter(qemu_get_aio_context(), co);
> > > > > > -        AIO_WAIT_WHILE_UNLOCKED(NULL, !data.done);
> > > > > >      }
> > > > > > -
> > > > > > -    qobject_unref(qdict);
> > > > > >  }
> > > > > >
> > > > > >  static void cmd_completion(MonitorHMP *mon, const char *name, const char *list)
> > > > > > --
> > > > > > 2.41.0
> > > > > >
> > > > > >
> > > > > --
> > > > >  -----Open up your eyes, open up your mind, open up your code -------
> > > > > / Dr. David Alan Gilbert    |       Running GNU/Linux       | Happy  \
> > > > > \        dave @ treblig.org |                               | In Hex /
> > > > >  \ _________________________|_____ http://www.treblig.org   |_______/
> > > > >
> > >
> > >
> > > --
> > >  -----Open up your eyes, open up your mind, open up your code -------
> > > / Dr. David Alan Gilbert    |       Running GNU/Linux       | Happy  \
> > > \        dave @ treblig.org |                               | In Hex /
> > >  \ _________________________|_____ http://www.treblig.org   |_______/
> > >
> --
>  -----Open up your eyes, open up your mind, open up your code -------
> / Dr. David Alan Gilbert    |       Running GNU/Linux       | Happy  \
> \        dave @ treblig.org |                               | In Hex /
>  \ _________________________|_____ http://www.treblig.org   |_______/