[PATCH] console: make QMP screendump use coroutine

Marc-André Lureau posted 1 patch 4 years, 3 months ago
Test docker-mingw@fedora failed
Test checkpatch passed
Test docker-quick@centos7 failed
Test FreeBSD passed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20200113144848.2168018-1-marcandre.lureau@redhat.com
Maintainers: Eric Blake <eblake@redhat.com>, Markus Armbruster <armbru@redhat.com>, Gerd Hoffmann <kraxel@redhat.com>
There is a newer version of this series
qapi/ui.json    |  3 ++-
ui/console.c    | 35 +++++++++++++++++++++++++++--------
ui/trace-events |  2 +-
3 files changed, 30 insertions(+), 10 deletions(-)
[PATCH] console: make QMP screendump use coroutine
Posted by Marc-André Lureau 4 years, 3 months ago
Thanks to the QMP coroutine support, the screendump handler can
trigger a graphic_hw_update(), yield and let the main loop run until
update is done. Then the handler is resumed, and the ppm_save() will
write the screen image to disk in the coroutine context (thus
non-blocking).

For now, HMP doesn't have coroutine support, so it remains potentially
outdated or glitched.

Fixes:
https://bugzilla.redhat.com/show_bug.cgi?id=1230527

Based-on: <20200109183545.27452-2-kwolf@redhat.com>

Cc: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
---
 qapi/ui.json    |  3 ++-
 ui/console.c    | 35 +++++++++++++++++++++++++++--------
 ui/trace-events |  2 +-
 3 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/qapi/ui.json b/qapi/ui.json
index e04525d8b4..d941202f34 100644
--- a/qapi/ui.json
+++ b/qapi/ui.json
@@ -96,7 +96,8 @@
 #
 ##
 { 'command': 'screendump',
-  'data': {'filename': 'str', '*device': 'str', '*head': 'int'} }
+  'data': {'filename': 'str', '*device': 'str', '*head': 'int'},
+  'coroutine': true }
 
 ##
 # == Spice
diff --git a/ui/console.c b/ui/console.c
index ac79d679f5..db184b473f 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -167,6 +167,7 @@ struct QemuConsole {
     QEMUFIFO out_fifo;
     uint8_t out_fifo_buf[16];
     QEMUTimer *kbd_timer;
+    Coroutine *screendump_co;
 
     QTAILQ_ENTRY(QemuConsole) next;
 };
@@ -194,7 +195,6 @@ static void dpy_refresh(DisplayState *s);
 static DisplayState *get_alloc_displaystate(void);
 static void text_console_update_cursor_timer(void);
 static void text_console_update_cursor(void *opaque);
-static bool ppm_save(int fd, DisplaySurface *ds, Error **errp);
 
 static void gui_update(void *opaque)
 {
@@ -263,6 +263,9 @@ static void gui_setup_refresh(DisplayState *ds)
 
 void graphic_hw_update_done(QemuConsole *con)
 {
+    if (con && con->screendump_co) {
+        aio_co_wake(con->screendump_co);
+    }
 }
 
 void graphic_hw_update(QemuConsole *con)
@@ -310,16 +313,16 @@ void graphic_hw_invalidate(QemuConsole *con)
     }
 }
 
-static bool ppm_save(int fd, DisplaySurface *ds, Error **errp)
+static bool ppm_save(int fd, pixman_image_t *image, Error **errp)
 {
-    int width = pixman_image_get_width(ds->image);
-    int height = pixman_image_get_height(ds->image);
+    int width = pixman_image_get_width(image);
+    int height = pixman_image_get_height(image);
     g_autoptr(Object) ioc = OBJECT(qio_channel_file_new_fd(fd));
     g_autofree char *header = NULL;
     g_autoptr(pixman_image_t) linebuf = NULL;
     int y;
 
-    trace_ppm_save(fd, ds);
+    trace_ppm_save(fd, image);
 
     header = g_strdup_printf("P6\n%d %d\n%d\n", width, height, 255);
     if (qio_channel_write_all(QIO_CHANNEL(ioc),
@@ -329,7 +332,7 @@ static bool ppm_save(int fd, DisplaySurface *ds, Error **errp)
 
     linebuf = qemu_pixman_linebuf_create(PIXMAN_BE_r8g8b8, width);
     for (y = 0; y < height; y++) {
-        qemu_pixman_linebuf_fill(linebuf, ds->image, width, 0, y);
+        qemu_pixman_linebuf_fill(linebuf, image, width, 0, y);
         if (qio_channel_write_all(QIO_CHANNEL(ioc),
                                   (char *)pixman_image_get_data(linebuf),
                                   pixman_image_get_stride(linebuf), errp) < 0) {
@@ -340,11 +343,18 @@ static bool ppm_save(int fd, DisplaySurface *ds, Error **errp)
     return true;
 }
 
+static void graphic_hw_update_bh(void *con)
+{
+    graphic_hw_update(con);
+}
+
+/* may be called in coroutine context or not */
 void qmp_screendump(const char *filename, bool has_device, const char *device,
                     bool has_head, int64_t head, Error **errp)
 {
     QemuConsole *con;
     DisplaySurface *surface;
+    g_autoptr(pixman_image_t) image = NULL;
     int fd;
 
     if (has_device) {
@@ -365,7 +375,15 @@ void qmp_screendump(const char *filename, bool has_device, const char *device,
         }
     }
 
-    graphic_hw_update(con);
+    if (qemu_in_coroutine()) {
+        assert(!con->screendump_co);
+        con->screendump_co = qemu_coroutine_self();
+        aio_bh_schedule_oneshot(qemu_get_aio_context(),
+                                graphic_hw_update_bh, con);
+        qemu_coroutine_yield();
+        con->screendump_co = NULL;
+    }
+
     surface = qemu_console_surface(con);
     if (!surface) {
         error_setg(errp, "no surface");
@@ -379,7 +397,8 @@ void qmp_screendump(const char *filename, bool has_device, const char *device,
         return;
     }
 
-    if (!ppm_save(fd, surface, errp)) {
+    image = pixman_image_ref(surface->image);
+    if (!ppm_save(fd, image, errp)) {
         qemu_unlink(filename);
     }
 }
diff --git a/ui/trace-events b/ui/trace-events
index 0dcda393c1..e8726fc969 100644
--- a/ui/trace-events
+++ b/ui/trace-events
@@ -15,7 +15,7 @@ displaysurface_create_pixman(void *display_surface) "surface=%p"
 displaysurface_free(void *display_surface) "surface=%p"
 displaychangelistener_register(void *dcl, const char *name) "%p [ %s ]"
 displaychangelistener_unregister(void *dcl, const char *name) "%p [ %s ]"
-ppm_save(int fd, void *display_surface) "fd=%d surface=%p"
+ppm_save(int fd, void *image) "fd=%d image=%p"
 
 # gtk.c
 # gtk-gl-area.c
-- 
2.25.0.rc2.1.g09a9a1a997


Re: [PATCH] console: make QMP screendump use coroutine
Posted by no-reply@patchew.org 4 years, 3 months ago
Patchew URL: https://patchew.org/QEMU/20200113144848.2168018-1-marcandre.lureau@redhat.com/



Hi,

This series failed the docker-quick@centos7 build test. Please find the testing commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

=== TEST SCRIPT BEGIN ===
#!/bin/bash
make docker-image-centos7 V=1 NETWORK=1
time make docker-test-quick@centos7 SHOW_ENV=1 J=14 NETWORK=1
=== TEST SCRIPT END ===

Valid keys are 'allow-oob', 'allow-preconfig', 'boxed', 'command', 'data', 'features', 'gen', 'if', 'returns', 'success-response'.
  GEN     hw/i386/trace.h
  GEN     hw/i386/xen/trace.h
make: *** [qapi-gen-timestamp] Error 1
make: *** Waiting for unfinished jobs....
Traceback (most recent call last):
  File "./tests/docker/docker.py", line 662, in <module>
---
    raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['sudo', '-n', 'docker', 'run', '--label', 'com.qemu.instance.uuid=d3669b4e376a44e4a4d59206be32335f', '-u', '1003', '--security-opt', 'seccomp=unconfined', '--rm', '-e', 'TARGET_LIST=', '-e', 'EXTRA_CONFIGURE_OPTS=', '-e', 'V=', '-e', 'J=14', '-e', 'DEBUG=', '-e', 'SHOW_ENV=1', '-e', 'CCACHE_DIR=/var/tmp/ccache', '-v', '/home/patchew2/.cache/qemu-docker-ccache:/var/tmp/ccache:z', '-v', '/var/tmp/patchew-tester-tmp-_89wvl29/src/docker-src.2020-01-13-11.30.29.16733:/var/tmp/qemu:z,ro', 'qemu:centos7', '/var/tmp/qemu/run', 'test-quick']' returned non-zero exit status 2.
filter=--filter=label=com.qemu.instance.uuid=d3669b4e376a44e4a4d59206be32335f
make[1]: *** [docker-run] Error 1
make[1]: Leaving directory `/var/tmp/patchew-tester-tmp-_89wvl29/src'
make: *** [docker-run-test-quick@centos7] Error 2

real    2m20.446s
user    0m8.494s


The full log is available at
http://patchew.org/logs/20200113144848.2168018-1-marcandre.lureau@redhat.com/testing.docker-quick@centos7/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-devel@redhat.com
Re: [PATCH] console: make QMP screendump use coroutine
Posted by no-reply@patchew.org 4 years, 3 months ago
Patchew URL: https://patchew.org/QEMU/20200113144848.2168018-1-marcandre.lureau@redhat.com/



Hi,

This series failed the docker-mingw@fedora build test. Please find the testing commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

=== TEST SCRIPT BEGIN ===
#! /bin/bash
export ARCH=x86_64
make docker-image-fedora V=1 NETWORK=1
time make docker-test-mingw@fedora J=14 NETWORK=1
=== TEST SCRIPT END ===

/tmp/qemu-test/src/qapi/ui.json:98: command has unknown key 'coroutine'
Valid keys are 'allow-oob', 'allow-preconfig', 'boxed', 'command', 'data', 'features', 'gen', 'if', 'returns', 'success-response'.
  CC      /tmp/qemu-test/build/slirp/src/ncsi.o
make: *** [Makefile:618: qapi-gen-timestamp] Error 1
make: *** Waiting for unfinished jobs....
  CC      /tmp/qemu-test/build/slirp/src/version.o
  CC      /tmp/qemu-test/build/slirp/src/tcp_output.o
---
    raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['sudo', '-n', 'docker', 'run', '--label', 'com.qemu.instance.uuid=5bdb51421baf44d4a4d0ef4e0dc04458', '-u', '1001', '--security-opt', 'seccomp=unconfined', '--rm', '-e', 'TARGET_LIST=', '-e', 'EXTRA_CONFIGURE_OPTS=', '-e', 'V=', '-e', 'J=14', '-e', 'DEBUG=', '-e', 'SHOW_ENV=', '-e', 'CCACHE_DIR=/var/tmp/ccache', '-v', '/home/patchew/.cache/qemu-docker-ccache:/var/tmp/ccache:z', '-v', '/var/tmp/patchew-tester-tmp-y95ekbja/src/docker-src.2020-01-13-11.34.01.22073:/var/tmp/qemu:z,ro', 'qemu:fedora', '/var/tmp/qemu/run', 'test-mingw']' returned non-zero exit status 2.
filter=--filter=label=com.qemu.instance.uuid=5bdb51421baf44d4a4d0ef4e0dc04458
make[1]: *** [docker-run] Error 1
make[1]: Leaving directory `/var/tmp/patchew-tester-tmp-y95ekbja/src'
make: *** [docker-run-test-mingw@fedora] Error 2

real    2m1.045s
user    0m8.011s


The full log is available at
http://patchew.org/logs/20200113144848.2168018-1-marcandre.lureau@redhat.com/testing.docker-mingw@fedora/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-devel@redhat.com
Re: [PATCH] console: make QMP screendump use coroutine
Posted by Gerd Hoffmann 4 years, 2 months ago
  Hi,

> Thanks to the QMP coroutine support, the screendump handler can
> trigger a graphic_hw_update(), yield and let the main loop run until
> update is done. Then the handler is resumed, and the ppm_save() will
> write the screen image to disk in the coroutine context (thus
> non-blocking).
> 
> For now, HMP doesn't have coroutine support, so it remains potentially
> outdated or glitched.
> 
> Fixes:
> https://bugzilla.redhat.com/show_bug.cgi?id=1230527
> 
> Based-on: <20200109183545.27452-2-kwolf@redhat.com>

What is the status here?  Tried to apply (worked) and build (failed),
seems Kevins patches are not merged yet?

thanks,
  Gerd


Re: [PATCH] console: make QMP screendump use coroutine
Posted by Markus Armbruster 4 years, 2 months ago
Gerd Hoffmann <kraxel@redhat.com> writes:

>   Hi,
>
>> Thanks to the QMP coroutine support, the screendump handler can
>> trigger a graphic_hw_update(), yield and let the main loop run until
>> update is done. Then the handler is resumed, and the ppm_save() will
>> write the screen image to disk in the coroutine context (thus
>> non-blocking).
>> 
>> For now, HMP doesn't have coroutine support, so it remains potentially
>> outdated or glitched.
>> 
>> Fixes:
>> https://bugzilla.redhat.com/show_bug.cgi?id=1230527
>> 
>> Based-on: <20200109183545.27452-2-kwolf@redhat.com>
>
> What is the status here?  Tried to apply (worked) and build (failed),
> seems Kevins patches are not merged yet?

I reviewed v3, Kevin worked in improvements promptly, and I failed to
review v4 promptly.  Sorry about that.


Re: [PATCH] console: make QMP screendump use coroutine
Posted by Markus Armbruster 4 years, 1 month ago
I tried to observe the main loop keeps running while the screendump does
its work.

The main loop appears to lack trace points.  Alright, if there's no
hammer handy, I'll use a rock:

diff --git a/softmmu/vl.c b/softmmu/vl.c
index 5549f4b619..b6561a65d7 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -1661,6 +1661,7 @@ void qemu_main_loop(void)
 #ifdef CONFIG_PROFILER
         ti = profile_getclock();
 #endif
+        printf("*** main loop\n");
         main_loop_wait(false);
 #ifdef CONFIG_PROFILER
         dev_time += profile_getclock() - ti;


First experiment: does the main loop continue to run when writing out
the screendump blocks / would block?

Observe qmp_screendump() opens the file without O_EXCL.  Great, that
lets me block output by making it open a FIFO.

Terminal#1:

    $ mkfifo s

Terminal#2:

    $ upstream-qemu -S -display none -chardev socket,id=qmp,path=test-qmp,server=on,wait=off -mon mode=control,chardev=qmp
    *** main loop
    *** main loop
    *** main loop

Keeps printing at a steady pace.

Terminal#3:

    $ socat "READLINE,history=$HOME/.qmp_history,prompt=QMP>" UNIX-CONNECT:$HOME/work/images/test-qmp 
    {"QMP": {"version": {"qemu": {"micro": 50, "minor": 2, "major": 4}, "package": "v4.2.0-2069-g5e5ae6b644-dirty"}, "capabilities": ["oob"]}}
    QMP>{"execute": "qmp_capabilities"}
    {"return": {}}
    QMP>{"execute": "screendump", "arguments": {"filename": "s"}}

The printing in terminal#2 stops.  This is expected; qemu_open() calls
open(), which blocks, because the FIFO has no reader.

Terminal#1:

    $ exec 4<s

Now the FIFO has a reader.  Terminal#2 remains quiet.

We now hang in ppm_save().  Abridged stack backtrace:

    #0  0x00007ffff519d0f5 in writev () at /lib64/libc.so.6
    #1  0x0000555555e15f61 in qio_channel_file_writev
        (ioc=0x5555567bf5f0, iov=0x555556a441b0, niov=1, fds=0x0, nfds=0, errp=0x7fffe9d81d10) at /work/armbru/qemu/io/channel-file.c:123
    #2  0x0000555555e133d3 in qio_channel_writev_full
        (ioc=0x5555567bf5f0, iov=0x555556a441b0, niov=1, fds=0x0, nfds=0, errp=0x7fffe9d81d10) at /work/armbru/qemu/io/channel.c:86
    #3  0x0000555555e137a2 in qio_channel_writev
        (ioc=0x5555567bf5f0, iov=0x555556a441b0, niov=1, errp=0x7fffe9d81d10)
        at /work/armbru/qemu/io/channel.c:207
    #4  0x0000555555e13696 in qio_channel_writev_all
        (ioc=0x5555567bf5f0, iov=0x7fffe9d81bd0, niov=1, errp=0x7fffe9d81d10)
        at /work/armbru/qemu/io/channel.c:171
    #5  0x0000555555e139b1 in qio_channel_write_all
        (ioc=0x5555567bf5f0, buf=0x555556b05200 "", buflen=1920, errp=0x7fffe9d81d10) at /work/armbru/qemu/io/channel.c:257
    #6  0x0000555555cd74ff in ppm_save
        (fd=22, image=0x5555568ffdd0, errp=0x7fffe9d81d10)
        at /work/armbru/qemu/ui/console.c:336
    #7  0x0000555555cd77e6 in qmp_screendump
        (filename=0x555556ea0900 "s", has_device=false, device=0x0, has_head=false, head=0, errp=0x7fffe9d81d10) at /work/armbru/qemu/ui/console.c:401

A brief inspection of qio_channel_file_writev() and
qio_channel_writev_all() suggests this might work if you make the output
file descriptor non-blocking.

    $ head -c 1 <&4 | hexdump -C
    00000000  50                                                |P|
    00000001

Still quiet.

    $ cat <&4 >/dev/null

The printing resumes.

    $ exec 4<&-


Second experiment: does the main loop continue to run while we wait for
graphic_hw_update_done()?

Left as an exercise for the patch submitter ;)


Re: [PATCH] console: make QMP screendump use coroutine
Posted by Marc-André Lureau 4 years, 1 month ago
Hi

On Thu, Mar 5, 2020 at 3:46 PM Markus Armbruster <armbru@redhat.com> wrote:
>
> I tried to observe the main loop keeps running while the screendump does
> its work.
>
> The main loop appears to lack trace points.  Alright, if there's no
> hammer handy, I'll use a rock:
>
> diff --git a/softmmu/vl.c b/softmmu/vl.c
> index 5549f4b619..b6561a65d7 100644
> --- a/softmmu/vl.c
> +++ b/softmmu/vl.c
> @@ -1661,6 +1661,7 @@ void qemu_main_loop(void)
>  #ifdef CONFIG_PROFILER
>          ti = profile_getclock();
>  #endif
> +        printf("*** main loop\n");
>          main_loop_wait(false);
>  #ifdef CONFIG_PROFILER
>          dev_time += profile_getclock() - ti;
>
>
> First experiment: does the main loop continue to run when writing out
> the screendump blocks / would block?
>
> Observe qmp_screendump() opens the file without O_EXCL.  Great, that
> lets me block output by making it open a FIFO.
>
> Terminal#1:
>
>     $ mkfifo s
>
> Terminal#2:
>
>     $ upstream-qemu -S -display none -chardev socket,id=qmp,path=test-qmp,server=on,wait=off -mon mode=control,chardev=qmp
>     *** main loop
>     *** main loop
>     *** main loop
>
> Keeps printing at a steady pace.
>
> Terminal#3:
>
>     $ socat "READLINE,history=$HOME/.qmp_history,prompt=QMP>" UNIX-CONNECT:$HOME/work/images/test-qmp
>     {"QMP": {"version": {"qemu": {"micro": 50, "minor": 2, "major": 4}, "package": "v4.2.0-2069-g5e5ae6b644-dirty"}, "capabilities": ["oob"]}}
>     QMP>{"execute": "qmp_capabilities"}
>     {"return": {}}
>     QMP>{"execute": "screendump", "arguments": {"filename": "s"}}
>
> The printing in terminal#2 stops.  This is expected; qemu_open() calls
> open(), which blocks, because the FIFO has no reader.
>
> Terminal#1:
>
>     $ exec 4<s
>
> Now the FIFO has a reader.  Terminal#2 remains quiet.
>
> We now hang in ppm_save().  Abridged stack backtrace:
>
>     #0  0x00007ffff519d0f5 in writev () at /lib64/libc.so.6
>     #1  0x0000555555e15f61 in qio_channel_file_writev
>         (ioc=0x5555567bf5f0, iov=0x555556a441b0, niov=1, fds=0x0, nfds=0, errp=0x7fffe9d81d10) at /work/armbru/qemu/io/channel-file.c:123
>     #2  0x0000555555e133d3 in qio_channel_writev_full
>         (ioc=0x5555567bf5f0, iov=0x555556a441b0, niov=1, fds=0x0, nfds=0, errp=0x7fffe9d81d10) at /work/armbru/qemu/io/channel.c:86
>     #3  0x0000555555e137a2 in qio_channel_writev
>         (ioc=0x5555567bf5f0, iov=0x555556a441b0, niov=1, errp=0x7fffe9d81d10)
>         at /work/armbru/qemu/io/channel.c:207
>     #4  0x0000555555e13696 in qio_channel_writev_all
>         (ioc=0x5555567bf5f0, iov=0x7fffe9d81bd0, niov=1, errp=0x7fffe9d81d10)
>         at /work/armbru/qemu/io/channel.c:171
>     #5  0x0000555555e139b1 in qio_channel_write_all
>         (ioc=0x5555567bf5f0, buf=0x555556b05200 "", buflen=1920, errp=0x7fffe9d81d10) at /work/armbru/qemu/io/channel.c:257
>     #6  0x0000555555cd74ff in ppm_save
>         (fd=22, image=0x5555568ffdd0, errp=0x7fffe9d81d10)
>         at /work/armbru/qemu/ui/console.c:336
>     #7  0x0000555555cd77e6 in qmp_screendump
>         (filename=0x555556ea0900 "s", has_device=false, device=0x0, has_head=false, head=0, errp=0x7fffe9d81d10) at /work/armbru/qemu/ui/console.c:401
>
> A brief inspection of qio_channel_file_writev() and
> qio_channel_writev_all() suggests this might work if you make the output
> file descriptor non-blocking.

Right, the goal was rather originally to fix rhbz#1230527. We got
coroutine IO by accident, and I was too optimistic about default
behaviour change ;) I will update the patch.

>
>     $ head -c 1 <&4 | hexdump -C
>     00000000  50                                                |P|
>     00000001
>
> Still quiet.
>
>     $ cat <&4 >/dev/null
>
> The printing resumes.
>
>     $ exec 4<&-
>
>
> Second experiment: does the main loop continue to run while we wait for
> graphic_hw_update_done()?
>
> Left as an exercise for the patch submitter ;)
>
>

With your main loop printf, one printf in graphic_hw_update() and one
in graphic_hw_update_done() ? (rather part of testing commit
4d6316218bf7bf3b8c7c7165b072cc314511a7a7, soon 4y old!)

-- 
Marc-André Lureau

Re: [PATCH] console: make QMP screendump use coroutine
Posted by Markus Armbruster 4 years, 1 month ago
Marc-André Lureau <marcandre.lureau@gmail.com> writes:

> Hi
>
> On Thu, Mar 5, 2020 at 3:46 PM Markus Armbruster <armbru@redhat.com> wrote:
>>
>> I tried to observe the main loop keeps running while the screendump does
>> its work.
>>
>> The main loop appears to lack trace points.  Alright, if there's no
>> hammer handy, I'll use a rock:
>>
>> diff --git a/softmmu/vl.c b/softmmu/vl.c
>> index 5549f4b619..b6561a65d7 100644
>> --- a/softmmu/vl.c
>> +++ b/softmmu/vl.c
>> @@ -1661,6 +1661,7 @@ void qemu_main_loop(void)
>>  #ifdef CONFIG_PROFILER
>>          ti = profile_getclock();
>>  #endif
>> +        printf("*** main loop\n");
>>          main_loop_wait(false);
>>  #ifdef CONFIG_PROFILER
>>          dev_time += profile_getclock() - ti;
>>
>>
>> First experiment: does the main loop continue to run when writing out
>> the screendump blocks / would block?
>>
>> Observe qmp_screendump() opens the file without O_EXCL.  Great, that
>> lets me block output by making it open a FIFO.
>>
>> Terminal#1:
>>
>>     $ mkfifo s
>>
>> Terminal#2:
>>
>>     $ upstream-qemu -S -display none -chardev socket,id=qmp,path=test-qmp,server=on,wait=off -mon mode=control,chardev=qmp
>>     *** main loop
>>     *** main loop
>>     *** main loop
>>
>> Keeps printing at a steady pace.
>>
>> Terminal#3:
>>
>>     $ socat "READLINE,history=$HOME/.qmp_history,prompt=QMP>" UNIX-CONNECT:$HOME/work/images/test-qmp
>>     {"QMP": {"version": {"qemu": {"micro": 50, "minor": 2, "major": 4}, "package": "v4.2.0-2069-g5e5ae6b644-dirty"}, "capabilities": ["oob"]}}
>>     QMP>{"execute": "qmp_capabilities"}
>>     {"return": {}}
>>     QMP>{"execute": "screendump", "arguments": {"filename": "s"}}
>>
>> The printing in terminal#2 stops.  This is expected; qemu_open() calls
>> open(), which blocks, because the FIFO has no reader.
>>
>> Terminal#1:
>>
>>     $ exec 4<s
>>
>> Now the FIFO has a reader.  Terminal#2 remains quiet.
>>
>> We now hang in ppm_save().  Abridged stack backtrace:
>>
>>     #0  0x00007ffff519d0f5 in writev () at /lib64/libc.so.6
>>     #1  0x0000555555e15f61 in qio_channel_file_writev
>>         (ioc=0x5555567bf5f0, iov=0x555556a441b0, niov=1, fds=0x0, nfds=0, errp=0x7fffe9d81d10) at /work/armbru/qemu/io/channel-file.c:123
>>     #2  0x0000555555e133d3 in qio_channel_writev_full
>>         (ioc=0x5555567bf5f0, iov=0x555556a441b0, niov=1, fds=0x0, nfds=0, errp=0x7fffe9d81d10) at /work/armbru/qemu/io/channel.c:86
>>     #3  0x0000555555e137a2 in qio_channel_writev
>>         (ioc=0x5555567bf5f0, iov=0x555556a441b0, niov=1, errp=0x7fffe9d81d10)
>>         at /work/armbru/qemu/io/channel.c:207
>>     #4  0x0000555555e13696 in qio_channel_writev_all
>>         (ioc=0x5555567bf5f0, iov=0x7fffe9d81bd0, niov=1, errp=0x7fffe9d81d10)
>>         at /work/armbru/qemu/io/channel.c:171
>>     #5  0x0000555555e139b1 in qio_channel_write_all
>>         (ioc=0x5555567bf5f0, buf=0x555556b05200 "", buflen=1920, errp=0x7fffe9d81d10) at /work/armbru/qemu/io/channel.c:257
>>     #6  0x0000555555cd74ff in ppm_save
>>         (fd=22, image=0x5555568ffdd0, errp=0x7fffe9d81d10)
>>         at /work/armbru/qemu/ui/console.c:336
>>     #7  0x0000555555cd77e6 in qmp_screendump
>>         (filename=0x555556ea0900 "s", has_device=false, device=0x0, has_head=false, head=0, errp=0x7fffe9d81d10) at /work/armbru/qemu/ui/console.c:401
>>
>> A brief inspection of qio_channel_file_writev() and
>> qio_channel_writev_all() suggests this might work if you make the output
>> file descriptor non-blocking.
>
> Right, the goal was rather originally to fix rhbz#1230527. We got
> coroutine IO by accident, and I was too optimistic about default
> behaviour change ;) I will update the patch.
>
>>
>>     $ head -c 1 <&4 | hexdump -C
>>     00000000  50                                                |P|
>>     00000001
>>
>> Still quiet.
>>
>>     $ cat <&4 >/dev/null
>>
>> The printing resumes.
>>
>>     $ exec 4<&-
>>
>>
>> Second experiment: does the main loop continue to run while we wait for
>> graphic_hw_update_done()?
>>
>> Left as an exercise for the patch submitter ;)
>>
>>
>
> With your main loop printf, one printf in graphic_hw_update() and one
> in graphic_hw_update_done() ? (rather part of testing commit
> 4d6316218bf7bf3b8c7c7165b072cc314511a7a7, soon 4y old!)

We might also need to artificially delay graphic_hw_update_done().


Re: [PATCH] console: make QMP screendump use coroutine
Posted by Markus Armbruster 4 years, 1 month ago
Marc-André Lureau <marcandre.lureau@redhat.com> writes:

> Thanks to the QMP coroutine support, the screendump handler can
> trigger a graphic_hw_update(), yield and let the main loop run until
> update is done. Then the handler is resumed, and the ppm_save() will
> write the screen image to disk in the coroutine context (thus
> non-blocking).
>
> For now, HMP doesn't have coroutine support, so it remains potentially
> outdated or glitched.
>
> Fixes:
> https://bugzilla.redhat.com/show_bug.cgi?id=1230527
>
> Based-on: <20200109183545.27452-2-kwolf@redhat.com>
>
> Cc: Kevin Wolf <kwolf@redhat.com>
> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> ---
>  qapi/ui.json    |  3 ++-
>  ui/console.c    | 35 +++++++++++++++++++++++++++--------
>  ui/trace-events |  2 +-
>  3 files changed, 30 insertions(+), 10 deletions(-)
>
> diff --git a/qapi/ui.json b/qapi/ui.json
> index e04525d8b4..d941202f34 100644
> --- a/qapi/ui.json
> +++ b/qapi/ui.json
> @@ -96,7 +96,8 @@
>  #
>  ##
>  { 'command': 'screendump',
> -  'data': {'filename': 'str', '*device': 'str', '*head': 'int'} }
> +  'data': {'filename': 'str', '*device': 'str', '*head': 'int'},
> +  'coroutine': true }
>  
>  ##
>  # == Spice
> diff --git a/ui/console.c b/ui/console.c
> index ac79d679f5..db184b473f 100644
> --- a/ui/console.c
> +++ b/ui/console.c
> @@ -167,6 +167,7 @@ struct QemuConsole {
>      QEMUFIFO out_fifo;
>      uint8_t out_fifo_buf[16];
>      QEMUTimer *kbd_timer;
> +    Coroutine *screendump_co;
>  
>      QTAILQ_ENTRY(QemuConsole) next;
>  };
> @@ -194,7 +195,6 @@ static void dpy_refresh(DisplayState *s);
>  static DisplayState *get_alloc_displaystate(void);
>  static void text_console_update_cursor_timer(void);
>  static void text_console_update_cursor(void *opaque);
> -static bool ppm_save(int fd, DisplaySurface *ds, Error **errp);
>  
>  static void gui_update(void *opaque)
>  {
> @@ -263,6 +263,9 @@ static void gui_setup_refresh(DisplayState *ds)
>  
>  void graphic_hw_update_done(QemuConsole *con)
>  {
> +    if (con && con->screendump_co) {

How can !con happen?

> +        aio_co_wake(con->screendump_co);
> +    }
>  }
>  
>  void graphic_hw_update(QemuConsole *con)
> @@ -310,16 +313,16 @@ void graphic_hw_invalidate(QemuConsole *con)
>      }
>  }
>  
> -static bool ppm_save(int fd, DisplaySurface *ds, Error **errp)
> +static bool ppm_save(int fd, pixman_image_t *image, Error **errp)
>  {
> -    int width = pixman_image_get_width(ds->image);
> -    int height = pixman_image_get_height(ds->image);
> +    int width = pixman_image_get_width(image);
> +    int height = pixman_image_get_height(image);
>      g_autoptr(Object) ioc = OBJECT(qio_channel_file_new_fd(fd));
>      g_autofree char *header = NULL;
>      g_autoptr(pixman_image_t) linebuf = NULL;
>      int y;
>  
> -    trace_ppm_save(fd, ds);
> +    trace_ppm_save(fd, image);
>  
>      header = g_strdup_printf("P6\n%d %d\n%d\n", width, height, 255);
>      if (qio_channel_write_all(QIO_CHANNEL(ioc),
> @@ -329,7 +332,7 @@ static bool ppm_save(int fd, DisplaySurface *ds, Error **errp)
>  
>      linebuf = qemu_pixman_linebuf_create(PIXMAN_BE_r8g8b8, width);
>      for (y = 0; y < height; y++) {
> -        qemu_pixman_linebuf_fill(linebuf, ds->image, width, 0, y);
> +        qemu_pixman_linebuf_fill(linebuf, image, width, 0, y);
>          if (qio_channel_write_all(QIO_CHANNEL(ioc),
>                                    (char *)pixman_image_get_data(linebuf),
>                                    pixman_image_get_stride(linebuf), errp) < 0) {

Looks like an unrelated optimization / simplification.  If I was
maintainer, I'd ask for a separate patch.

> @@ -340,11 +343,18 @@ static bool ppm_save(int fd, DisplaySurface *ds, Error **errp)
>      return true;
>  }
>  
> +static void graphic_hw_update_bh(void *con)
> +{
> +    graphic_hw_update(con);
> +}
> +
> +/* may be called in coroutine context or not */

Hmm.

Even though the QMP core always calls in coroutine context, the comment
is correct: hmp_screendump() calls it outside coroutine context.
Because of that...

>  void qmp_screendump(const char *filename, bool has_device, const char *device,
>                      bool has_head, int64_t head, Error **errp)
>  {
>      QemuConsole *con;
>      DisplaySurface *surface;
> +    g_autoptr(pixman_image_t) image = NULL;
>      int fd;
>  
>      if (has_device) {
> @@ -365,7 +375,15 @@ void qmp_screendump(const char *filename, bool has_device, const char *device,
>          }
>      }
>  
> -    graphic_hw_update(con);
> +    if (qemu_in_coroutine()) {
> +        assert(!con->screendump_co);

What if multiple QMP monitors simultaneously screendump?  Hmm, it works
because all execute one after another in the same coroutine
qmp_dispatcher_co.  Implicit mutual exclusion.

Executing them one after another is bad, because it lets an ill-behaved
QMP command starve *all* QMP monitors.  We do it only out of
(reasonable!) fear of implicit mutual exclusion requirements like the
one you add.

Let's not add more if we can help it.

Your screendump_co is per QemuConsole instead of per QMP monitor only
because you need to find the coroutine in graphic_hw_update_done().  Can
we somehow pass it via function arguments?

In case avoiding the mutual exclusion is impractical: please explain it
in a comment to make it somewhat less implicit.

> +        con->screendump_co = qemu_coroutine_self();
> +        aio_bh_schedule_oneshot(qemu_get_aio_context(),
> +                                graphic_hw_update_bh, con);
> +        qemu_coroutine_yield();
> +        con->screendump_co = NULL;
> +    }
> +

... the command handler needs extra code to cope with either.  Is this
really what we want for coroutine QMP command handlers?  We'll acquire
more of them, and I'd hate to make each one run both in and outside
coroutine context.  Shouldn't we let the HMP core take care of this?  Or
at least have some common infrastructure these handlers can use?

Why is it okay not to call graphic_hw_update() anymore when
!qemu_in_coroutine()?

If qemu_in_coroutine(), we now run graphic_hw_update() in a bottom half,
then yield until the update completes (see graphic_hw_update_done()
above).  Can you explain the need for the bottom half?

>      surface = qemu_console_surface(con);
>      if (!surface) {
>          error_setg(errp, "no surface");
> @@ -379,7 +397,8 @@ void qmp_screendump(const char *filename, bool has_device, const char *device,
>          return;
>      }
>  
> -    if (!ppm_save(fd, surface, errp)) {
> +    image = pixman_image_ref(surface->image);
> +    if (!ppm_save(fd, image, errp)) {
>          qemu_unlink(filename);
>      }
>  }
> diff --git a/ui/trace-events b/ui/trace-events
> index 0dcda393c1..e8726fc969 100644
> --- a/ui/trace-events
> +++ b/ui/trace-events
> @@ -15,7 +15,7 @@ displaysurface_create_pixman(void *display_surface) "surface=%p"
>  displaysurface_free(void *display_surface) "surface=%p"
>  displaychangelistener_register(void *dcl, const char *name) "%p [ %s ]"
>  displaychangelistener_unregister(void *dcl, const char *name) "%p [ %s ]"
> -ppm_save(int fd, void *display_surface) "fd=%d surface=%p"
> +ppm_save(int fd, void *image) "fd=%d image=%p"
>  
>  # gtk.c
>  # gtk-gl-area.c


Re: [PATCH] console: make QMP screendump use coroutine
Posted by Marc-André Lureau 4 years, 1 month ago
Hi

On Thu, Feb 20, 2020 at 8:49 AM Markus Armbruster <armbru@redhat.com> wrote:
>
> Marc-André Lureau <marcandre.lureau@redhat.com> writes:
>
> > Thanks to the QMP coroutine support, the screendump handler can
> > trigger a graphic_hw_update(), yield and let the main loop run until
> > update is done. Then the handler is resumed, and the ppm_save() will
> > write the screen image to disk in the coroutine context (thus
> > non-blocking).
> >
> > For now, HMP doesn't have coroutine support, so it remains potentially
> > outdated or glitched.
> >
> > Fixes:
> > https://bugzilla.redhat.com/show_bug.cgi?id=1230527
> >
> > Based-on: <20200109183545.27452-2-kwolf@redhat.com>
> >
> > Cc: Kevin Wolf <kwolf@redhat.com>
> > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> > ---
> >  qapi/ui.json    |  3 ++-
> >  ui/console.c    | 35 +++++++++++++++++++++++++++--------
> >  ui/trace-events |  2 +-
> >  3 files changed, 30 insertions(+), 10 deletions(-)
> >
> > diff --git a/qapi/ui.json b/qapi/ui.json
> > index e04525d8b4..d941202f34 100644
> > --- a/qapi/ui.json
> > +++ b/qapi/ui.json
> > @@ -96,7 +96,8 @@
> >  #
> >  ##
> >  { 'command': 'screendump',
> > -  'data': {'filename': 'str', '*device': 'str', '*head': 'int'} }
> > +  'data': {'filename': 'str', '*device': 'str', '*head': 'int'},
> > +  'coroutine': true }
> >
> >  ##
> >  # == Spice
> > diff --git a/ui/console.c b/ui/console.c
> > index ac79d679f5..db184b473f 100644
> > --- a/ui/console.c
> > +++ b/ui/console.c
> > @@ -167,6 +167,7 @@ struct QemuConsole {
> >      QEMUFIFO out_fifo;
> >      uint8_t out_fifo_buf[16];
> >      QEMUTimer *kbd_timer;
> > +    Coroutine *screendump_co;
> >
> >      QTAILQ_ENTRY(QemuConsole) next;
> >  };
> > @@ -194,7 +195,6 @@ static void dpy_refresh(DisplayState *s);
> >  static DisplayState *get_alloc_displaystate(void);
> >  static void text_console_update_cursor_timer(void);
> >  static void text_console_update_cursor(void *opaque);
> > -static bool ppm_save(int fd, DisplaySurface *ds, Error **errp);
> >
> >  static void gui_update(void *opaque)
> >  {
> > @@ -263,6 +263,9 @@ static void gui_setup_refresh(DisplayState *ds)
> >
> >  void graphic_hw_update_done(QemuConsole *con)
> >  {
> > +    if (con && con->screendump_co) {
>
> How can !con happen?

I don't think it can happen anymore (the patch evolved over several
years, this is probably a left-over). In any case, it doesn't hurt.


>
> > +        aio_co_wake(con->screendump_co);
> > +    }
> >  }
> >
> >  void graphic_hw_update(QemuConsole *con)
> > @@ -310,16 +313,16 @@ void graphic_hw_invalidate(QemuConsole *con)
> >      }
> >  }
> >
> > -static bool ppm_save(int fd, DisplaySurface *ds, Error **errp)
> > +static bool ppm_save(int fd, pixman_image_t *image, Error **errp)
> >  {
> > -    int width = pixman_image_get_width(ds->image);
> > -    int height = pixman_image_get_height(ds->image);
> > +    int width = pixman_image_get_width(image);
> > +    int height = pixman_image_get_height(image);
> >      g_autoptr(Object) ioc = OBJECT(qio_channel_file_new_fd(fd));
> >      g_autofree char *header = NULL;
> >      g_autoptr(pixman_image_t) linebuf = NULL;
> >      int y;
> >
> > -    trace_ppm_save(fd, ds);
> > +    trace_ppm_save(fd, image);
> >
> >      header = g_strdup_printf("P6\n%d %d\n%d\n", width, height, 255);
> >      if (qio_channel_write_all(QIO_CHANNEL(ioc),
> > @@ -329,7 +332,7 @@ static bool ppm_save(int fd, DisplaySurface *ds, Error **errp)
> >
> >      linebuf = qemu_pixman_linebuf_create(PIXMAN_BE_r8g8b8, width);
> >      for (y = 0; y < height; y++) {
> > -        qemu_pixman_linebuf_fill(linebuf, ds->image, width, 0, y);
> > +        qemu_pixman_linebuf_fill(linebuf, image, width, 0, y);
> >          if (qio_channel_write_all(QIO_CHANNEL(ioc),
> >                                    (char *)pixman_image_get_data(linebuf),
> >                                    pixman_image_get_stride(linebuf), errp) < 0) {
>
> Looks like an unrelated optimization / simplification.  If I was
> maintainer, I'd ask for a separate patch.

I can be split, but it's related. We should pass a reference to
pixman_image_t, rather than a pointer to DisplaySurface, as the
underlying image may change over time, and would result in corrupted
coroutine save or worse.

> > @@ -340,11 +343,18 @@ static bool ppm_save(int fd, DisplaySurface *ds, Error **errp)
> >      return true;
> >  }
> >
> > +static void graphic_hw_update_bh(void *con)
> > +{
> > +    graphic_hw_update(con);
> > +}
> > +
> > +/* may be called in coroutine context or not */
>
> Hmm.
>
> Even though the QMP core always calls in coroutine context, the comment
> is correct: hmp_screendump() calls it outside coroutine context.
> Because of that...
>
> >  void qmp_screendump(const char *filename, bool has_device, const char *device,
> >                      bool has_head, int64_t head, Error **errp)
> >  {
> >      QemuConsole *con;
> >      DisplaySurface *surface;
> > +    g_autoptr(pixman_image_t) image = NULL;
> >      int fd;
> >
> >      if (has_device) {
> > @@ -365,7 +375,15 @@ void qmp_screendump(const char *filename, bool has_device, const char *device,
> >          }
> >      }
> >
> > -    graphic_hw_update(con);
> > +    if (qemu_in_coroutine()) {
> > +        assert(!con->screendump_co);
>
> What if multiple QMP monitors simultaneously screendump?  Hmm, it works
> because all execute one after another in the same coroutine
> qmp_dispatcher_co.  Implicit mutual exclusion.
>
> Executing them one after another is bad, because it lets an ill-behaved
> QMP command starve *all* QMP monitors.  We do it only out of
> (reasonable!) fear of implicit mutual exclusion requirements like the
> one you add.
>
> Let's not add more if we can help it.

The situation is not worse than the current blocking handling.

>
> Your screendump_co is per QemuConsole instead of per QMP monitor only
> because you need to find the coroutine in graphic_hw_update_done().  Can
> we somehow pass it via function arguments?

I think it could be done later, so I suggest a TODO.

> In case avoiding the mutual exclusion is impractical: please explain it
> in a comment to make it somewhat less implicit.
>
> > +        con->screendump_co = qemu_coroutine_self();
> > +        aio_bh_schedule_oneshot(qemu_get_aio_context(),
> > +                                graphic_hw_update_bh, con);
> > +        qemu_coroutine_yield();
> > +        con->screendump_co = NULL;
> > +    }
> > +
>
> ... the command handler needs extra code to cope with either.  Is this
> really what we want for coroutine QMP command handlers?  We'll acquire
> more of them, and I'd hate to make each one run both in and outside
> coroutine context.  Shouldn't we let the HMP core take care of this?  Or
> at least have some common infrastructure these handlers can use?

We have several functions that have this dual support, for ex QIO.

Changing both QMP & HMP commands to run in coroutine is likely
additional work that we may not care at this point.

I propose to leave a TODO, once we have several similar QMP & HMP mix
cases we can try to find a common HMP solution to make the code
simpler in QMP handler.

I don't know if this is going to be a common pattern, we may end up
with conversions that can run both without explicit handling (like the
ppm_save() function, thanks to QIO).

>
> Why is it okay not to call graphic_hw_update() anymore when
> !qemu_in_coroutine()?

You could call it, but then you should wait for completion by
reentering the main loop (that was the point of my earlier qapi-async
series)

>
> If qemu_in_coroutine(), we now run graphic_hw_update() in a bottom half,
> then yield until the update completes (see graphic_hw_update_done()
> above).  Can you explain the need for the bottom half?

At least spice rendering is done in a separate thread, completion is async.

>
> >      surface = qemu_console_surface(con);
> >      if (!surface) {
> >          error_setg(errp, "no surface");
> > @@ -379,7 +397,8 @@ void qmp_screendump(const char *filename, bool has_device, const char *device,
> >          return;
> >      }
> >
> > -    if (!ppm_save(fd, surface, errp)) {
> > +    image = pixman_image_ref(surface->image);
> > +    if (!ppm_save(fd, image, errp)) {
> >          qemu_unlink(filename);
> >      }
> >  }
> > diff --git a/ui/trace-events b/ui/trace-events
> > index 0dcda393c1..e8726fc969 100644
> > --- a/ui/trace-events
> > +++ b/ui/trace-events
> > @@ -15,7 +15,7 @@ displaysurface_create_pixman(void *display_surface) "surface=%p"
> >  displaysurface_free(void *display_surface) "surface=%p"
> >  displaychangelistener_register(void *dcl, const char *name) "%p [ %s ]"
> >  displaychangelistener_unregister(void *dcl, const char *name) "%p [ %s ]"
> > -ppm_save(int fd, void *display_surface) "fd=%d surface=%p"
> > +ppm_save(int fd, void *image) "fd=%d image=%p"
> >
> >  # gtk.c
> >  # gtk-gl-area.c
>
>



-- 
Marc-André Lureau

Re: [PATCH] console: make QMP screendump use coroutine
Posted by Markus Armbruster 4 years, 1 month ago
Cc: David for questions regarding the HMP core.  David, please look for
"Is HMP blocking the main loop a problem?"

Marc-André Lureau <marcandre.lureau@gmail.com> writes:

> Hi
>
> On Thu, Feb 20, 2020 at 8:49 AM Markus Armbruster <armbru@redhat.com> wrote:
>>
>> Marc-André Lureau <marcandre.lureau@redhat.com> writes:
>>
>> > Thanks to the QMP coroutine support, the screendump handler can
>> > trigger a graphic_hw_update(), yield and let the main loop run until
>> > update is done. Then the handler is resumed, and the ppm_save() will
>> > write the screen image to disk in the coroutine context (thus
>> > non-blocking).
>> >
>> > For now, HMP doesn't have coroutine support, so it remains potentially
>> > outdated or glitched.
>> >
>> > Fixes:
>> > https://bugzilla.redhat.com/show_bug.cgi?id=1230527
>> >
>> > Based-on: <20200109183545.27452-2-kwolf@redhat.com>
>> >
>> > Cc: Kevin Wolf <kwolf@redhat.com>
>> > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
>> > ---
>> >  qapi/ui.json    |  3 ++-
>> >  ui/console.c    | 35 +++++++++++++++++++++++++++--------
>> >  ui/trace-events |  2 +-
>> >  3 files changed, 30 insertions(+), 10 deletions(-)
>> >
>> > diff --git a/qapi/ui.json b/qapi/ui.json
>> > index e04525d8b4..d941202f34 100644
>> > --- a/qapi/ui.json
>> > +++ b/qapi/ui.json
>> > @@ -96,7 +96,8 @@
>> >  #
>> >  ##
>> >  { 'command': 'screendump',
>> > -  'data': {'filename': 'str', '*device': 'str', '*head': 'int'} }
>> > +  'data': {'filename': 'str', '*device': 'str', '*head': 'int'},
>> > +  'coroutine': true }
>> >
>> >  ##
>> >  # == Spice
>> > diff --git a/ui/console.c b/ui/console.c
>> > index ac79d679f5..db184b473f 100644
>> > --- a/ui/console.c
>> > +++ b/ui/console.c
>> > @@ -167,6 +167,7 @@ struct QemuConsole {
>> >      QEMUFIFO out_fifo;
>> >      uint8_t out_fifo_buf[16];
>> >      QEMUTimer *kbd_timer;
>> > +    Coroutine *screendump_co;
>> >
>> >      QTAILQ_ENTRY(QemuConsole) next;
>> >  };
>> > @@ -194,7 +195,6 @@ static void dpy_refresh(DisplayState *s);
>> >  static DisplayState *get_alloc_displaystate(void);
>> >  static void text_console_update_cursor_timer(void);
>> >  static void text_console_update_cursor(void *opaque);
>> > -static bool ppm_save(int fd, DisplaySurface *ds, Error **errp);
>> >
>> >  static void gui_update(void *opaque)
>> >  {
>> > @@ -263,6 +263,9 @@ static void gui_setup_refresh(DisplayState *ds)
>> >
>> >  void graphic_hw_update_done(QemuConsole *con)
>> >  {
>> > +    if (con && con->screendump_co) {
>>
>> How can !con happen?
>
> I don't think it can happen anymore (the patch evolved over several
> years, this is probably a left-over). In any case, it doesn't hurt.

I hate such dead checks, because they make me assume they can actually
happen.  Incorrect assumptions breed bugs.

But I'm willing to defer to the maintainer here.  Gerd?

>> > +        aio_co_wake(con->screendump_co);
>> > +    }
>> >  }
>> >
>> >  void graphic_hw_update(QemuConsole *con)
>> > @@ -310,16 +313,16 @@ void graphic_hw_invalidate(QemuConsole *con)
>> >      }
>> >  }
>> >
>> > -static bool ppm_save(int fd, DisplaySurface *ds, Error **errp)
>> > +static bool ppm_save(int fd, pixman_image_t *image, Error **errp)
>> >  {
>> > -    int width = pixman_image_get_width(ds->image);
>> > -    int height = pixman_image_get_height(ds->image);
>> > +    int width = pixman_image_get_width(image);
>> > +    int height = pixman_image_get_height(image);
>> >      g_autoptr(Object) ioc = OBJECT(qio_channel_file_new_fd(fd));
>> >      g_autofree char *header = NULL;
>> >      g_autoptr(pixman_image_t) linebuf = NULL;
>> >      int y;
>> >
>> > -    trace_ppm_save(fd, ds);
>> > +    trace_ppm_save(fd, image);
>> >
>> >      header = g_strdup_printf("P6\n%d %d\n%d\n", width, height, 255);
>> >      if (qio_channel_write_all(QIO_CHANNEL(ioc),
>> > @@ -329,7 +332,7 @@ static bool ppm_save(int fd, DisplaySurface *ds, Error **errp)
>> >
>> >      linebuf = qemu_pixman_linebuf_create(PIXMAN_BE_r8g8b8, width);
>> >      for (y = 0; y < height; y++) {
>> > -        qemu_pixman_linebuf_fill(linebuf, ds->image, width, 0, y);
>> > +        qemu_pixman_linebuf_fill(linebuf, image, width, 0, y);
>> >          if (qio_channel_write_all(QIO_CHANNEL(ioc),
>> >                                    (char *)pixman_image_get_data(linebuf),
>> >                                    pixman_image_get_stride(linebuf), errp) < 0) {
>>
>> Looks like an unrelated optimization / simplification.  If I was
>> maintainer, I'd ask for a separate patch.
>
> I can be split, but it's related. We should pass a reference to
> pixman_image_t, rather than a pointer to DisplaySurface, as the
> underlying image may change over time, and would result in corrupted
> coroutine save or worse.

Work that into your commit message, please.  Might be easier if you
split, but that's between you and the maintainer :)

>> > @@ -340,11 +343,18 @@ static bool ppm_save(int fd, DisplaySurface *ds, Error **errp)
>> >      return true;
>> >  }
>> >
>> > +static void graphic_hw_update_bh(void *con)
>> > +{
>> > +    graphic_hw_update(con);
>> > +}
>> > +
>> > +/* may be called in coroutine context or not */
>>
>> Hmm.
>>
>> Even though the QMP core always calls in coroutine context, the comment
>> is correct: hmp_screendump() calls it outside coroutine context.
>> Because of that...
>>
>> >  void qmp_screendump(const char *filename, bool has_device, const char *device,
>> >                      bool has_head, int64_t head, Error **errp)
>> >  {
>> >      QemuConsole *con;
>> >      DisplaySurface *surface;
>> > +    g_autoptr(pixman_image_t) image = NULL;
>> >      int fd;
>> >
>> >      if (has_device) {
>> > @@ -365,7 +375,15 @@ void qmp_screendump(const char *filename, bool has_device, const char *device,
>> >          }
>> >      }
>> >
>> > -    graphic_hw_update(con);
>> > +    if (qemu_in_coroutine()) {
>> > +        assert(!con->screendump_co);
>>
>> What if multiple QMP monitors simultaneously screendump?  Hmm, it works
>> because all execute one after another in the same coroutine
>> qmp_dispatcher_co.  Implicit mutual exclusion.
>>
>> Executing them one after another is bad, because it lets an ill-behaved
>> QMP command starve *all* QMP monitors.  We do it only out of
>> (reasonable!) fear of implicit mutual exclusion requirements like the
>> one you add.
>>
>> Let's not add more if we can help it.
>
> The situation is not worse than the current blocking handling.

Really?

What makes executing multiple qmp_screendump() concurrently (in separate
threads) or interleaved (in separate coroutines in the same thread)
unsafe before this patch?

>> Your screendump_co is per QemuConsole instead of per QMP monitor only
>> because you need to find the coroutine in graphic_hw_update_done().  Can
>> we somehow pass it via function arguments?
>
> I think it could be done later, so I suggest a TODO.

We should avoid making our dependence on implicit mutual exclusion
worse.  When we do it anyway, a big, fat, ugly comment is definitely
called for.

>> In case avoiding the mutual exclusion is impractical: please explain it
>> in a comment to make it somewhat less implicit.

It is anything but: see appended patch.

>> > +        con->screendump_co = qemu_coroutine_self();
>> > +        aio_bh_schedule_oneshot(qemu_get_aio_context(),
>> > +                                graphic_hw_update_bh, con);
>> > +        qemu_coroutine_yield();
>> > +        con->screendump_co = NULL;
>> > +    }
>> > +
>>
>> ... the command handler needs extra code to cope with either.  Is this
>> really what we want for coroutine QMP command handlers?  We'll acquire
>> more of them, and I'd hate to make each one run both in and outside
>> coroutine context.  Shouldn't we let the HMP core take care of this?  Or
>> at least have some common infrastructure these handlers can use?
>
> We have several functions that have this dual support, for ex QIO.
>
> Changing both QMP & HMP commands to run in coroutine is likely
> additional work that we may not care at this point.

If it wasn't for non-QMP calls (typically HMP, but also CLI), then
handlers for QMP commands with 'coroutine': true could be coroutine_fn.

So far, coroutine_fn is merely documentation.  Perhaps it can guide a
checker for "do coroutine stuff only in coroutines" some day.  Would be
nice, because the coroutine calls are often buried deep, and far away
from the code that ensures they run in a coroutine.

My point is: marking functions coroutine_fn is good.  We should do it
more.  We should try to avoid stuff that hinders doing it more.

> I propose to leave a TODO, once we have several similar QMP & HMP mix
> cases we can try to find a common HMP solution to make the code
> simpler in QMP handler.

Collecting several users before building infrastructure makes sense when
the design of the infrastructure isn't obvious, or when the need for it
is in doubt.

Neither is the case for running QMP handlers in a coroutine: QMP
commands blocking the main loop is without doubt a problem we need to
solve, and the way to solve it was obvious enough for Kevin to do it
with one user: block_resize.  A second one quickly followed: screendump.

The only part that's different for HMP, I think, is "need".

Is HMP blocking the main loop a problem?

If yes, is it serious enough to justify solving it?

If yes, then putting workarounds into QMP handlers now so we can put off
solving it some more is taking on technical debt.

> I don't know if this is going to be a common pattern, we may end up
> with conversions that can run both without explicit handling (like the
> ppm_save() function, thanks to QIO).

Yes, such handlers may exist.  Running them out of coroutine context
would throw away their capability not to block the event loop, though,
wouldn't it?

>> Why is it okay not to call graphic_hw_update() anymore when
>> !qemu_in_coroutine()?
>
> You could call it, but then you should wait for completion by
> reentering the main loop (that was the point of my earlier qapi-async
> series)

Possibly stupid question: why is it necessary before this patch
(assuming it is, since we call it), and why is it no longer necessary
after?

Oh, see below.

>>
>> If qemu_in_coroutine(), we now run graphic_hw_update() in a bottom half,
>> then yield until the update completes (see graphic_hw_update_done()
>> above).  Can you explain the need for the bottom half?
>
> At least spice rendering is done in a separate thread, completion is async.

When I ask a stupid question like this one, I'm really after the "for
dummies" explanation.  I may be able to figure it out myself with some
effort, but having to put that kind of effort into patch review makes me
grumpy, and once I'm sufficiently grumpy, I don't want to review patches
anymore, let alone merge them.

Oh well, let me try.  We're in the main loop.  We want to trigger a
"graphics update" (whatever that is, doesn't matter) and wait for it to
complete without blocking the main loop.

"Without blocking the main loop" means the QMP coroutine yields.  I'd
naively expect

    QMP coroutine: schedule the job; yield
    whatever gets scheduled: complete the job; wake up QMP coroutine

Now let's examine the "graphics update" interface.

GraphicHwOps callback gfx_update() comes in two flavours:

* synchronous: complete the job, return

* asynchronous: start the job, return immediately,
  graphic_hw_update_done() will get called on job completion

graphic_hw_update() partly hides the difference:

* synchronous: complete the job, call graphic_hw_update_done()

* asynchronous: start the job, return immediately,
  graphic_hw_update_done() will get called on job completion

This lets you treat the synchronous case more like the asynchronous
case.

You use graphic_hw_update_done() to wake up the QMP coroutine.

I think I can now answer my question "why is it okay not to call
graphic_hw_update() anymore when !qemu_in_coroutine()?"

Before the patch, both QMP and HMP:

* with synchronous gfx_update(): we update before we write out the
  screendump.  The screendump is up-to-date.  Both update and write out
  block the main loop.

* with asynchronous gfx_update(): we start updating, but don't wait for
  it to complete before we write out.  This is wrong.  Write out blocks
  the main loop, but update does not.

After the patch:

* QMP with either gfx_update(): we update before we write out the
  screendump.  The screendump is up-to-date.  Neither update nor write
  out block the main loop.  Improvement.

* HMP with either gfx_update(): we don't update before we write out.
  Similarly wrong for asynchronous gfx_update(), regression for
  synchronous gfx_update().  Write out blocks the main loop as before.

Why is the regression okay?

Back to the bottom half.  The way graphic_hw_update() works, the QMP
coroutine can't schedule then yield.  It *has* to yield before
graphic_hw_update() runs.  That means we need a bottom half.

Alright, I'm officially grumpy now.

Please explain the need for a bottom half in a comment.

>> >      surface = qemu_console_surface(con);
>> >      if (!surface) {
>> >          error_setg(errp, "no surface");
>> > @@ -379,7 +397,8 @@ void qmp_screendump(const char *filename, bool has_device, const char *device,
>> >          return;
>> >      }
>> >
>> > -    if (!ppm_save(fd, surface, errp)) {
>> > +    image = pixman_image_ref(surface->image);
>> > +    if (!ppm_save(fd, image, errp)) {
>> >          qemu_unlink(filename);
>> >      }
>> >  }
>> > diff --git a/ui/trace-events b/ui/trace-events
>> > index 0dcda393c1..e8726fc969 100644
>> > --- a/ui/trace-events
>> > +++ b/ui/trace-events
>> > @@ -15,7 +15,7 @@ displaysurface_create_pixman(void *display_surface) "surface=%p"
>> >  displaysurface_free(void *display_surface) "surface=%p"
>> >  displaychangelistener_register(void *dcl, const char *name) "%p [ %s ]"
>> >  displaychangelistener_unregister(void *dcl, const char *name) "%p [ %s ]"
>> > -ppm_save(int fd, void *display_surface) "fd=%d surface=%p"
>> > +ppm_save(int fd, void *image) "fd=%d image=%p"
>> >
>> >  # gtk.c
>> >  # gtk-gl-area.c


diff --git a/ui/console.c b/ui/console.c
index 57df3a5439..c5aabf5a5f 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -167,7 +167,6 @@ struct QemuConsole {
     QEMUFIFO out_fifo;
     uint8_t out_fifo_buf[16];
     QEMUTimer *kbd_timer;
-    Coroutine *screendump_co;
 
     QTAILQ_ENTRY(QemuConsole) next;
 };
@@ -261,14 +260,20 @@ static void gui_setup_refresh(DisplayState *ds)
     ds->have_text = have_text;
 }
 
-void graphic_hw_update_done(QemuConsole *con)
+static void graphic_hw_update_done_with_kick(QemuConsole *con,
+                                             Coroutine *kick_me)
 {
-    if (con && con->screendump_co) {
-        aio_co_wake(con->screendump_co);
+    if (kick_me) {
+        aio_co_wake(kick_me);
     }
 }
 
-void graphic_hw_update(QemuConsole *con)
+void graphic_hw_update_done(QemuConsole *con)
+{
+    graphic_hw_update_done_with_kick(con, NULL);
+}
+
+static void graphic_hw_update_with_kick(QemuConsole *con, Coroutine *kick_me)
 {
     bool async = false;
     if (!con) {
@@ -279,10 +284,15 @@ void graphic_hw_update(QemuConsole *con)
         async = con->hw_ops->gfx_update_async;
     }
     if (!async) {
-        graphic_hw_update_done(con);
+        graphic_hw_update_done_with_kick(con, kick_me);
     }
 }
 
+void graphic_hw_update(QemuConsole *con)
+{
+    graphic_hw_update_with_kick(con, NULL);
+}
+
 void graphic_hw_gl_block(QemuConsole *con, bool block)
 {
     assert(con != NULL);
@@ -343,9 +353,16 @@ static bool ppm_save(int fd, pixman_image_t *image, Error **errp)
     return true;
 }
 
-static void graphic_hw_update_bh(void *con)
+typedef struct {
+    QemuConsole *con;
+    Coroutine *kick_me;
+} UpdateBHargs;
+
+static void graphic_hw_update_bh(void *p)
 {
-    graphic_hw_update(con);
+    UpdateBHargs *args = p;
+
+    graphic_hw_update_with_kick(args->con, args->kick_me);
 }
 
 /* may be called in coroutine context or not */
@@ -376,12 +393,13 @@ void qmp_screendump(const char *filename, bool has_device, const char *device,
     }
 
     if (qemu_in_coroutine()) {
-        assert(!con->screendump_co);
-        con->screendump_co = qemu_coroutine_self();
+        UpdateBHargs args = {
+            .con = con,
+            .kick_me = qemu_coroutine_self(),
+        };
         aio_bh_schedule_oneshot(qemu_get_aio_context(),
-                                graphic_hw_update_bh, con);
+                                graphic_hw_update_bh, &args);
         qemu_coroutine_yield();
-        con->screendump_co = NULL;
     }
 
     surface = qemu_console_surface(con);


Re: [PATCH] console: make QMP screendump use coroutine
Posted by Kevin Wolf 4 years, 1 month ago
Am 20.02.2020 um 17:01 hat Markus Armbruster geschrieben:
> >> >  void qmp_screendump(const char *filename, bool has_device, const char *device,
> >> >                      bool has_head, int64_t head, Error **errp)
> >> >  {
> >> >      QemuConsole *con;
> >> >      DisplaySurface *surface;
> >> > +    g_autoptr(pixman_image_t) image = NULL;
> >> >      int fd;
> >> >
> >> >      if (has_device) {
> >> > @@ -365,7 +375,15 @@ void qmp_screendump(const char *filename, bool has_device, const char *device,
> >> >          }
> >> >      }
> >> >
> >> > -    graphic_hw_update(con);
> >> > +    if (qemu_in_coroutine()) {
> >> > +        assert(!con->screendump_co);
> >> > +        con->screendump_co = qemu_coroutine_self();
> >> > +        aio_bh_schedule_oneshot(qemu_get_aio_context(),
> >> > +                                graphic_hw_update_bh, con);
> >> > +        qemu_coroutine_yield();
> >> > +        con->screendump_co = NULL;
> >> > +    }
> >>
> >> What if multiple QMP monitors simultaneously screendump?  Hmm, it works
> >> because all execute one after another in the same coroutine
> >> qmp_dispatcher_co.  Implicit mutual exclusion.
> >>
> >> Executing them one after another is bad, because it lets an ill-behaved
> >> QMP command starve *all* QMP monitors.  We do it only out of
> >> (reasonable!) fear of implicit mutual exclusion requirements like the
> >> one you add.
> >>
> >> Let's not add more if we can help it.
> >
> > The situation is not worse than the current blocking handling.
> 
> Really?
> 
> What makes executing multiple qmp_screendump() concurrently (in separate
> threads) or interleaved (in separate coroutines in the same thread)
> unsafe before this patch?

QMP command handlers are guaranteed to run in the main thread with the
BQL held, so there is no concurrency. If you want to change this, you
would have much more complicated problems to solve than in this handler.
I'm not sure it's fair to require thread-safety from one handler when
no other handler is thread safe (except accidentally) and nobody seems
to plan actually calling them from multiple threads.

> >> Your screendump_co is per QemuConsole instead of per QMP monitor only
> >> because you need to find the coroutine in graphic_hw_update_done().  Can
> >> we somehow pass it via function arguments?
> >
> > I think it could be done later, so I suggest a TODO.
> 
> We should avoid making our dependence on implicit mutual exclusion
> worse.  When we do it anyway, a big, fat, ugly comment is definitely
> called for.

Anyway, what I really wanted to add:

This should be easy to solve by having a CoQueue instead of a single
Coroutine pointer. The coroutine would just call qemu_co_queue_wait(),
which adds itself to the queue before it yields and the update
completion would wake up all coroutines that are currently queued with
qemu_co_queue_restart_all().

qemu_co_queue_wait() takes a lock as its second parameter. You don't
need it in this context and can just pass NULL. (This is a lock that
would be dropped while the coroutine is sleeping and automatically
reacquired afterwards.)

> >> In case avoiding the mutual exclusion is impractical: please explain it
> >> in a comment to make it somewhat less implicit.
> 
> It is anything but: see appended patch.

This works, too, but it requires an additional struct. I think the queue
is easier. (Note there is a difference in the mechanism: Your patch
waits for the specific update it triggered, while the CoQueue would wait
for _any_ update to complete. I assume effectively the result is the
same.)

Kevin


Re: [PATCH] console: make QMP screendump use coroutine
Posted by Markus Armbruster 4 years, 1 month ago
Kevin Wolf <kwolf@redhat.com> writes:

> Am 20.02.2020 um 17:01 hat Markus Armbruster geschrieben:
>> >> >  void qmp_screendump(const char *filename, bool has_device, const char *device,
>> >> >                      bool has_head, int64_t head, Error **errp)
>> >> >  {
>> >> >      QemuConsole *con;
>> >> >      DisplaySurface *surface;
>> >> > +    g_autoptr(pixman_image_t) image = NULL;
>> >> >      int fd;
>> >> >
>> >> >      if (has_device) {
>> >> > @@ -365,7 +375,15 @@ void qmp_screendump(const char *filename, bool has_device, const char *device,
>> >> >          }
>> >> >      }
>> >> >
>> >> > -    graphic_hw_update(con);
>> >> > +    if (qemu_in_coroutine()) {
>> >> > +        assert(!con->screendump_co);
>> >> > +        con->screendump_co = qemu_coroutine_self();
>> >> > +        aio_bh_schedule_oneshot(qemu_get_aio_context(),
>> >> > +                                graphic_hw_update_bh, con);
>> >> > +        qemu_coroutine_yield();
>> >> > +        con->screendump_co = NULL;
>> >> > +    }
>> >>
>> >> What if multiple QMP monitors simultaneously screendump?  Hmm, it works
>> >> because all execute one after another in the same coroutine
>> >> qmp_dispatcher_co.  Implicit mutual exclusion.
>> >>
>> >> Executing them one after another is bad, because it lets an ill-behaved
>> >> QMP command starve *all* QMP monitors.  We do it only out of
>> >> (reasonable!) fear of implicit mutual exclusion requirements like the
>> >> one you add.
>> >>
>> >> Let's not add more if we can help it.
>> >
>> > The situation is not worse than the current blocking handling.
>> 
>> Really?
>> 
>> What makes executing multiple qmp_screendump() concurrently (in separate
>> threads) or interleaved (in separate coroutines in the same thread)
>> unsafe before this patch?
>
> QMP command handlers are guaranteed to run in the main thread with the
> BQL held, so there is no concurrency. If you want to change this, you
> would have much more complicated problems to solve than in this handler.
> I'm not sure it's fair to require thread-safety from one handler when
> no other handler is thread safe (except accidentally) and nobody seems
> to plan actually calling them from multiple threads.

"Let's not [...] if we can help it." is hardly a "change this or else no
merge" demand.  It is a challenge to find a more elegant solution.

>> >> Your screendump_co is per QemuConsole instead of per QMP monitor only
>> >> because you need to find the coroutine in graphic_hw_update_done().  Can
>> >> we somehow pass it via function arguments?
>> >
>> > I think it could be done later, so I suggest a TODO.
>> 
>> We should avoid making our dependence on implicit mutual exclusion
>> worse.  When we do it anyway, a big, fat, ugly comment is definitely
>> called for.
>
> Anyway, what I really wanted to add:
>
> This should be easy to solve by having a CoQueue instead of a single

Ah, challenge accepted!  Exactly the outcome I was hoping for :)

> Coroutine pointer. The coroutine would just call qemu_co_queue_wait(),
> which adds itself to the queue before it yields and the update
> completion would wake up all coroutines that are currently queued with
> qemu_co_queue_restart_all().
>
> qemu_co_queue_wait() takes a lock as its second parameter. You don't
> need it in this context and can just pass NULL. (This is a lock that
> would be dropped while the coroutine is sleeping and automatically
> reacquired afterwards.)
>
>> >> In case avoiding the mutual exclusion is impractical: please explain it
>> >> in a comment to make it somewhat less implicit.
>> 
>> It is anything but: see appended patch.
>
> This works, too, but it requires an additional struct. I think the queue
> is easier. (Note there is a difference in the mechanism: Your patch
> waits for the specific update it triggered, while the CoQueue would wait
> for _any_ update to complete. I assume effectively the result is the
> same.)

Your idea sounds much nicer to me.  Thanks!


Re: [PATCH] console: make QMP screendump use coroutine
Posted by Marc-André Lureau 4 years, 1 month ago
Hi

On Fri, Feb 21, 2020 at 5:50 PM Markus Armbruster <armbru@redhat.com> wrote:
>
> Kevin Wolf <kwolf@redhat.com> writes:
>
> > Am 20.02.2020 um 17:01 hat Markus Armbruster geschrieben:
> >> >> >  void qmp_screendump(const char *filename, bool has_device, const char *device,
> >> >> >                      bool has_head, int64_t head, Error **errp)
> >> >> >  {
> >> >> >      QemuConsole *con;
> >> >> >      DisplaySurface *surface;
> >> >> > +    g_autoptr(pixman_image_t) image = NULL;
> >> >> >      int fd;
> >> >> >
> >> >> >      if (has_device) {
> >> >> > @@ -365,7 +375,15 @@ void qmp_screendump(const char *filename, bool has_device, const char *device,
> >> >> >          }
> >> >> >      }
> >> >> >
> >> >> > -    graphic_hw_update(con);
> >> >> > +    if (qemu_in_coroutine()) {
> >> >> > +        assert(!con->screendump_co);
> >> >> > +        con->screendump_co = qemu_coroutine_self();
> >> >> > +        aio_bh_schedule_oneshot(qemu_get_aio_context(),
> >> >> > +                                graphic_hw_update_bh, con);
> >> >> > +        qemu_coroutine_yield();
> >> >> > +        con->screendump_co = NULL;
> >> >> > +    }
> >> >>
> >> >> What if multiple QMP monitors simultaneously screendump?  Hmm, it works
> >> >> because all execute one after another in the same coroutine
> >> >> qmp_dispatcher_co.  Implicit mutual exclusion.
> >> >>
> >> >> Executing them one after another is bad, because it lets an ill-behaved
> >> >> QMP command starve *all* QMP monitors.  We do it only out of
> >> >> (reasonable!) fear of implicit mutual exclusion requirements like the
> >> >> one you add.
> >> >>
> >> >> Let's not add more if we can help it.
> >> >
> >> > The situation is not worse than the current blocking handling.
> >>
> >> Really?
> >>
> >> What makes executing multiple qmp_screendump() concurrently (in separate
> >> threads) or interleaved (in separate coroutines in the same thread)
> >> unsafe before this patch?
> >
> > QMP command handlers are guaranteed to run in the main thread with the
> > BQL held, so there is no concurrency. If you want to change this, you
> > would have much more complicated problems to solve than in this handler.
> > I'm not sure it's fair to require thread-safety from one handler when
> > no other handler is thread safe (except accidentally) and nobody seems
> > to plan actually calling them from multiple threads.
>
> "Let's not [...] if we can help it." is hardly a "change this or else no
> merge" demand.  It is a challenge to find a more elegant solution.
>
> >> >> Your screendump_co is per QemuConsole instead of per QMP monitor only
> >> >> because you need to find the coroutine in graphic_hw_update_done().  Can
> >> >> we somehow pass it via function arguments?
> >> >
> >> > I think it could be done later, so I suggest a TODO.
> >>
> >> We should avoid making our dependence on implicit mutual exclusion
> >> worse.  When we do it anyway, a big, fat, ugly comment is definitely
> >> called for.
> >
> > Anyway, what I really wanted to add:
> >
> > This should be easy to solve by having a CoQueue instead of a single
>
> Ah, challenge accepted!  Exactly the outcome I was hoping for :)
>
> > Coroutine pointer. The coroutine would just call qemu_co_queue_wait(),
> > which adds itself to the queue before it yields and the update
> > completion would wake up all coroutines that are currently queued with
> > qemu_co_queue_restart_all().
> >
> > qemu_co_queue_wait() takes a lock as its second parameter. You don't
> > need it in this context and can just pass NULL. (This is a lock that
> > would be dropped while the coroutine is sleeping and automatically
> > reacquired afterwards.)
> >
> >> >> In case avoiding the mutual exclusion is impractical: please explain it
> >> >> in a comment to make it somewhat less implicit.
> >>
> >> It is anything but: see appended patch.
> >
> > This works, too, but it requires an additional struct. I think the queue
> > is easier. (Note there is a difference in the mechanism: Your patch
> > waits for the specific update it triggered, while the CoQueue would wait
> > for _any_ update to complete. I assume effectively the result is the
> > same.)
>
> Your idea sounds much nicer to me.  Thanks!

Similar to the NULL check you asked to remove,
having a CoQueue there would lead to think that several concurrently
running screendump are possible.

Is this a direction we are willing to take?

fwiw, my earlier async series did allow that, and was using a queue
for concurrent screendumps (but without coroutine & CoQueue, since it
was all traditional callback/events-based)



-- 
Marc-André Lureau

Re: [PATCH] console: make QMP screendump use coroutine
Posted by Markus Armbruster 4 years, 1 month ago
Marc-André Lureau <marcandre.lureau@gmail.com> writes:

> Hi
>
> On Fri, Feb 21, 2020 at 5:50 PM Markus Armbruster <armbru@redhat.com> wrote:
>>
>> Kevin Wolf <kwolf@redhat.com> writes:
>>
>> > Am 20.02.2020 um 17:01 hat Markus Armbruster geschrieben:
>> >> >> >  void qmp_screendump(const char *filename, bool has_device, const char *device,
>> >> >> >                      bool has_head, int64_t head, Error **errp)
>> >> >> >  {
>> >> >> >      QemuConsole *con;
>> >> >> >      DisplaySurface *surface;
>> >> >> > +    g_autoptr(pixman_image_t) image = NULL;
>> >> >> >      int fd;
>> >> >> >
>> >> >> >      if (has_device) {
>> >> >> > @@ -365,7 +375,15 @@ void qmp_screendump(const char *filename, bool has_device, const char *device,
>> >> >> >          }
>> >> >> >      }
>> >> >> >
>> >> >> > -    graphic_hw_update(con);
>> >> >> > +    if (qemu_in_coroutine()) {
>> >> >> > +        assert(!con->screendump_co);
>> >> >> > +        con->screendump_co = qemu_coroutine_self();
>> >> >> > +        aio_bh_schedule_oneshot(qemu_get_aio_context(),
>> >> >> > +                                graphic_hw_update_bh, con);
>> >> >> > +        qemu_coroutine_yield();
>> >> >> > +        con->screendump_co = NULL;
>> >> >> > +    }
>> >> >>
>> >> >> What if multiple QMP monitors simultaneously screendump?  Hmm, it works
>> >> >> because all execute one after another in the same coroutine
>> >> >> qmp_dispatcher_co.  Implicit mutual exclusion.
>> >> >>
>> >> >> Executing them one after another is bad, because it lets an ill-behaved
>> >> >> QMP command starve *all* QMP monitors.  We do it only out of
>> >> >> (reasonable!) fear of implicit mutual exclusion requirements like the
>> >> >> one you add.
>> >> >>
>> >> >> Let's not add more if we can help it.
>> >> >
>> >> > The situation is not worse than the current blocking handling.
>> >>
>> >> Really?
>> >>
>> >> What makes executing multiple qmp_screendump() concurrently (in separate
>> >> threads) or interleaved (in separate coroutines in the same thread)
>> >> unsafe before this patch?
>> >
>> > QMP command handlers are guaranteed to run in the main thread with the
>> > BQL held, so there is no concurrency. If you want to change this, you
>> > would have much more complicated problems to solve than in this handler.
>> > I'm not sure it's fair to require thread-safety from one handler when
>> > no other handler is thread safe (except accidentally) and nobody seems
>> > to plan actually calling them from multiple threads.
>>
>> "Let's not [...] if we can help it." is hardly a "change this or else no
>> merge" demand.  It is a challenge to find a more elegant solution.
>>
>> >> >> Your screendump_co is per QemuConsole instead of per QMP monitor only
>> >> >> because you need to find the coroutine in graphic_hw_update_done().  Can
>> >> >> we somehow pass it via function arguments?
>> >> >
>> >> > I think it could be done later, so I suggest a TODO.
>> >>
>> >> We should avoid making our dependence on implicit mutual exclusion
>> >> worse.  When we do it anyway, a big, fat, ugly comment is definitely
>> >> called for.
>> >
>> > Anyway, what I really wanted to add:
>> >
>> > This should be easy to solve by having a CoQueue instead of a single
>>
>> Ah, challenge accepted!  Exactly the outcome I was hoping for :)
>>
>> > Coroutine pointer. The coroutine would just call qemu_co_queue_wait(),
>> > which adds itself to the queue before it yields and the update
>> > completion would wake up all coroutines that are currently queued with
>> > qemu_co_queue_restart_all().
>> >
>> > qemu_co_queue_wait() takes a lock as its second parameter. You don't
>> > need it in this context and can just pass NULL. (This is a lock that
>> > would be dropped while the coroutine is sleeping and automatically
>> > reacquired afterwards.)
>> >
>> >> >> In case avoiding the mutual exclusion is impractical: please explain it
>> >> >> in a comment to make it somewhat less implicit.
>> >>
>> >> It is anything but: see appended patch.
>> >
>> > This works, too, but it requires an additional struct. I think the queue
>> > is easier. (Note there is a difference in the mechanism: Your patch
>> > waits for the specific update it triggered, while the CoQueue would wait
>> > for _any_ update to complete. I assume effectively the result is the
>> > same.)
>>
>> Your idea sounds much nicer to me.  Thanks!
>
> Similar to the NULL check you asked to remove,
> having a CoQueue there would lead to think that several concurrently
> running screendump are possible.
>
> Is this a direction we are willing to take?

Let's take a step back.

The actual problem is to find the coroutine in graphic_hw_update_done(),
so you can wake it.

Your solution stores the coroutine in the QemuConsole, because that's
readily available in graphic_hw_update_done().

However, it really, really doesn't belong there, it belongs to the
monitor.  Works anyway only because QMP commands execute one after the
other.

Kevin suggested using a CoQueue to avoid this unspoken dependency.  You
object, because it could make readers assume multiple screendump
commands could run concurrently, which is not the case.

Alright, let's KISS: since there's just one main loop, there's just one
coroutine: @qmp_dispatcher_co.  Let's use that, so the dependency on
"one command after the other" is explicit and obvious.

[...]


Re: [PATCH] console: make QMP screendump use coroutine
Posted by Kevin Wolf 4 years, 1 month ago
Am 02.03.2020 um 15:22 hat Markus Armbruster geschrieben:
> Marc-André Lureau <marcandre.lureau@gmail.com> writes:
> 
> > Hi
> >
> > On Fri, Feb 21, 2020 at 5:50 PM Markus Armbruster <armbru@redhat.com> wrote:
> >>
> >> Kevin Wolf <kwolf@redhat.com> writes:
> >>
> >> > Am 20.02.2020 um 17:01 hat Markus Armbruster geschrieben:
> >> >> >> >  void qmp_screendump(const char *filename, bool has_device, const char *device,
> >> >> >> >                      bool has_head, int64_t head, Error **errp)
> >> >> >> >  {
> >> >> >> >      QemuConsole *con;
> >> >> >> >      DisplaySurface *surface;
> >> >> >> > +    g_autoptr(pixman_image_t) image = NULL;
> >> >> >> >      int fd;
> >> >> >> >
> >> >> >> >      if (has_device) {
> >> >> >> > @@ -365,7 +375,15 @@ void qmp_screendump(const char *filename, bool has_device, const char *device,
> >> >> >> >          }
> >> >> >> >      }
> >> >> >> >
> >> >> >> > -    graphic_hw_update(con);
> >> >> >> > +    if (qemu_in_coroutine()) {
> >> >> >> > +        assert(!con->screendump_co);
> >> >> >> > +        con->screendump_co = qemu_coroutine_self();
> >> >> >> > +        aio_bh_schedule_oneshot(qemu_get_aio_context(),
> >> >> >> > +                                graphic_hw_update_bh, con);
> >> >> >> > +        qemu_coroutine_yield();
> >> >> >> > +        con->screendump_co = NULL;
> >> >> >> > +    }
> >> >> >>
> >> >> >> What if multiple QMP monitors simultaneously screendump?  Hmm, it works
> >> >> >> because all execute one after another in the same coroutine
> >> >> >> qmp_dispatcher_co.  Implicit mutual exclusion.
> >> >> >>
> >> >> >> Executing them one after another is bad, because it lets an ill-behaved
> >> >> >> QMP command starve *all* QMP monitors.  We do it only out of
> >> >> >> (reasonable!) fear of implicit mutual exclusion requirements like the
> >> >> >> one you add.
> >> >> >>
> >> >> >> Let's not add more if we can help it.
> >> >> >
> >> >> > The situation is not worse than the current blocking handling.
> >> >>
> >> >> Really?
> >> >>
> >> >> What makes executing multiple qmp_screendump() concurrently (in separate
> >> >> threads) or interleaved (in separate coroutines in the same thread)
> >> >> unsafe before this patch?
> >> >
> >> > QMP command handlers are guaranteed to run in the main thread with the
> >> > BQL held, so there is no concurrency. If you want to change this, you
> >> > would have much more complicated problems to solve than in this handler.
> >> > I'm not sure it's fair to require thread-safety from one handler when
> >> > no other handler is thread safe (except accidentally) and nobody seems
> >> > to plan actually calling them from multiple threads.
> >>
> >> "Let's not [...] if we can help it." is hardly a "change this or else no
> >> merge" demand.  It is a challenge to find a more elegant solution.
> >>
> >> >> >> Your screendump_co is per QemuConsole instead of per QMP monitor only
> >> >> >> because you need to find the coroutine in graphic_hw_update_done().  Can
> >> >> >> we somehow pass it via function arguments?
> >> >> >
> >> >> > I think it could be done later, so I suggest a TODO.
> >> >>
> >> >> We should avoid making our dependence on implicit mutual exclusion
> >> >> worse.  When we do it anyway, a big, fat, ugly comment is definitely
> >> >> called for.
> >> >
> >> > Anyway, what I really wanted to add:
> >> >
> >> > This should be easy to solve by having a CoQueue instead of a single
> >>
> >> Ah, challenge accepted!  Exactly the outcome I was hoping for :)
> >>
> >> > Coroutine pointer. The coroutine would just call qemu_co_queue_wait(),
> >> > which adds itself to the queue before it yields and the update
> >> > completion would wake up all coroutines that are currently queued with
> >> > qemu_co_queue_restart_all().
> >> >
> >> > qemu_co_queue_wait() takes a lock as its second parameter. You don't
> >> > need it in this context and can just pass NULL. (This is a lock that
> >> > would be dropped while the coroutine is sleeping and automatically
> >> > reacquired afterwards.)
> >> >
> >> >> >> In case avoiding the mutual exclusion is impractical: please explain it
> >> >> >> in a comment to make it somewhat less implicit.
> >> >>
> >> >> It is anything but: see appended patch.
> >> >
> >> > This works, too, but it requires an additional struct. I think the queue
> >> > is easier. (Note there is a difference in the mechanism: Your patch
> >> > waits for the specific update it triggered, while the CoQueue would wait
> >> > for _any_ update to complete. I assume effectively the result is the
> >> > same.)
> >>
> >> Your idea sounds much nicer to me.  Thanks!
> >
> > Similar to the NULL check you asked to remove,
> > having a CoQueue there would lead to think that several concurrently
> > running screendump are possible.
> >
> > Is this a direction we are willing to take?
> 
> Let's take a step back.
> 
> The actual problem is to find the coroutine in graphic_hw_update_done(),
> so you can wake it.
> 
> Your solution stores the coroutine in the QemuConsole, because that's
> readily available in graphic_hw_update_done().
> 
> However, it really, really doesn't belong there, it belongs to the
> monitor.  Works anyway only because QMP commands execute one after the
> other.
> 
> Kevin suggested using a CoQueue to avoid this unspoken dependency.  You
> object, because it could make readers assume multiple screendump
> commands could run concurrently, which is not the case.
> 
> Alright, let's KISS: since there's just one main loop, there's just one
> coroutine: @qmp_dispatcher_co.  Let's use that, so the dependency on
> "one command after the other" is explicit and obvious.

Ugh... If you choose that this is the way to go, please add an assertion
at least that we are indeed in qmp_dispatcher_co before yielding.

Kevin


Re: [PATCH] console: make QMP screendump use coroutine
Posted by Markus Armbruster 4 years, 1 month ago
Kevin Wolf <kwolf@redhat.com> writes:

> Am 02.03.2020 um 15:22 hat Markus Armbruster geschrieben:
>> Marc-André Lureau <marcandre.lureau@gmail.com> writes:
>> 
>> > Hi
>> >
>> > On Fri, Feb 21, 2020 at 5:50 PM Markus Armbruster <armbru@redhat.com> wrote:
>> >>
>> >> Kevin Wolf <kwolf@redhat.com> writes:
>> >>
>> >> > Am 20.02.2020 um 17:01 hat Markus Armbruster geschrieben:
>> >> >> >> >  void qmp_screendump(const char *filename, bool has_device, const char *device,
>> >> >> >> >                      bool has_head, int64_t head, Error **errp)
>> >> >> >> >  {
>> >> >> >> >      QemuConsole *con;
>> >> >> >> >      DisplaySurface *surface;
>> >> >> >> > +    g_autoptr(pixman_image_t) image = NULL;
>> >> >> >> >      int fd;
>> >> >> >> >
>> >> >> >> >      if (has_device) {
>> >> >> >> > @@ -365,7 +375,15 @@ void qmp_screendump(const char *filename, bool has_device, const char *device,
>> >> >> >> >          }
>> >> >> >> >      }
>> >> >> >> >
>> >> >> >> > -    graphic_hw_update(con);
>> >> >> >> > +    if (qemu_in_coroutine()) {
>> >> >> >> > +        assert(!con->screendump_co);
>> >> >> >> > +        con->screendump_co = qemu_coroutine_self();
>> >> >> >> > +        aio_bh_schedule_oneshot(qemu_get_aio_context(),
>> >> >> >> > +                                graphic_hw_update_bh, con);
>> >> >> >> > +        qemu_coroutine_yield();
>> >> >> >> > +        con->screendump_co = NULL;
>> >> >> >> > +    }
>> >> >> >>
>> >> >> >> What if multiple QMP monitors simultaneously screendump?  Hmm, it works
>> >> >> >> because all execute one after another in the same coroutine
>> >> >> >> qmp_dispatcher_co.  Implicit mutual exclusion.
>> >> >> >>
>> >> >> >> Executing them one after another is bad, because it lets an ill-behaved
>> >> >> >> QMP command starve *all* QMP monitors.  We do it only out of
>> >> >> >> (reasonable!) fear of implicit mutual exclusion requirements like the
>> >> >> >> one you add.
>> >> >> >>
>> >> >> >> Let's not add more if we can help it.
>> >> >> >
>> >> >> > The situation is not worse than the current blocking handling.
>> >> >>
>> >> >> Really?
>> >> >>
>> >> >> What makes executing multiple qmp_screendump() concurrently (in separate
>> >> >> threads) or interleaved (in separate coroutines in the same thread)
>> >> >> unsafe before this patch?
>> >> >
>> >> > QMP command handlers are guaranteed to run in the main thread with the
>> >> > BQL held, so there is no concurrency. If you want to change this, you
>> >> > would have much more complicated problems to solve than in this handler.
>> >> > I'm not sure it's fair to require thread-safety from one handler when
>> >> > no other handler is thread safe (except accidentally) and nobody seems
>> >> > to plan actually calling them from multiple threads.
>> >>
>> >> "Let's not [...] if we can help it." is hardly a "change this or else no
>> >> merge" demand.  It is a challenge to find a more elegant solution.
>> >>
>> >> >> >> Your screendump_co is per QemuConsole instead of per QMP monitor only
>> >> >> >> because you need to find the coroutine in graphic_hw_update_done().  Can
>> >> >> >> we somehow pass it via function arguments?
>> >> >> >
>> >> >> > I think it could be done later, so I suggest a TODO.
>> >> >>
>> >> >> We should avoid making our dependence on implicit mutual exclusion
>> >> >> worse.  When we do it anyway, a big, fat, ugly comment is definitely
>> >> >> called for.
>> >> >
>> >> > Anyway, what I really wanted to add:
>> >> >
>> >> > This should be easy to solve by having a CoQueue instead of a single
>> >>
>> >> Ah, challenge accepted!  Exactly the outcome I was hoping for :)
>> >>
>> >> > Coroutine pointer. The coroutine would just call qemu_co_queue_wait(),
>> >> > which adds itself to the queue before it yields and the update
>> >> > completion would wake up all coroutines that are currently queued with
>> >> > qemu_co_queue_restart_all().
>> >> >
>> >> > qemu_co_queue_wait() takes a lock as its second parameter. You don't
>> >> > need it in this context and can just pass NULL. (This is a lock that
>> >> > would be dropped while the coroutine is sleeping and automatically
>> >> > reacquired afterwards.)
>> >> >
>> >> >> >> In case avoiding the mutual exclusion is impractical: please explain it
>> >> >> >> in a comment to make it somewhat less implicit.
>> >> >>
>> >> >> It is anything but: see appended patch.
>> >> >
>> >> > This works, too, but it requires an additional struct. I think the queue
>> >> > is easier. (Note there is a difference in the mechanism: Your patch
>> >> > waits for the specific update it triggered, while the CoQueue would wait
>> >> > for _any_ update to complete. I assume effectively the result is the
>> >> > same.)
>> >>
>> >> Your idea sounds much nicer to me.  Thanks!
>> >
>> > Similar to the NULL check you asked to remove,
>> > having a CoQueue there would lead to think that several concurrently
>> > running screendump are possible.
>> >
>> > Is this a direction we are willing to take?
>> 
>> Let's take a step back.
>> 
>> The actual problem is to find the coroutine in graphic_hw_update_done(),
>> so you can wake it.
>> 
>> Your solution stores the coroutine in the QemuConsole, because that's
>> readily available in graphic_hw_update_done().
>> 
>> However, it really, really doesn't belong there, it belongs to the
>> monitor.  Works anyway only because QMP commands execute one after the
>> other.
>> 
>> Kevin suggested using a CoQueue to avoid this unspoken dependency.  You
>> object, because it could make readers assume multiple screendump
>> commands could run concurrently, which is not the case.
>> 
>> Alright, let's KISS: since there's just one main loop, there's just one
>> coroutine: @qmp_dispatcher_co.  Let's use that, so the dependency on
>> "one command after the other" is explicit and obvious.
>
> Ugh... If you choose that this is the way to go, please add an assertion
> at least that we are indeed in qmp_dispatcher_co before yielding.

No objection.

To apply the QMP coroutine infrastructure for 5.0, I need a user.  We
have two: block_resize from Kevin, and screendump from Marc-André.
Neither is quite ready, yet.  I'll wait for a respin of either one.


Re: [PATCH] console: make QMP screendump use coroutine
Posted by Marc-André Lureau 4 years, 1 month ago
Hi

On Tue, Mar 3, 2020 at 8:41 AM Markus Armbruster <armbru@redhat.com> wrote:
>
> Kevin Wolf <kwolf@redhat.com> writes:
>
> > Am 02.03.2020 um 15:22 hat Markus Armbruster geschrieben:
> >> Marc-André Lureau <marcandre.lureau@gmail.com> writes:
> >>
> >> > Hi
> >> >
> >> > On Fri, Feb 21, 2020 at 5:50 PM Markus Armbruster <armbru@redhat.com> wrote:
> >> >>
> >> >> Kevin Wolf <kwolf@redhat.com> writes:
> >> >>
> >> >> > Am 20.02.2020 um 17:01 hat Markus Armbruster geschrieben:
> >> >> >> >> >  void qmp_screendump(const char *filename, bool has_device, const char *device,
> >> >> >> >> >                      bool has_head, int64_t head, Error **errp)
> >> >> >> >> >  {
> >> >> >> >> >      QemuConsole *con;
> >> >> >> >> >      DisplaySurface *surface;
> >> >> >> >> > +    g_autoptr(pixman_image_t) image = NULL;
> >> >> >> >> >      int fd;
> >> >> >> >> >
> >> >> >> >> >      if (has_device) {
> >> >> >> >> > @@ -365,7 +375,15 @@ void qmp_screendump(const char *filename, bool has_device, const char *device,
> >> >> >> >> >          }
> >> >> >> >> >      }
> >> >> >> >> >
> >> >> >> >> > -    graphic_hw_update(con);
> >> >> >> >> > +    if (qemu_in_coroutine()) {
> >> >> >> >> > +        assert(!con->screendump_co);
> >> >> >> >> > +        con->screendump_co = qemu_coroutine_self();
> >> >> >> >> > +        aio_bh_schedule_oneshot(qemu_get_aio_context(),
> >> >> >> >> > +                                graphic_hw_update_bh, con);
> >> >> >> >> > +        qemu_coroutine_yield();
> >> >> >> >> > +        con->screendump_co = NULL;
> >> >> >> >> > +    }
> >> >> >> >>
> >> >> >> >> What if multiple QMP monitors simultaneously screendump?  Hmm, it works
> >> >> >> >> because all execute one after another in the same coroutine
> >> >> >> >> qmp_dispatcher_co.  Implicit mutual exclusion.
> >> >> >> >>
> >> >> >> >> Executing them one after another is bad, because it lets an ill-behaved
> >> >> >> >> QMP command starve *all* QMP monitors.  We do it only out of
> >> >> >> >> (reasonable!) fear of implicit mutual exclusion requirements like the
> >> >> >> >> one you add.
> >> >> >> >>
> >> >> >> >> Let's not add more if we can help it.
> >> >> >> >
> >> >> >> > The situation is not worse than the current blocking handling.
> >> >> >>
> >> >> >> Really?
> >> >> >>
> >> >> >> What makes executing multiple qmp_screendump() concurrently (in separate
> >> >> >> threads) or interleaved (in separate coroutines in the same thread)
> >> >> >> unsafe before this patch?
> >> >> >
> >> >> > QMP command handlers are guaranteed to run in the main thread with the
> >> >> > BQL held, so there is no concurrency. If you want to change this, you
> >> >> > would have much more complicated problems to solve than in this handler.
> >> >> > I'm not sure it's fair to require thread-safety from one handler when
> >> >> > no other handler is thread safe (except accidentally) and nobody seems
> >> >> > to plan actually calling them from multiple threads.
> >> >>
> >> >> "Let's not [...] if we can help it." is hardly a "change this or else no
> >> >> merge" demand.  It is a challenge to find a more elegant solution.
> >> >>
> >> >> >> >> Your screendump_co is per QemuConsole instead of per QMP monitor only
> >> >> >> >> because you need to find the coroutine in graphic_hw_update_done().  Can
> >> >> >> >> we somehow pass it via function arguments?
> >> >> >> >
> >> >> >> > I think it could be done later, so I suggest a TODO.
> >> >> >>
> >> >> >> We should avoid making our dependence on implicit mutual exclusion
> >> >> >> worse.  When we do it anyway, a big, fat, ugly comment is definitely
> >> >> >> called for.
> >> >> >
> >> >> > Anyway, what I really wanted to add:
> >> >> >
> >> >> > This should be easy to solve by having a CoQueue instead of a single
> >> >>
> >> >> Ah, challenge accepted!  Exactly the outcome I was hoping for :)
> >> >>
> >> >> > Coroutine pointer. The coroutine would just call qemu_co_queue_wait(),
> >> >> > which adds itself to the queue before it yields and the update
> >> >> > completion would wake up all coroutines that are currently queued with
> >> >> > qemu_co_queue_restart_all().
> >> >> >
> >> >> > qemu_co_queue_wait() takes a lock as its second parameter. You don't
> >> >> > need it in this context and can just pass NULL. (This is a lock that
> >> >> > would be dropped while the coroutine is sleeping and automatically
> >> >> > reacquired afterwards.)
> >> >> >
> >> >> >> >> In case avoiding the mutual exclusion is impractical: please explain it
> >> >> >> >> in a comment to make it somewhat less implicit.
> >> >> >>
> >> >> >> It is anything but: see appended patch.
> >> >> >
> >> >> > This works, too, but it requires an additional struct. I think the queue
> >> >> > is easier. (Note there is a difference in the mechanism: Your patch
> >> >> > waits for the specific update it triggered, while the CoQueue would wait
> >> >> > for _any_ update to complete. I assume effectively the result is the
> >> >> > same.)
> >> >>
> >> >> Your idea sounds much nicer to me.  Thanks!
> >> >
> >> > Similar to the NULL check you asked to remove,
> >> > having a CoQueue there would lead to think that several concurrently
> >> > running screendump are possible.
> >> >
> >> > Is this a direction we are willing to take?
> >>
> >> Let's take a step back.
> >>
> >> The actual problem is to find the coroutine in graphic_hw_update_done(),
> >> so you can wake it.
> >>
> >> Your solution stores the coroutine in the QemuConsole, because that's
> >> readily available in graphic_hw_update_done().
> >>
> >> However, it really, really doesn't belong there, it belongs to the
> >> monitor.  Works anyway only because QMP commands execute one after the
> >> other.
> >>
> >> Kevin suggested using a CoQueue to avoid this unspoken dependency.  You
> >> object, because it could make readers assume multiple screendump
> >> commands could run concurrently, which is not the case.
> >>
> >> Alright, let's KISS: since there's just one main loop, there's just one
> >> coroutine: @qmp_dispatcher_co.  Let's use that, so the dependency on
> >> "one command after the other" is explicit and obvious.
> >
> > Ugh... If you choose that this is the way to go, please add an assertion
> > at least that we are indeed in qmp_dispatcher_co before yielding.
>
> No objection.
>
> To apply the QMP coroutine infrastructure for 5.0, I need a user.  We
> have two: block_resize from Kevin, and screendump from Marc-André.
> Neither is quite ready, yet.  I'll wait for a respin of either one.
>

Is this the change you expect?

diff --git a/ui/console.c b/ui/console.c
index 57df3a5439..d6a8bf0cee 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -167,7 +167,7 @@ struct QemuConsole {
     QEMUFIFO out_fifo;
     uint8_t out_fifo_buf[16];
     QEMUTimer *kbd_timer;
-    Coroutine *screendump_co;
+    bool wake_qmp_dispatcher_on_update;

     QTAILQ_ENTRY(QemuConsole) next;
 };
@@ -263,8 +263,8 @@ static void gui_setup_refresh(DisplayState *ds)

 void graphic_hw_update_done(QemuConsole *con)
 {
-    if (con && con->screendump_co) {
-        aio_co_wake(con->screendump_co);
+    if (con->wake_qmp_dispatcher_on_update) {
+        aio_co_wake(qmp_dispatcher_co);
     }
 }

@@ -376,12 +376,15 @@ void qmp_screendump(const char *filename, bool
has_device, const char *device,
     }

     if (qemu_in_coroutine()) {
-        assert(!con->screendump_co);
-        con->screendump_co = qemu_coroutine_self();
+        /*
+         * The coroutine code is generic, but we are supposed to be on
+         * the QMP dispatcher coroutine, and we will resume only that now.
+         */
+        assert(qemu_coroutine_self() == qmp_dispatcher_co);
+        con->wake_qmp_dispatcher_on_update = true;
         aio_bh_schedule_oneshot(qemu_get_aio_context(),
                                 graphic_hw_update_bh, con);
         qemu_coroutine_yield();
-        con->screendump_co = NULL;
+        con->wake_qmp_dispatcher_on_update = false;
     }

-- 
Marc-André Lureau

Re: [PATCH] console: make QMP screendump use coroutine
Posted by Markus Armbruster 4 years, 1 month ago
Marc-André Lureau <marcandre.lureau@gmail.com> writes:

> Hi
>
> On Tue, Mar 3, 2020 at 8:41 AM Markus Armbruster <armbru@redhat.com> wrote:
[...]
>> >> Let's take a step back.
>> >>
>> >> The actual problem is to find the coroutine in graphic_hw_update_done(),
>> >> so you can wake it.
>> >>
>> >> Your solution stores the coroutine in the QemuConsole, because that's
>> >> readily available in graphic_hw_update_done().
>> >>
>> >> However, it really, really doesn't belong there, it belongs to the
>> >> monitor.  Works anyway only because QMP commands execute one after the
>> >> other.

As discussed in the "[PATCH v4 1/4] qapi: Add a 'coroutine' flag for
commands" sub-thread, HMP commands may execute interleaved.  Your code
still works, because it only ever abuses QemuConsole with QMP.  But it's
brittle.

Looks like we'll change HMP not to run interleaved.  That adds a belt to
the suspenders.  You might argue that's robust enough.

But it's not just the brittleness I dislike.  Storing per-monitor-
command data in QemuConsole is ugly as sin.  Arguing that it works
because commands are strictly serialized, and that burying one more
dependence on such serialization deep in command code won't make the
situation appreciably worse, doesn't change the fact that QemuConsole
has no business holding per-monitor-command data.

>> >>
>> >> Kevin suggested using a CoQueue to avoid this unspoken dependency.  You
>> >> object, because it could make readers assume multiple screendump
>> >> commands could run concurrently, which is not the case.
>> >>
>> >> Alright, let's KISS: since there's just one main loop, there's just one
>> >> coroutine: @qmp_dispatcher_co.  Let's use that, so the dependency on
>> >> "one command after the other" is explicit and obvious.
>> >
>> > Ugh... If you choose that this is the way to go, please add an assertion
>> > at least that we are indeed in qmp_dispatcher_co before yielding.
>>
>> No objection.
>>
>> To apply the QMP coroutine infrastructure for 5.0, I need a user.  We
>> have two: block_resize from Kevin, and screendump from Marc-André.
>> Neither is quite ready, yet.  I'll wait for a respin of either one.
>>
>
> Is this the change you expect?
>
> diff --git a/ui/console.c b/ui/console.c
> index 57df3a5439..d6a8bf0cee 100644
> --- a/ui/console.c
> +++ b/ui/console.c
> @@ -167,7 +167,7 @@ struct QemuConsole {
>      QEMUFIFO out_fifo;
>      uint8_t out_fifo_buf[16];
>      QEMUTimer *kbd_timer;
> -    Coroutine *screendump_co;
> +    bool wake_qmp_dispatcher_on_update;
>
>      QTAILQ_ENTRY(QemuConsole) next;
>  };

No, because it still stores per-command data in QemuConsole.  You need
to, because...

> @@ -263,8 +263,8 @@ static void gui_setup_refresh(DisplayState *ds)
>
>  void graphic_hw_update_done(QemuConsole *con)
>  {
> -    if (con && con->screendump_co) {
> -        aio_co_wake(con->screendump_co);
> +    if (con->wake_qmp_dispatcher_on_update) {
> +        aio_co_wake(qmp_dispatcher_co);

... you may call aio_co_wake() only while @qmp_dispatcher_co is waiting
for it after yielding ...

>      }
>  }
>
> @@ -376,12 +376,15 @@ void qmp_screendump(const char *filename, bool
> has_device, const char *device,
>      }
>
>      if (qemu_in_coroutine()) {
> -        assert(!con->screendump_co);
> -        con->screendump_co = qemu_coroutine_self();
> +        /*
> +         * The coroutine code is generic, but we are supposed to be on
> +         * the QMP dispatcher coroutine, and we will resume only that now.
> +         */
> +        assert(qemu_coroutine_self() == qmp_dispatcher_co);
> +        con->wake_qmp_dispatcher_on_update = true;
>          aio_bh_schedule_oneshot(qemu_get_aio_context(),
>                                  graphic_hw_update_bh, con);
>          qemu_coroutine_yield();

... here.  I missed that need when I suggested to use
@qmp_dispatcher_co.  Sorry.

> -        con->screendump_co = NULL;
> +        con->wake_qmp_dispatcher_on_update = false;
>      }

Have a look at qxl, the only provider of asynchronous .gfx_update().
The actual work is done in qxl-render.c.  qxl_render_update(),
qxl_render_update_area_bh(), qxl_render_update_area_unlocked(),
qxl_render_update_area_done() cooperate carefully to support multiple
updates in flight.

I guess that's necessary because we also call graphic_hw_update() from
display code such as ui/vnc.c and ui/spice-display.c.

Before your patch, none of these users waits for an asynchronous update
to complete, as far as I can tell.  Afterwards, QMP screendump does.
Whether more users should I can't tell.  Gerd, can you?

Your patch communicates completion to screendump by making
graphic_hw_update() wake a coroutine.  It stores the coroutine in
QemuConsole, exploiting that only one call site actually waits for an
asynchronous update to complete, and that caller is never reentered.

This new mechanism is not usable for any other caller, unless it somehow
synchronizes with screendump to avoid reentrance.

Shouldn't we offer a more generally useful way to wait for asynchronous
update to complete?  Kevin's idea to use a queue of waiters sounds more
appropriate than ever to me.


Re: [PATCH] console: make QMP screendump use coroutine
Posted by Marc-André Lureau 4 years, 1 month ago
Hi

On Fri, Mar 6, 2020 at 9:44 AM Markus Armbruster <armbru@redhat.com> wrote:
>
> Marc-André Lureau <marcandre.lureau@gmail.com> writes:
>
> > Hi
> >
> > On Tue, Mar 3, 2020 at 8:41 AM Markus Armbruster <armbru@redhat.com> wrote:
> [...]
> >> >> Let's take a step back.
> >> >>
> >> >> The actual problem is to find the coroutine in graphic_hw_update_done(),
> >> >> so you can wake it.
> >> >>
> >> >> Your solution stores the coroutine in the QemuConsole, because that's
> >> >> readily available in graphic_hw_update_done().
> >> >>
> >> >> However, it really, really doesn't belong there, it belongs to the
> >> >> monitor.  Works anyway only because QMP commands execute one after the
> >> >> other.
>
> As discussed in the "[PATCH v4 1/4] qapi: Add a 'coroutine' flag for
> commands" sub-thread, HMP commands may execute interleaved.  Your code
> still works, because it only ever abuses QemuConsole with QMP.  But it's
> brittle.
>
> Looks like we'll change HMP not to run interleaved.  That adds a belt to
> the suspenders.  You might argue that's robust enough.
>
> But it's not just the brittleness I dislike.  Storing per-monitor-
> command data in QemuConsole is ugly as sin.  Arguing that it works
> because commands are strictly serialized, and that burying one more
> dependence on such serialization deep in command code won't make the
> situation appreciably worse, doesn't change the fact that QemuConsole
> has no business holding per-monitor-command data.

It is data (the monitor coroutine) associated with an event handler
(graphic-update).

Someone has to hold the handler/data, and the console seems appropriate.

We could abstract this a bit, for ex, having a GHookList, but as long
as there is only one handler, it's unnecessary.

>
> >> >>
> >> >> Kevin suggested using a CoQueue to avoid this unspoken dependency.  You
> >> >> object, because it could make readers assume multiple screendump
> >> >> commands could run concurrently, which is not the case.
> >> >>
> >> >> Alright, let's KISS: since there's just one main loop, there's just one
> >> >> coroutine: @qmp_dispatcher_co.  Let's use that, so the dependency on
> >> >> "one command after the other" is explicit and obvious.
> >> >
> >> > Ugh... If you choose that this is the way to go, please add an assertion
> >> > at least that we are indeed in qmp_dispatcher_co before yielding.
> >>
> >> No objection.
> >>
> >> To apply the QMP coroutine infrastructure for 5.0, I need a user.  We
> >> have two: block_resize from Kevin, and screendump from Marc-André.
> >> Neither is quite ready, yet.  I'll wait for a respin of either one.
> >>
> >
> > Is this the change you expect?
> >
> > diff --git a/ui/console.c b/ui/console.c
> > index 57df3a5439..d6a8bf0cee 100644
> > --- a/ui/console.c
> > +++ b/ui/console.c
> > @@ -167,7 +167,7 @@ struct QemuConsole {
> >      QEMUFIFO out_fifo;
> >      uint8_t out_fifo_buf[16];
> >      QEMUTimer *kbd_timer;
> > -    Coroutine *screendump_co;
> > +    bool wake_qmp_dispatcher_on_update;
> >
> >      QTAILQ_ENTRY(QemuConsole) next;
> >  };
>
> No, because it still stores per-command data in QemuConsole.  You need
> to, because...
>
> > @@ -263,8 +263,8 @@ static void gui_setup_refresh(DisplayState *ds)
> >
> >  void graphic_hw_update_done(QemuConsole *con)
> >  {
> > -    if (con && con->screendump_co) {
> > -        aio_co_wake(con->screendump_co);
> > +    if (con->wake_qmp_dispatcher_on_update) {
> > +        aio_co_wake(qmp_dispatcher_co);
>
> ... you may call aio_co_wake() only while @qmp_dispatcher_co is waiting
> for it after yielding ...
>
> >      }
> >  }
> >
> > @@ -376,12 +376,15 @@ void qmp_screendump(const char *filename, bool
> > has_device, const char *device,
> >      }
> >
> >      if (qemu_in_coroutine()) {
> > -        assert(!con->screendump_co);
> > -        con->screendump_co = qemu_coroutine_self();
> > +        /*
> > +         * The coroutine code is generic, but we are supposed to be on
> > +         * the QMP dispatcher coroutine, and we will resume only that now.
> > +         */
> > +        assert(qemu_coroutine_self() == qmp_dispatcher_co);
> > +        con->wake_qmp_dispatcher_on_update = true;
> >          aio_bh_schedule_oneshot(qemu_get_aio_context(),
> >                                  graphic_hw_update_bh, con);
> >          qemu_coroutine_yield();
>
> ... here.  I missed that need when I suggested to use
> @qmp_dispatcher_co.  Sorry.
>
> > -        con->screendump_co = NULL;
> > +        con->wake_qmp_dispatcher_on_update = false;
> >      }
>
> Have a look at qxl, the only provider of asynchronous .gfx_update().
> The actual work is done in qxl-render.c.  qxl_render_update(),
> qxl_render_update_area_bh(), qxl_render_update_area_unlocked(),
> qxl_render_update_area_done() cooperate carefully to support multiple
> updates in flight.
>
> I guess that's necessary because we also call graphic_hw_update() from
> display code such as ui/vnc.c and ui/spice-display.c.
>
> Before your patch, none of these users waits for an asynchronous update
> to complete, as far as I can tell.  Afterwards, QMP screendump does.
> Whether more users should I can't tell.  Gerd, can you?
>
> Your patch communicates completion to screendump by making
> graphic_hw_update() wake a coroutine.  It stores the coroutine in
> QemuConsole, exploiting that only one call site actually waits for an
> asynchronous update to complete, and that caller is never reentered.
>
> This new mechanism is not usable for any other caller, unless it somehow
> synchronizes with screendump to avoid reentrance.
>
> Shouldn't we offer a more generally useful way to wait for asynchronous
> update to complete?  Kevin's idea to use a queue of waiters sounds more
> appropriate than ever to me.
>

A CoQueue is a queue of coroutine. Similarly to the GHook suggestion,
I don't see much point as long as there is a single known handler.
Covering it through those abstractions will just lead to wrong
assumptions or code harder to read imho.

-- 
Marc-André Lureau

Re: [PATCH] console: make QMP screendump use coroutine
Posted by Markus Armbruster 4 years, 1 month ago
Marc-André Lureau <marcandre.lureau@gmail.com> writes:

> Hi
>
> On Fri, Mar 6, 2020 at 9:44 AM Markus Armbruster <armbru@redhat.com> wrote:
>>
>> Marc-André Lureau <marcandre.lureau@gmail.com> writes:
>>
>> > Hi
>> >
>> > On Tue, Mar 3, 2020 at 8:41 AM Markus Armbruster <armbru@redhat.com> wrote:
>> [...]
>> >> >> Let's take a step back.
>> >> >>
>> >> >> The actual problem is to find the coroutine in graphic_hw_update_done(),
>> >> >> so you can wake it.
>> >> >>
>> >> >> Your solution stores the coroutine in the QemuConsole, because that's
>> >> >> readily available in graphic_hw_update_done().
>> >> >>
>> >> >> However, it really, really doesn't belong there, it belongs to the
>> >> >> monitor.  Works anyway only because QMP commands execute one after the
>> >> >> other.
>>
>> As discussed in the "[PATCH v4 1/4] qapi: Add a 'coroutine' flag for
>> commands" sub-thread, HMP commands may execute interleaved.  Your code
>> still works, because it only ever abuses QemuConsole with QMP.  But it's
>> brittle.
>>
>> Looks like we'll change HMP not to run interleaved.  That adds a belt to
>> the suspenders.  You might argue that's robust enough.
>>
>> But it's not just the brittleness I dislike.  Storing per-monitor-
>> command data in QemuConsole is ugly as sin.  Arguing that it works
>> because commands are strictly serialized, and that burying one more
>> dependence on such serialization deep in command code won't make the
>> situation appreciably worse, doesn't change the fact that QemuConsole
>> has no business holding per-monitor-command data.
>
> It is data (the monitor coroutine) associated with an event handler
> (graphic-update).
>
> Someone has to hold the handler/data, and the console seems appropriate.
>
> We could abstract this a bit, for ex, having a GHookList, but as long
> as there is only one handler, it's unnecessary.

The correctness argument is non-local and relies on current limitations
of both QMP and HMP:

* QMP never interleaves commands execution, not even with multiple QMP
  monitors.  Complete HMP commands can still be interleaved with a QMP
  command.

* QMP executes commands marked 'coroutine' in a coroutine.  HMP does not
  execute commands in coroutines.

* qmp_screendump() carefully avoids the graphic_hw_update() machinery
  for HMP.  It uses "running in coroutine" as a proxy for "HMP".

* No other user of the graphic_hw_update() machinery wants
  graphic_hw_update_done() to wake up a coroutine.

* Therefore, at any time no more than one such update is for a user that
  wants a coroutine woken up.

* Therefore, storing the coroutine to be woken up in QemuConsole is
  safe.

If you insist that's just fine, please add a comment with the
correctness argument, and get Gerd's blessing for it.

I'd rather remove the need for such a longwinded and brittle argument,
but I'm not the maintainer of ui/ and hw/display/, Gerd is.

>
>>
>> >> >>
>> >> >> Kevin suggested using a CoQueue to avoid this unspoken dependency.  You
>> >> >> object, because it could make readers assume multiple screendump
>> >> >> commands could run concurrently, which is not the case.
>> >> >>
>> >> >> Alright, let's KISS: since there's just one main loop, there's just one
>> >> >> coroutine: @qmp_dispatcher_co.  Let's use that, so the dependency on
>> >> >> "one command after the other" is explicit and obvious.
>> >> >
>> >> > Ugh... If you choose that this is the way to go, please add an assertion
>> >> > at least that we are indeed in qmp_dispatcher_co before yielding.
>> >>
>> >> No objection.
>> >>
>> >> To apply the QMP coroutine infrastructure for 5.0, I need a user.  We
>> >> have two: block_resize from Kevin, and screendump from Marc-André.
>> >> Neither is quite ready, yet.  I'll wait for a respin of either one.
>> >>
>> >
>> > Is this the change you expect?
>> >
>> > diff --git a/ui/console.c b/ui/console.c
>> > index 57df3a5439..d6a8bf0cee 100644
>> > --- a/ui/console.c
>> > +++ b/ui/console.c
>> > @@ -167,7 +167,7 @@ struct QemuConsole {
>> >      QEMUFIFO out_fifo;
>> >      uint8_t out_fifo_buf[16];
>> >      QEMUTimer *kbd_timer;
>> > -    Coroutine *screendump_co;
>> > +    bool wake_qmp_dispatcher_on_update;
>> >
>> >      QTAILQ_ENTRY(QemuConsole) next;
>> >  };
>>
>> No, because it still stores per-command data in QemuConsole.  You need
>> to, because...
>>
>> > @@ -263,8 +263,8 @@ static void gui_setup_refresh(DisplayState *ds)
>> >
>> >  void graphic_hw_update_done(QemuConsole *con)
>> >  {
>> > -    if (con && con->screendump_co) {
>> > -        aio_co_wake(con->screendump_co);
>> > +    if (con->wake_qmp_dispatcher_on_update) {
>> > +        aio_co_wake(qmp_dispatcher_co);
>>
>> ... you may call aio_co_wake() only while @qmp_dispatcher_co is waiting
>> for it after yielding ...
>>
>> >      }
>> >  }
>> >
>> > @@ -376,12 +376,15 @@ void qmp_screendump(const char *filename, bool
>> > has_device, const char *device,
>> >      }
>> >
>> >      if (qemu_in_coroutine()) {
>> > -        assert(!con->screendump_co);
>> > -        con->screendump_co = qemu_coroutine_self();
>> > +        /*
>> > +         * The coroutine code is generic, but we are supposed to be on
>> > +         * the QMP dispatcher coroutine, and we will resume only that now.
>> > +         */
>> > +        assert(qemu_coroutine_self() == qmp_dispatcher_co);
>> > +        con->wake_qmp_dispatcher_on_update = true;
>> >          aio_bh_schedule_oneshot(qemu_get_aio_context(),
>> >                                  graphic_hw_update_bh, con);
>> >          qemu_coroutine_yield();
>>
>> ... here.  I missed that need when I suggested to use
>> @qmp_dispatcher_co.  Sorry.
>>
>> > -        con->screendump_co = NULL;
>> > +        con->wake_qmp_dispatcher_on_update = false;
>> >      }
>>
>> Have a look at qxl, the only provider of asynchronous .gfx_update().
>> The actual work is done in qxl-render.c.  qxl_render_update(),
>> qxl_render_update_area_bh(), qxl_render_update_area_unlocked(),
>> qxl_render_update_area_done() cooperate carefully to support multiple
>> updates in flight.
>>
>> I guess that's necessary because we also call graphic_hw_update() from
>> display code such as ui/vnc.c and ui/spice-display.c.
>>
>> Before your patch, none of these users waits for an asynchronous update
>> to complete, as far as I can tell.  Afterwards, QMP screendump does.
>> Whether more users should I can't tell.  Gerd, can you?
>>
>> Your patch communicates completion to screendump by making
>> graphic_hw_update() wake a coroutine.  It stores the coroutine in
>> QemuConsole, exploiting that only one call site actually waits for an
>> asynchronous update to complete, and that caller is never reentered.
>>
>> This new mechanism is not usable for any other caller, unless it somehow
>> synchronizes with screendump to avoid reentrance.
>>
>> Shouldn't we offer a more generally useful way to wait for asynchronous
>> update to complete?  Kevin's idea to use a queue of waiters sounds more
>> appropriate than ever to me.
>>
>
> A CoQueue is a queue of coroutine. Similarly to the GHook suggestion,
> I don't see much point as long as there is a single known handler.
> Covering it through those abstractions will just lead to wrong
> assumptions or code harder to read imho.

This is for Gerd to decide.


Re: [PATCH] console: make QMP screendump use coroutine
Posted by Marc-André Lureau 4 years, 1 month ago
Hi

On Tue, Mar 3, 2020 at 8:41 AM Markus Armbruster <armbru@redhat.com> wrote:
>
> Kevin Wolf <kwolf@redhat.com> writes:
>
> > Am 02.03.2020 um 15:22 hat Markus Armbruster geschrieben:
> >> Marc-André Lureau <marcandre.lureau@gmail.com> writes:
> >>
> >> > Hi
> >> >
> >> > On Fri, Feb 21, 2020 at 5:50 PM Markus Armbruster <armbru@redhat.com> wrote:
> >> >>
> >> >> Kevin Wolf <kwolf@redhat.com> writes:
> >> >>
> >> >> > Am 20.02.2020 um 17:01 hat Markus Armbruster geschrieben:
> >> >> >> >> >  void qmp_screendump(const char *filename, bool has_device, const char *device,
> >> >> >> >> >                      bool has_head, int64_t head, Error **errp)
> >> >> >> >> >  {
> >> >> >> >> >      QemuConsole *con;
> >> >> >> >> >      DisplaySurface *surface;
> >> >> >> >> > +    g_autoptr(pixman_image_t) image = NULL;
> >> >> >> >> >      int fd;
> >> >> >> >> >
> >> >> >> >> >      if (has_device) {
> >> >> >> >> > @@ -365,7 +375,15 @@ void qmp_screendump(const char *filename, bool has_device, const char *device,
> >> >> >> >> >          }
> >> >> >> >> >      }
> >> >> >> >> >
> >> >> >> >> > -    graphic_hw_update(con);
> >> >> >> >> > +    if (qemu_in_coroutine()) {
> >> >> >> >> > +        assert(!con->screendump_co);
> >> >> >> >> > +        con->screendump_co = qemu_coroutine_self();
> >> >> >> >> > +        aio_bh_schedule_oneshot(qemu_get_aio_context(),
> >> >> >> >> > +                                graphic_hw_update_bh, con);
> >> >> >> >> > +        qemu_coroutine_yield();
> >> >> >> >> > +        con->screendump_co = NULL;
> >> >> >> >> > +    }
> >> >> >> >>
> >> >> >> >> What if multiple QMP monitors simultaneously screendump?  Hmm, it works
> >> >> >> >> because all execute one after another in the same coroutine
> >> >> >> >> qmp_dispatcher_co.  Implicit mutual exclusion.
> >> >> >> >>
> >> >> >> >> Executing them one after another is bad, because it lets an ill-behaved
> >> >> >> >> QMP command starve *all* QMP monitors.  We do it only out of
> >> >> >> >> (reasonable!) fear of implicit mutual exclusion requirements like the
> >> >> >> >> one you add.
> >> >> >> >>
> >> >> >> >> Let's not add more if we can help it.
> >> >> >> >
> >> >> >> > The situation is not worse than the current blocking handling.
> >> >> >>
> >> >> >> Really?
> >> >> >>
> >> >> >> What makes executing multiple qmp_screendump() concurrently (in separate
> >> >> >> threads) or interleaved (in separate coroutines in the same thread)
> >> >> >> unsafe before this patch?
> >> >> >
> >> >> > QMP command handlers are guaranteed to run in the main thread with the
> >> >> > BQL held, so there is no concurrency. If you want to change this, you
> >> >> > would have much more complicated problems to solve than in this handler.
> >> >> > I'm not sure it's fair to require thread-safety from one handler when
> >> >> > no other handler is thread safe (except accidentally) and nobody seems
> >> >> > to plan actually calling them from multiple threads.
> >> >>
> >> >> "Let's not [...] if we can help it." is hardly a "change this or else no
> >> >> merge" demand.  It is a challenge to find a more elegant solution.
> >> >>
> >> >> >> >> Your screendump_co is per QemuConsole instead of per QMP monitor only
> >> >> >> >> because you need to find the coroutine in graphic_hw_update_done().  Can
> >> >> >> >> we somehow pass it via function arguments?
> >> >> >> >
> >> >> >> > I think it could be done later, so I suggest a TODO.
> >> >> >>
> >> >> >> We should avoid making our dependence on implicit mutual exclusion
> >> >> >> worse.  When we do it anyway, a big, fat, ugly comment is definitely
> >> >> >> called for.
> >> >> >
> >> >> > Anyway, what I really wanted to add:
> >> >> >
> >> >> > This should be easy to solve by having a CoQueue instead of a single
> >> >>
> >> >> Ah, challenge accepted!  Exactly the outcome I was hoping for :)
> >> >>
> >> >> > Coroutine pointer. The coroutine would just call qemu_co_queue_wait(),
> >> >> > which adds itself to the queue before it yields and the update
> >> >> > completion would wake up all coroutines that are currently queued with
> >> >> > qemu_co_queue_restart_all().
> >> >> >
> >> >> > qemu_co_queue_wait() takes a lock as its second parameter. You don't
> >> >> > need it in this context and can just pass NULL. (This is a lock that
> >> >> > would be dropped while the coroutine is sleeping and automatically
> >> >> > reacquired afterwards.)
> >> >> >
> >> >> >> >> In case avoiding the mutual exclusion is impractical: please explain it
> >> >> >> >> in a comment to make it somewhat less implicit.
> >> >> >>
> >> >> >> It is anything but: see appended patch.
> >> >> >
> >> >> > This works, too, but it requires an additional struct. I think the queue
> >> >> > is easier. (Note there is a difference in the mechanism: Your patch
> >> >> > waits for the specific update it triggered, while the CoQueue would wait
> >> >> > for _any_ update to complete. I assume effectively the result is the
> >> >> > same.)
> >> >>
> >> >> Your idea sounds much nicer to me.  Thanks!
> >> >
> >> > Similar to the NULL check you asked to remove,
> >> > having a CoQueue there would lead to think that several concurrently
> >> > running screendump are possible.
> >> >
> >> > Is this a direction we are willing to take?
> >>
> >> Let's take a step back.
> >>
> >> The actual problem is to find the coroutine in graphic_hw_update_done(),
> >> so you can wake it.
> >>
> >> Your solution stores the coroutine in the QemuConsole, because that's
> >> readily available in graphic_hw_update_done().
> >>
> >> However, it really, really doesn't belong there, it belongs to the
> >> monitor.  Works anyway only because QMP commands execute one after the
> >> other.
> >>
> >> Kevin suggested using a CoQueue to avoid this unspoken dependency.  You
> >> object, because it could make readers assume multiple screendump
> >> commands could run concurrently, which is not the case.
> >>
> >> Alright, let's KISS: since there's just one main loop, there's just one
> >> coroutine: @qmp_dispatcher_co.  Let's use that, so the dependency on
> >> "one command after the other" is explicit and obvious.
> >
> > Ugh... If you choose that this is the way to go, please add an assertion
> > at least that we are indeed in qmp_dispatcher_co before yielding.
>
> No objection.
>
> To apply the QMP coroutine infrastructure for 5.0, I need a user.  We
> have two: block_resize from Kevin, and screendump from Marc-André.
> Neither is quite ready, yet.  I'll wait for a respin of either one.
>

Kevin series has conflicts, I will wait for his respin.

-- 
Marc-André Lureau

Re: [PATCH] console: make QMP screendump use coroutine
Posted by Markus Armbruster 4 years, 1 month ago
Marc-André Lureau <marcandre.lureau@gmail.com> writes:

[...]
>> To apply the QMP coroutine infrastructure for 5.0, I need a user.  We
>> have two: block_resize from Kevin, and screendump from Marc-André.
>> Neither is quite ready, yet.  I'll wait for a respin of either one.
>>
>
> Kevin series has conflicts, I will wait for his respin.

The conflict is trivial.  I'll prepare a base for you.


Re: [PATCH] console: make QMP screendump use coroutine
Posted by Dr. David Alan Gilbert 4 years, 1 month ago
* Markus Armbruster (armbru@redhat.com) wrote:
> Cc: David for questions regarding the HMP core.  David, please look for
> "Is HMP blocking the main loop a problem?"
> 
> Marc-André Lureau <marcandre.lureau@gmail.com> writes:
> 
> > Hi
> >
> > On Thu, Feb 20, 2020 at 8:49 AM Markus Armbruster <armbru@redhat.com> wrote:
> >>
> >> Marc-André Lureau <marcandre.lureau@redhat.com> writes:
> >>
> >> > Thanks to the QMP coroutine support, the screendump handler can
> >> > trigger a graphic_hw_update(), yield and let the main loop run until
> >> > update is done. Then the handler is resumed, and the ppm_save() will
> >> > write the screen image to disk in the coroutine context (thus
> >> > non-blocking).
> >> >
> >> > For now, HMP doesn't have coroutine support, so it remains potentially
> >> > outdated or glitched.
> >> >
> >> > Fixes:
> >> > https://bugzilla.redhat.com/show_bug.cgi?id=1230527
> >> >
> >> > Based-on: <20200109183545.27452-2-kwolf@redhat.com>
> >> >
> >> > Cc: Kevin Wolf <kwolf@redhat.com>
> >> > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> >> > ---
> >> >  qapi/ui.json    |  3 ++-
> >> >  ui/console.c    | 35 +++++++++++++++++++++++++++--------
> >> >  ui/trace-events |  2 +-
> >> >  3 files changed, 30 insertions(+), 10 deletions(-)
> >> >
> >> > diff --git a/qapi/ui.json b/qapi/ui.json
> >> > index e04525d8b4..d941202f34 100644
> >> > --- a/qapi/ui.json
> >> > +++ b/qapi/ui.json
> >> > @@ -96,7 +96,8 @@
> >> >  #
> >> >  ##
> >> >  { 'command': 'screendump',
> >> > -  'data': {'filename': 'str', '*device': 'str', '*head': 'int'} }
> >> > +  'data': {'filename': 'str', '*device': 'str', '*head': 'int'},
> >> > +  'coroutine': true }
> >> >
> >> >  ##
> >> >  # == Spice
> >> > diff --git a/ui/console.c b/ui/console.c
> >> > index ac79d679f5..db184b473f 100644
> >> > --- a/ui/console.c
> >> > +++ b/ui/console.c
> >> > @@ -167,6 +167,7 @@ struct QemuConsole {
> >> >      QEMUFIFO out_fifo;
> >> >      uint8_t out_fifo_buf[16];
> >> >      QEMUTimer *kbd_timer;
> >> > +    Coroutine *screendump_co;
> >> >
> >> >      QTAILQ_ENTRY(QemuConsole) next;
> >> >  };
> >> > @@ -194,7 +195,6 @@ static void dpy_refresh(DisplayState *s);
> >> >  static DisplayState *get_alloc_displaystate(void);
> >> >  static void text_console_update_cursor_timer(void);
> >> >  static void text_console_update_cursor(void *opaque);
> >> > -static bool ppm_save(int fd, DisplaySurface *ds, Error **errp);
> >> >
> >> >  static void gui_update(void *opaque)
> >> >  {
> >> > @@ -263,6 +263,9 @@ static void gui_setup_refresh(DisplayState *ds)
> >> >
> >> >  void graphic_hw_update_done(QemuConsole *con)
> >> >  {
> >> > +    if (con && con->screendump_co) {
> >>
> >> How can !con happen?
> >
> > I don't think it can happen anymore (the patch evolved over several
> > years, this is probably a left-over). In any case, it doesn't hurt.
> 
> I hate such dead checks, because they make me assume they can actually
> happen.  Incorrect assumptions breed bugs.
> 
> But I'm willing to defer to the maintainer here.  Gerd?
> 
> >> > +        aio_co_wake(con->screendump_co);
> >> > +    }
> >> >  }
> >> >
> >> >  void graphic_hw_update(QemuConsole *con)
> >> > @@ -310,16 +313,16 @@ void graphic_hw_invalidate(QemuConsole *con)
> >> >      }
> >> >  }
> >> >
> >> > -static bool ppm_save(int fd, DisplaySurface *ds, Error **errp)
> >> > +static bool ppm_save(int fd, pixman_image_t *image, Error **errp)
> >> >  {
> >> > -    int width = pixman_image_get_width(ds->image);
> >> > -    int height = pixman_image_get_height(ds->image);
> >> > +    int width = pixman_image_get_width(image);
> >> > +    int height = pixman_image_get_height(image);
> >> >      g_autoptr(Object) ioc = OBJECT(qio_channel_file_new_fd(fd));
> >> >      g_autofree char *header = NULL;
> >> >      g_autoptr(pixman_image_t) linebuf = NULL;
> >> >      int y;
> >> >
> >> > -    trace_ppm_save(fd, ds);
> >> > +    trace_ppm_save(fd, image);
> >> >
> >> >      header = g_strdup_printf("P6\n%d %d\n%d\n", width, height, 255);
> >> >      if (qio_channel_write_all(QIO_CHANNEL(ioc),
> >> > @@ -329,7 +332,7 @@ static bool ppm_save(int fd, DisplaySurface *ds, Error **errp)
> >> >
> >> >      linebuf = qemu_pixman_linebuf_create(PIXMAN_BE_r8g8b8, width);
> >> >      for (y = 0; y < height; y++) {
> >> > -        qemu_pixman_linebuf_fill(linebuf, ds->image, width, 0, y);
> >> > +        qemu_pixman_linebuf_fill(linebuf, image, width, 0, y);
> >> >          if (qio_channel_write_all(QIO_CHANNEL(ioc),
> >> >                                    (char *)pixman_image_get_data(linebuf),
> >> >                                    pixman_image_get_stride(linebuf), errp) < 0) {
> >>
> >> Looks like an unrelated optimization / simplification.  If I was
> >> maintainer, I'd ask for a separate patch.
> >
> > I can be split, but it's related. We should pass a reference to
> > pixman_image_t, rather than a pointer to DisplaySurface, as the
> > underlying image may change over time, and would result in corrupted
> > coroutine save or worse.
> 
> Work that into your commit message, please.  Might be easier if you
> split, but that's between you and the maintainer :)
> 
> >> > @@ -340,11 +343,18 @@ static bool ppm_save(int fd, DisplaySurface *ds, Error **errp)
> >> >      return true;
> >> >  }
> >> >
> >> > +static void graphic_hw_update_bh(void *con)
> >> > +{
> >> > +    graphic_hw_update(con);
> >> > +}
> >> > +
> >> > +/* may be called in coroutine context or not */
> >>
> >> Hmm.
> >>
> >> Even though the QMP core always calls in coroutine context, the comment
> >> is correct: hmp_screendump() calls it outside coroutine context.
> >> Because of that...
> >>
> >> >  void qmp_screendump(const char *filename, bool has_device, const char *device,
> >> >                      bool has_head, int64_t head, Error **errp)
> >> >  {
> >> >      QemuConsole *con;
> >> >      DisplaySurface *surface;
> >> > +    g_autoptr(pixman_image_t) image = NULL;
> >> >      int fd;
> >> >
> >> >      if (has_device) {
> >> > @@ -365,7 +375,15 @@ void qmp_screendump(const char *filename, bool has_device, const char *device,
> >> >          }
> >> >      }
> >> >
> >> > -    graphic_hw_update(con);
> >> > +    if (qemu_in_coroutine()) {
> >> > +        assert(!con->screendump_co);
> >>
> >> What if multiple QMP monitors simultaneously screendump?  Hmm, it works
> >> because all execute one after another in the same coroutine
> >> qmp_dispatcher_co.  Implicit mutual exclusion.
> >>
> >> Executing them one after another is bad, because it lets an ill-behaved
> >> QMP command starve *all* QMP monitors.  We do it only out of
> >> (reasonable!) fear of implicit mutual exclusion requirements like the
> >> one you add.
> >>
> >> Let's not add more if we can help it.
> >
> > The situation is not worse than the current blocking handling.
> 
> Really?
> 
> What makes executing multiple qmp_screendump() concurrently (in separate
> threads) or interleaved (in separate coroutines in the same thread)
> unsafe before this patch?
> 
> >> Your screendump_co is per QemuConsole instead of per QMP monitor only
> >> because you need to find the coroutine in graphic_hw_update_done().  Can
> >> we somehow pass it via function arguments?
> >
> > I think it could be done later, so I suggest a TODO.
> 
> We should avoid making our dependence on implicit mutual exclusion
> worse.  When we do it anyway, a big, fat, ugly comment is definitely
> called for.
> 
> >> In case avoiding the mutual exclusion is impractical: please explain it
> >> in a comment to make it somewhat less implicit.
> 
> It is anything but: see appended patch.
> 
> >> > +        con->screendump_co = qemu_coroutine_self();
> >> > +        aio_bh_schedule_oneshot(qemu_get_aio_context(),
> >> > +                                graphic_hw_update_bh, con);
> >> > +        qemu_coroutine_yield();
> >> > +        con->screendump_co = NULL;
> >> > +    }
> >> > +
> >>
> >> ... the command handler needs extra code to cope with either.  Is this
> >> really what we want for coroutine QMP command handlers?  We'll acquire
> >> more of them, and I'd hate to make each one run both in and outside
> >> coroutine context.  Shouldn't we let the HMP core take care of this?  Or
> >> at least have some common infrastructure these handlers can use?
> >
> > We have several functions that have this dual support, for ex QIO.
> >
> > Changing both QMP & HMP commands to run in coroutine is likely
> > additional work that we may not care at this point.
> 
> If it wasn't for non-QMP calls (typically HMP, but also CLI), then
> handlers for QMP commands with 'coroutine': true could be coroutine_fn.
> 
> So far, coroutine_fn is merely documentation.  Perhaps it can guide a
> checker for "do coroutine stuff only in coroutines" some day.  Would be
> nice, because the coroutine calls are often buried deep, and far away
> from the code that ensures they run in a coroutine.
> 
> My point is: marking functions coroutine_fn is good.  We should do it
> more.  We should try to avoid stuff that hinders doing it more.
> 
> > I propose to leave a TODO, once we have several similar QMP & HMP mix
> > cases we can try to find a common HMP solution to make the code
> > simpler in QMP handler.
> 
> Collecting several users before building infrastructure makes sense when
> the design of the infrastructure isn't obvious, or when the need for it
> is in doubt.
> 
> Neither is the case for running QMP handlers in a coroutine: QMP
> commands blocking the main loop is without doubt a problem we need to
> solve, and the way to solve it was obvious enough for Kevin to do it
> with one user: block_resize.  A second one quickly followed: screendump.
> 
> The only part that's different for HMP, I think, is "need".
> 
> Is HMP blocking the main loop a problem?
> 
> If yes, is it serious enough to justify solving it?

I don't mind if HMP blocks for a small time while doing something, but
not if it can hang if the guest (or something else like it) misbehaves.
Not if it's something you might need to issue another command to recover
from.

Dave

> If yes, then putting workarounds into QMP handlers now so we can put off
> solving it some more is taking on technical debt.
> 
> > I don't know if this is going to be a common pattern, we may end up
> > with conversions that can run both without explicit handling (like the
> > ppm_save() function, thanks to QIO).
> 
> Yes, such handlers may exist.  Running them out of coroutine context
> would throw away their capability not to block the event loop, though,
> wouldn't it?
> 
> >> Why is it okay not to call graphic_hw_update() anymore when
> >> !qemu_in_coroutine()?
> >
> > You could call it, but then you should wait for completion by
> > reentering the main loop (that was the point of my earlier qapi-async
> > series)
> 
> Possibly stupid question: why is it necessary before this patch
> (assuming it is, since we call it), and why is it no longer necessary
> after?
> 
> Oh, see below.
> 
> >>
> >> If qemu_in_coroutine(), we now run graphic_hw_update() in a bottom half,
> >> then yield until the update completes (see graphic_hw_update_done()
> >> above).  Can you explain the need for the bottom half?
> >
> > At least spice rendering is done in a separate thread, completion is async.
> 
> When I ask a stupid question like this one, I'm really after the "for
> dummies" explanation.  I may be able to figure it out myself with some
> effort, but having to put that kind of effort into patch review makes me
> grumpy, and once I'm sufficiently grumpy, I don't want to review patches
> anymore, let alone merge them.
> 
> Oh well, let me try.  We're in the main loop.  We want to trigger a
> "graphics update" (whatever that is, doesn't matter) and wait for it to
> complete without blocking the main loop.
> 
> "Without blocking the main loop" means the QMP coroutine yields.  I'd
> naively expect
> 
>     QMP coroutine: schedule the job; yield
>     whatever gets scheduled: complete the job; wake up QMP coroutine
> 
> Now let's examine the "graphics update" interface.
> 
> GraphicHwOps callback gfx_update() comes in two flavours:
> 
> * synchronous: complete the job, return
> 
> * asynchronous: start the job, return immediately,
>   graphic_hw_update_done() will get called on job completion
> 
> graphic_hw_update() partly hides the difference:
> 
> * synchronous: complete the job, call graphic_hw_update_done()
> 
> * asynchronous: start the job, return immediately,
>   graphic_hw_update_done() will get called on job completion
> 
> This lets you treat the synchronous case more like the asynchronous
> case.
> 
> You use graphic_hw_update_done() to wake up the QMP coroutine.
> 
> I think I can now answer my question "why is it okay not to call
> graphic_hw_update() anymore when !qemu_in_coroutine()?"
> 
> Before the patch, both QMP and HMP:
> 
> * with synchronous gfx_update(): we update before we write out the
>   screendump.  The screendump is up-to-date.  Both update and write out
>   block the main loop.
> 
> * with asynchronous gfx_update(): we start updating, but don't wait for
>   it to complete before we write out.  This is wrong.  Write out blocks
>   the main loop, but update does not.
> 
> After the patch:
> 
> * QMP with either gfx_update(): we update before we write out the
>   screendump.  The screendump is up-to-date.  Neither update nor write
>   out block the main loop.  Improvement.
> 
> * HMP with either gfx_update(): we don't update before we write out.
>   Similarly wrong for asynchronous gfx_update(), regression for
>   synchronous gfx_update().  Write out blocks the main loop as before.
> 
> Why is the regression okay?
> 
> Back to the bottom half.  The way graphic_hw_update() works, the QMP
> coroutine can't schedule then yield.  It *has* to yield before
> graphic_hw_update() runs.  That means we need a bottom half.
> 
> Alright, I'm officially grumpy now.
> 
> Please explain the need for a bottom half in a comment.
> 
> >> >      surface = qemu_console_surface(con);
> >> >      if (!surface) {
> >> >          error_setg(errp, "no surface");
> >> > @@ -379,7 +397,8 @@ void qmp_screendump(const char *filename, bool has_device, const char *device,
> >> >          return;
> >> >      }
> >> >
> >> > -    if (!ppm_save(fd, surface, errp)) {
> >> > +    image = pixman_image_ref(surface->image);
> >> > +    if (!ppm_save(fd, image, errp)) {
> >> >          qemu_unlink(filename);
> >> >      }
> >> >  }
> >> > diff --git a/ui/trace-events b/ui/trace-events
> >> > index 0dcda393c1..e8726fc969 100644
> >> > --- a/ui/trace-events
> >> > +++ b/ui/trace-events
> >> > @@ -15,7 +15,7 @@ displaysurface_create_pixman(void *display_surface) "surface=%p"
> >> >  displaysurface_free(void *display_surface) "surface=%p"
> >> >  displaychangelistener_register(void *dcl, const char *name) "%p [ %s ]"
> >> >  displaychangelistener_unregister(void *dcl, const char *name) "%p [ %s ]"
> >> > -ppm_save(int fd, void *display_surface) "fd=%d surface=%p"
> >> > +ppm_save(int fd, void *image) "fd=%d image=%p"
> >> >
> >> >  # gtk.c
> >> >  # gtk-gl-area.c
> 
> 
> diff --git a/ui/console.c b/ui/console.c
> index 57df3a5439..c5aabf5a5f 100644
> --- a/ui/console.c
> +++ b/ui/console.c
> @@ -167,7 +167,6 @@ struct QemuConsole {
>      QEMUFIFO out_fifo;
>      uint8_t out_fifo_buf[16];
>      QEMUTimer *kbd_timer;
> -    Coroutine *screendump_co;
>  
>      QTAILQ_ENTRY(QemuConsole) next;
>  };
> @@ -261,14 +260,20 @@ static void gui_setup_refresh(DisplayState *ds)
>      ds->have_text = have_text;
>  }
>  
> -void graphic_hw_update_done(QemuConsole *con)
> +static void graphic_hw_update_done_with_kick(QemuConsole *con,
> +                                             Coroutine *kick_me)
>  {
> -    if (con && con->screendump_co) {
> -        aio_co_wake(con->screendump_co);
> +    if (kick_me) {
> +        aio_co_wake(kick_me);
>      }
>  }
>  
> -void graphic_hw_update(QemuConsole *con)
> +void graphic_hw_update_done(QemuConsole *con)
> +{
> +    graphic_hw_update_done_with_kick(con, NULL);
> +}
> +
> +static void graphic_hw_update_with_kick(QemuConsole *con, Coroutine *kick_me)
>  {
>      bool async = false;
>      if (!con) {
> @@ -279,10 +284,15 @@ void graphic_hw_update(QemuConsole *con)
>          async = con->hw_ops->gfx_update_async;
>      }
>      if (!async) {
> -        graphic_hw_update_done(con);
> +        graphic_hw_update_done_with_kick(con, kick_me);
>      }
>  }
>  
> +void graphic_hw_update(QemuConsole *con)
> +{
> +    graphic_hw_update_with_kick(con, NULL);
> +}
> +
>  void graphic_hw_gl_block(QemuConsole *con, bool block)
>  {
>      assert(con != NULL);
> @@ -343,9 +353,16 @@ static bool ppm_save(int fd, pixman_image_t *image, Error **errp)
>      return true;
>  }
>  
> -static void graphic_hw_update_bh(void *con)
> +typedef struct {
> +    QemuConsole *con;
> +    Coroutine *kick_me;
> +} UpdateBHargs;
> +
> +static void graphic_hw_update_bh(void *p)
>  {
> -    graphic_hw_update(con);
> +    UpdateBHargs *args = p;
> +
> +    graphic_hw_update_with_kick(args->con, args->kick_me);
>  }
>  
>  /* may be called in coroutine context or not */
> @@ -376,12 +393,13 @@ void qmp_screendump(const char *filename, bool has_device, const char *device,
>      }
>  
>      if (qemu_in_coroutine()) {
> -        assert(!con->screendump_co);
> -        con->screendump_co = qemu_coroutine_self();
> +        UpdateBHargs args = {
> +            .con = con,
> +            .kick_me = qemu_coroutine_self(),
> +        };
>          aio_bh_schedule_oneshot(qemu_get_aio_context(),
> -                                graphic_hw_update_bh, con);
> +                                graphic_hw_update_bh, &args);
>          qemu_coroutine_yield();
> -        con->screendump_co = NULL;
>      }
>  
>      surface = qemu_console_surface(con);
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK


Re: [PATCH] console: make QMP screendump use coroutine
Posted by Markus Armbruster 4 years, 1 month ago
"Dr. David Alan Gilbert" <dgilbert@redhat.com> writes:

> * Markus Armbruster (armbru@redhat.com) wrote:
[...]
>> Collecting several users before building infrastructure makes sense when
>> the design of the infrastructure isn't obvious, or when the need for it
>> is in doubt.
>> 
>> Neither is the case for running QMP handlers in a coroutine: QMP
>> commands blocking the main loop is without doubt a problem we need to
>> solve, and the way to solve it was obvious enough for Kevin to do it
>> with one user: block_resize.  A second one quickly followed: screendump.
>> 
>> The only part that's different for HMP, I think, is "need".
>> 
>> Is HMP blocking the main loop a problem?
>> 
>> If yes, is it serious enough to justify solving it?
>
> I don't mind if HMP blocks for a small time while doing something, but
> not if it can hang if the guest (or something else like it) misbehaves.
> Not if it's something you might need to issue another command to recover
> from.

The issue isn't HMP being unavailable while a command executes.  The
issue is HMP stopping the main loop while a command executes.

Stopping the main loop not only stops everything running there, it can
also stop other threads when they synchronize with the main loop via the
Big QEMU Lock.

The obvious example is a command accessing a remote filesystem.  Special
case: NFS with the hard option can hang indefinitely.

screendump does that, and also waits for asynchronous gfx_update() with
qxl devices.  Networking again, with a different peer.

We already decided that QMP commands stopping the main loop is serious.

To say it's not serious for HMP amounts to "don't do that then, use
QMP".  Which may be fair.  Not for me to decide, though.


Re: [PATCH] console: make QMP screendump use coroutine
Posted by Dr. David Alan Gilbert 4 years, 1 month ago
* Markus Armbruster (armbru@redhat.com) wrote:
> "Dr. David Alan Gilbert" <dgilbert@redhat.com> writes:
> 
> > * Markus Armbruster (armbru@redhat.com) wrote:
> [...]
> >> Collecting several users before building infrastructure makes sense when
> >> the design of the infrastructure isn't obvious, or when the need for it
> >> is in doubt.
> >> 
> >> Neither is the case for running QMP handlers in a coroutine: QMP
> >> commands blocking the main loop is without doubt a problem we need to
> >> solve, and the way to solve it was obvious enough for Kevin to do it
> >> with one user: block_resize.  A second one quickly followed: screendump.
> >> 
> >> The only part that's different for HMP, I think, is "need".
> >> 
> >> Is HMP blocking the main loop a problem?
> >> 
> >> If yes, is it serious enough to justify solving it?
> >
> > I don't mind if HMP blocks for a small time while doing something, but
> > not if it can hang if the guest (or something else like it) misbehaves.
> > Not if it's something you might need to issue another command to recover
> > from.
> 
> The issue isn't HMP being unavailable while a command executes.  The
> issue is HMP stopping the main loop while a command executes.
> 
> Stopping the main loop not only stops everything running there, it can
> also stop other threads when they synchronize with the main loop via the
> Big QEMU Lock.

Yep.

> The obvious example is a command accessing a remote filesystem.  Special
> case: NFS with the hard option can hang indefinitely.

That I don't worry about too much for HMP; if your host is hosed, fix your host.

> screendump does that, and also waits for asynchronous gfx_update() with
> qxl devices.  Networking again, with a different peer.

That I would worry about since that's probably got interactions with the
guest and spice, and all the type of things you might be trying to debug
or test.

> We already decided that QMP commands stopping the main loop is serious.
> 
> To say it's not serious for HMP amounts to "don't do that then, use
> QMP".  Which may be fair.  Not for me to decide, though.

It's certainly more important for QMP; you don't want the main lock
being held for everyday type of interactions with management layers.

Dave

--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK