virtio_load() as a whole should run in coroutine context because it
reads from the migration stream and we don't want this to block.

However, it calls virtio_set_features_nocheck() and devices don't
expect their .set_features callback to run in a coroutine and therefore
call functions that may not be called in coroutine context. To fix this,
drop out of coroutine context for calling virtio_set_features_nocheck().

Without this fix, the following crash was reported:
#0 __pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6, no_tid=no_tid@entry=0) at pthread_kill.c:44
#1 0x00007efc738c05d3 in __pthread_kill_internal (signo=6, threadid=<optimized out>) at pthread_kill.c:78
#2 0x00007efc73873d26 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
#3 0x00007efc738477f3 in __GI_abort () at abort.c:79
#4 0x00007efc7384771b in __assert_fail_base (fmt=0x7efc739dbcb8 "", assertion=assertion@entry=0x560aebfbf5cf "!qemu_in_coroutine()",
file=file@entry=0x560aebfcd2d4 "../block/graph-lock.c", line=line@entry=275, function=function@entry=0x560aebfcd34d "void bdrv_graph_rdlock_main_loop(void)") at assert.c:92
#5 0x00007efc7386ccc6 in __assert_fail (assertion=0x560aebfbf5cf "!qemu_in_coroutine()", file=0x560aebfcd2d4 "../block/graph-lock.c", line=275,
function=0x560aebfcd34d "void bdrv_graph_rdlock_main_loop(void)") at assert.c:101
#6 0x0000560aebcd8dd6 in bdrv_register_buf ()
#7 0x0000560aeb97ed97 in ram_block_added.llvm ()
#8 0x0000560aebb8303f in ram_block_add.llvm ()
#9 0x0000560aebb834fa in qemu_ram_alloc_internal.llvm ()
#10 0x0000560aebb2ac98 in vfio_region_mmap ()
#11 0x0000560aebb3ea0f in vfio_bars_register ()
#12 0x0000560aebb3c628 in vfio_realize ()
#13 0x0000560aeb90f0c2 in pci_qdev_realize ()
#14 0x0000560aebc40305 in device_set_realized ()
#15 0x0000560aebc48e07 in property_set_bool.llvm ()
#16 0x0000560aebc46582 in object_property_set ()
#17 0x0000560aebc4cd58 in object_property_set_qobject ()
#18 0x0000560aebc46ba7 in object_property_set_bool ()
#19 0x0000560aeb98b3ca in qdev_device_add_from_qdict ()
#20 0x0000560aebb1fbaf in virtio_net_set_features ()
#21 0x0000560aebb46b51 in virtio_set_features_nocheck ()
#22 0x0000560aebb47107 in virtio_load ()
#23 0x0000560aeb9ae7ce in vmstate_load_state ()
#24 0x0000560aeb9d2ee9 in qemu_loadvm_state_main ()
#25 0x0000560aeb9d45e1 in qemu_loadvm_state ()
#26 0x0000560aeb9bc32c in process_incoming_migration_co.llvm ()
#27 0x0000560aebeace56 in coroutine_trampoline.llvm ()
Cc: qemu-stable@nongnu.org
Buglink: https://issues.redhat.com/browse/RHEL-832
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
hw/virtio/virtio.c | 45 ++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 40 insertions(+), 5 deletions(-)
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 309038fd46..969c25f4cf 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2825,8 +2825,9 @@ static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
 }
 
 /* A wrapper for use as a VMState .get function */
-static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
-                             const VMStateField *field)
+static int coroutine_mixed_fn
+virtio_device_get(QEMUFile *f, void *opaque, size_t size,
+                  const VMStateField *field)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
     DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
@@ -2853,6 +2854,39 @@ static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
     return bad ? -1 : 0;
 }
 
+typedef struct VirtioSetFeaturesNocheckData {
+    Coroutine *co;
+    VirtIODevice *vdev;
+    uint64_t val;
+    int ret;
+} VirtioSetFeaturesNocheckData;
+
+static void virtio_set_features_nocheck_bh(void *opaque)
+{
+    VirtioSetFeaturesNocheckData *data = opaque;
+
+    data->ret = virtio_set_features_nocheck(data->vdev, data->val);
+    aio_co_wake(data->co);
+}
+
+static int coroutine_mixed_fn
+virtio_set_features_nocheck_maybe_co(VirtIODevice *vdev, uint64_t val)
+{
+    if (qemu_in_coroutine()) {
+        VirtioSetFeaturesNocheckData data = {
+            .co = qemu_coroutine_self(),
+            .vdev = vdev,
+            .val = val,
+        };
+        aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
+                                virtio_set_features_nocheck_bh, &data);
+        qemu_coroutine_yield();
+        return data.ret;
+    } else {
+        return virtio_set_features_nocheck(vdev, val);
+    }
+}
+
 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
 {
     int ret;
@@ -2906,7 +2940,8 @@ size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
     return config_size;
 }
 
-int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
+int coroutine_mixed_fn
+virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
 {
     int i, ret;
     int32_t config_len;
@@ -3023,14 +3058,14 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
          * host_features.
          */
         uint64_t features64 = vdev->guest_features;
-        if (virtio_set_features_nocheck(vdev, features64) < 0) {
+        if (virtio_set_features_nocheck_maybe_co(vdev, features64) < 0) {
             error_report("Features 0x%" PRIx64 " unsupported. "
                          "Allowed features: 0x%" PRIx64,
                          features64, vdev->host_features);
             return -1;
         }
     } else {
-        if (virtio_set_features_nocheck(vdev, features) < 0) {
+        if (virtio_set_features_nocheck_maybe_co(vdev, features) < 0) {
             error_report("Features 0x%x unsupported. "
                          "Allowed features: 0x%" PRIx64,
                          features, vdev->host_features);
--
2.41.0
05.09.2023 17:50, Kevin Wolf wrote:
> virtio_load() as a whole should run in coroutine context because it
> reads from the migration stream and we don't want this to block.
>
> However, it calls virtio_set_features_nocheck() and devices don't
> expect their .set_features callback to run in a coroutine and therefore
> call functions that may not be called in coroutine context. To fix this,
> drop out of coroutine context for calling virtio_set_features_nocheck().
...
> Cc: qemu-stable@nongnu.org
> Buglink: https://issues.redhat.com/browse/RHEL-832
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>

It looks like this change caused an interesting regression,
https://gitlab.com/qemu-project/qemu/-/issues/1933
at least in -stable. Can you take a look please?

BTW, Kevin, do you have an account @gitlab?

Thanks,

/mjt
Michael Tokarev <mjt@tls.msk.ru> wrote:
> 05.09.2023 17:50, Kevin Wolf wrote:
>> virtio_load() as a whole should run in coroutine context because it
>> reads from the migration stream and we don't want this to block.
>> However, it calls virtio_set_features_nocheck() and devices don't
>> expect their .set_features callback to run in a coroutine and therefore
>> call functions that may not be called in coroutine context. To fix this,
>> drop out of coroutine context for calling virtio_set_features_nocheck().
> ...
>> Cc: qemu-stable@nongnu.org
>> Buglink: https://issues.redhat.com/browse/RHEL-832
>> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
>
> It looks like this change caused an interesting regression,
> https://gitlab.com/qemu-project/qemu/-/issues/1933
> at least in -stable. Can you take a look please?
>
> BTW, Kevin, do you have an account @gitlab?

Dunno what is going on here, but failing postcopy is weird.

2023-10-12T06:23:44.354387Z qemu-system-x86_64: warning: TSC frequency mismatch between VM (2892749 kHz) and host (2799999 kHz), and TSC scaling unavailable
2023-10-12T06:23:44.354538Z qemu-system-x86_64: warning: TSC frequency mismatch between VM (2892749 kHz) and host (2799999 kHz), and TSC scaling unavailable

I hope/guess that the problem is not TSC related? I.e., do other tests
work between these two machines?

Once we discard that, we get on the source:

2023-10-12 06:23:43.412+0000: initiating migration
2023-10-12T06:23:44.362392Z qemu-system-x86_64: failed to save SaveStateEntry with id(name): 3(ram): -5

So migration was aborted, and -5 is EIO on my system. So we are having
trouble here with a write() somewhere.

Later, Juan.

> Thanks,
>
> /mjt
Am 17.10.2023 um 07:19 hat Michael Tokarev geschrieben:
> 05.09.2023 17:50, Kevin Wolf wrote:
> > virtio_load() as a whole should run in coroutine context because it
> > reads from the migration stream and we don't want this to block.
> >
> > However, it calls virtio_set_features_nocheck() and devices don't
> > expect their .set_features callback to run in a coroutine and therefore
> > call functions that may not be called in coroutine context. To fix this,
> > drop out of coroutine context for calling virtio_set_features_nocheck().
> ...
> > Cc: qemu-stable@nongnu.org
> > Buglink: https://issues.redhat.com/browse/RHEL-832
> > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
>
> It looks like this change caused an interesting regression,
> https://gitlab.com/qemu-project/qemu/-/issues/1933
> at least in -stable. Can you take a look please?

Huh?! This is an interesting one indeed. I can't see any direct
connection between the patch and this regression. Random memory
corruption is the only explanation I have, but I'm not sure how this
patch could cause it; it's quite simple.

The next step is probably trying to find a simple reproducer on the
QEMU level, and then maybe valgrind, or we could get stack traces for
the calls to virtio_set_features_nocheck_maybe_co(). The stack trace
for the crash and maybe the content of 's' would also be interesting;
we can ask the reporter for those, the core dump should be enough.

Another potentially interesting question is whether, after yielding,
the coroutine is indeed reentered from the aio_co_wake() call in the
patch or if something else wakes it up. If it were the latter, that
could explain memory corruption.

> BTW, Kevin, do you have an account @gitlab?

Yes, @kmwolf.

Kevin
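One way that last question could be checked empirically, sketched here
as a hypothetical debugging aid rather than as part of the fix: add a
'done' flag to the handshake struct from the patch, set it in the BH
right before the wakeup, and assert on it after the yield. If anything
other than virtio_set_features_nocheck_bh() re-enters the coroutine,
the assertion fires instead of data.ret silently holding stale data.

typedef struct VirtioSetFeaturesNocheckData {
    Coroutine *co;
    VirtIODevice *vdev;
    uint64_t val;
    int ret;
    bool done;    /* hypothetical: proves the BH ran before the wakeup */
} VirtioSetFeaturesNocheckData;

static void virtio_set_features_nocheck_bh(void *opaque)
{
    VirtioSetFeaturesNocheckData *data = opaque;

    data->ret = virtio_set_features_nocheck(data->vdev, data->val);
    data->done = true;
    aio_co_wake(data->co);
}

/* ...and in virtio_set_features_nocheck_maybe_co(), after the yield: */
        qemu_coroutine_yield();
        /* fires if something other than the BH re-entered us */
        assert(data.done);
        return data.ret;

The designated initializer in the patch already zeroes data.done, so no
further change would be needed for this check.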
On Tue, Sep 05, 2023 at 04:50:02PM +0200, Kevin Wolf wrote:
> virtio_load() as a whole should run in coroutine context because it
> reads from the migration stream and we don't want this to block.

Is that "should" a "must" or a "can"?

If it's a "must" then virtio_load() needs assert(qemu_in_coroutine()).

But the previous patch mentioned that loadvm for snapshots calls it
outside coroutine context. So maybe it's a "can"?

> However, it calls virtio_set_features_nocheck() and devices don't
> expect their .set_features callback to run in a coroutine and therefore
> call functions that may not be called in coroutine context. To fix this,
> drop out of coroutine context for calling virtio_set_features_nocheck().
>
> Without this fix, the following crash was reported:
>
> #0 __pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6, no_tid=no_tid@entry=0) at pthread_kill.c:44
> #1 0x00007efc738c05d3 in __pthread_kill_internal (signo=6, threadid=<optimized out>) at pthread_kill.c:78
> #2 0x00007efc73873d26 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
> #3 0x00007efc738477f3 in __GI_abort () at abort.c:79
> #4 0x00007efc7384771b in __assert_fail_base (fmt=0x7efc739dbcb8 "", assertion=assertion@entry=0x560aebfbf5cf "!qemu_in_coroutine()",
>     file=file@entry=0x560aebfcd2d4 "../block/graph-lock.c", line=line@entry=275, function=function@entry=0x560aebfcd34d "void bdrv_graph_rdlock_main_loop(void)") at assert.c:92
> #5 0x00007efc7386ccc6 in __assert_fail (assertion=0x560aebfbf5cf "!qemu_in_coroutine()", file=0x560aebfcd2d4 "../block/graph-lock.c", line=275,
>     function=0x560aebfcd34d "void bdrv_graph_rdlock_main_loop(void)") at assert.c:101
> #6 0x0000560aebcd8dd6 in bdrv_register_buf ()
> #7 0x0000560aeb97ed97 in ram_block_added.llvm ()
> #8 0x0000560aebb8303f in ram_block_add.llvm ()
> #9 0x0000560aebb834fa in qemu_ram_alloc_internal.llvm ()
> #10 0x0000560aebb2ac98 in vfio_region_mmap ()
> #11 0x0000560aebb3ea0f in vfio_bars_register ()
> #12 0x0000560aebb3c628 in vfio_realize ()
> #13 0x0000560aeb90f0c2 in pci_qdev_realize ()
> #14 0x0000560aebc40305 in device_set_realized ()
> #15 0x0000560aebc48e07 in property_set_bool.llvm ()
> #16 0x0000560aebc46582 in object_property_set ()
> #17 0x0000560aebc4cd58 in object_property_set_qobject ()
> #18 0x0000560aebc46ba7 in object_property_set_bool ()
> #19 0x0000560aeb98b3ca in qdev_device_add_from_qdict ()
> #20 0x0000560aebb1fbaf in virtio_net_set_features ()
> #21 0x0000560aebb46b51 in virtio_set_features_nocheck ()
> #22 0x0000560aebb47107 in virtio_load ()
> #23 0x0000560aeb9ae7ce in vmstate_load_state ()
> #24 0x0000560aeb9d2ee9 in qemu_loadvm_state_main ()
> #25 0x0000560aeb9d45e1 in qemu_loadvm_state ()
> #26 0x0000560aeb9bc32c in process_incoming_migration_co.llvm ()
> #27 0x0000560aebeace56 in coroutine_trampoline.llvm ()
>
> Cc: qemu-stable@nongnu.org
> Buglink: https://issues.redhat.com/browse/RHEL-832
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  hw/virtio/virtio.c | 45 ++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 40 insertions(+), 5 deletions(-)

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Am 07.09.2023 um 20:40 hat Stefan Hajnoczi geschrieben:
> On Tue, Sep 05, 2023 at 04:50:02PM +0200, Kevin Wolf wrote:
> > virtio_load() as a whole should run in coroutine context because it
> > reads from the migration stream and we don't want this to block.
>
> Is that "should" a "must" or a "can"?
>
> If it's a "must" then virtio_load() needs assert(qemu_in_coroutine()).
>
> But the previous patch mentioned that loadvm for snapshots calls it
> outside coroutine context. So maybe it's a "can"?

Where this makes a difference is when the function indirectly calls
into QIOChannel. When called from a coroutine, it yields while waiting
for I/O, and outside of a coroutine it blocks. Yielding is always
preferable, but in cases like HMP savevm/loadvm we also don't really
care because it's synchronous anyway.

Whether that makes it a MAY or a SHOULD in the RFC sense, you decide.
If you wanted to make it a MUST, you'd need to check all callers first
and change some of them.

Kevin
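For illustration, a minimal sketch of what the MUST variant Stefan asks
about would look like; it assumes, hypothetically, that all callers
(including HMP loadvm) had already been converted to coroutine context:

int coroutine_fn virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
{
    /* MUST semantics: reject any caller outside coroutine context */
    assert(qemu_in_coroutine());

    /* ... rest of virtio_load() unchanged ... */
}

With that assertion in place, the virtio_set_features_nocheck_maybe_co()
wrapper could be dropped again, since the coroutine path would be the
only one left.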
On Fri, Sep 08, 2023 at 10:59:48AM +0200, Kevin Wolf wrote:
> Am 07.09.2023 um 20:40 hat Stefan Hajnoczi geschrieben:
> > On Tue, Sep 05, 2023 at 04:50:02PM +0200, Kevin Wolf wrote:
> > > virtio_load() as a whole should run in coroutine context because it
> > > reads from the migration stream and we don't want this to block.
> >
> > Is that "should" a "must" or a "can"?
> >
> > If it's a "must" then virtio_load() needs assert(qemu_in_coroutine()).
> >
> > But the previous patch mentioned that loadvm for snapshots calls it
> > outside coroutine context. So maybe it's a "can"?
>
> Where this makes a difference is when the function indirectly calls
> into QIOChannel. When called from a coroutine, it yields while waiting
> for I/O, and outside of a coroutine it blocks. Yielding is always
> preferable, but in cases like HMP savevm/loadvm we also don't really
> care because it's synchronous anyway.
>
> Whether that makes it a MAY or a SHOULD in the RFC sense, you decide.
> If you wanted to make it a MUST, you'd need to check all callers first
> and change some of them.

Thanks for clarifying. It is "can".

Stefan