chardev/char-io.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+)
After 038b4217884c ("Revert "chardev: use a child source for qio input
source"") we've been observing the "iwp->src == NULL" assertion
triggering periodically during the initial capabilities querying by
libvirtd. One of possible backtraces:
Thread 1 (Thread 0x7f16cd4f0700 (LWP 43858)):
0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
1 0x00007f16c6c21e65 in __GI_abort () at abort.c:79
2 0x00007f16c6c21d39 in __assert_fail_base at assert.c:92
3 0x00007f16c6c46e86 in __GI___assert_fail (assertion=assertion@entry=0x562e9bcdaadd "iwp->src == NULL", file=file@entry=0x562e9bcdaac8 "../chardev/char-io.c", line=line@entry=99, function=function@entry=0x562e9bcdab10 <__PRETTY_FUNCTION__.20549> "io_watch_poll_finalize") at assert.c:101
4 0x0000562e9ba20c2c in io_watch_poll_finalize (source=<optimized out>) at ../chardev/char-io.c:99
5 io_watch_poll_finalize (source=<optimized out>) at ../chardev/char-io.c:88
6 0x00007f16c904aae0 in g_source_unref_internal () from /lib64/libglib-2.0.so.0
7 0x00007f16c904baf9 in g_source_destroy_internal () from /lib64/libglib-2.0.so.0
8 0x0000562e9ba20db0 in io_remove_watch_poll (source=0x562e9d6720b0) at ../chardev/char-io.c:147
9 remove_fd_in_watch (chr=chr@entry=0x562e9d5f3800) at ../chardev/char-io.c:153
10 0x0000562e9ba23ffb in update_ioc_handlers (s=0x562e9d5f3800) at ../chardev/char-socket.c:592
11 0x0000562e9ba2072f in qemu_chr_fe_set_handlers_full at ../chardev/char-fe.c:279
12 0x0000562e9ba207a9 in qemu_chr_fe_set_handlers at ../chardev/char-fe.c:304
13 0x0000562e9ba2ca75 in monitor_qmp_setup_handlers_bh (opaque=0x562e9d4c2c60) at ../monitor/qmp.c:509
14 0x0000562e9bb6222e in aio_bh_poll (ctx=ctx@entry=0x562e9d4c2f20) at ../util/async.c:216
15 0x0000562e9bb4de0a in aio_poll (ctx=0x562e9d4c2f20, blocking=blocking@entry=true) at ../util/aio-posix.c:722
16 0x0000562e9b99dfaa in iothread_run (opaque=0x562e9d4c26f0) at ../iothread.c:63
17 0x0000562e9bb505a4 in qemu_thread_start (args=0x562e9d4c7ea0) at ../util/qemu-thread-posix.c:543
18 0x00007f16c70081ca in start_thread (arg=<optimized out>) at pthread_create.c:479
19 0x00007f16c6c398d3 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
io_remove_watch_poll(), which makes sure that iwp->src is NULL, calls
g_source_destroy() which finds that iwp->src is not NULL in the finalize
callback. This can only happen if another thread has managed to trigger
io_watch_poll_prepare() callback in the meantime.
Introduce a mutex and a boolean variable to prevent other threads
creating a watch in io_watch_poll_prepare() in case that the IOWatchPoll
itself is about to get destroyed.
Signed-off-by: Sergey Dyasli <sergey.dyasli@nutanix.com>
---
chardev/char-io.c | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/chardev/char-io.c b/chardev/char-io.c
index dab77b112e35..b1edccf0cc85 100644
--- a/chardev/char-io.c
+++ b/chardev/char-io.c
@@ -34,6 +34,9 @@ typedef struct IOWatchPoll {
GSourceFunc fd_read;
void *opaque;
GMainContext *context;
+
+ QemuMutex mut;
+ bool dead;
} IOWatchPoll;
static IOWatchPoll *io_watch_poll_from_source(GSource *source)
@@ -62,10 +65,20 @@ static gboolean io_watch_poll_prepare(GSource *source,
* more data.
*/
if (now_active) {
+ qemu_mutex_lock(&iwp->mut);
+
+ /* Don't create a watch if we are about to be destroyed. */
+ if (iwp->dead) {
+ qemu_mutex_unlock(&iwp->mut);
+ return FALSE;
+ }
+
iwp->src = qio_channel_create_watch(
iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL);
g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL);
g_source_attach(iwp->src, iwp->context);
+
+ qemu_mutex_unlock(&iwp->mut);
} else {
g_source_destroy(iwp->src);
g_source_unref(iwp->src);
@@ -97,6 +110,7 @@ static void io_watch_poll_finalize(GSource *source)
*/
IOWatchPoll *iwp = io_watch_poll_from_source(source);
assert(iwp->src == NULL);
+ qemu_mutex_destroy(&iwp->mut);
}
static GSourceFuncs io_watch_poll_funcs = {
@@ -124,6 +138,8 @@ GSource *io_add_watch_poll(Chardev *chr,
iwp->fd_read = (GSourceFunc) fd_read;
iwp->src = NULL;
iwp->context = context;
+ qemu_mutex_init(&iwp->mut);
+ iwp->dead = false;
name = g_strdup_printf("chardev-iowatch-%s", chr->label);
g_source_set_name((GSource *)iwp, name);
@@ -139,11 +155,16 @@ static void io_remove_watch_poll(GSource *source)
IOWatchPoll *iwp;
iwp = io_watch_poll_from_source(source);
+
+ qemu_mutex_lock(&iwp->mut);
+ iwp->dead = true;
if (iwp->src) {
g_source_destroy(iwp->src);
g_source_unref(iwp->src);
iwp->src = NULL;
}
+ qemu_mutex_unlock(&iwp->mut);
+
g_source_destroy(&iwp->parent);
}
--
2.22.3
On 7/11/24 11:51, Sergey Dyasli wrote: > After 038b4217884c ("Revert "chardev: use a child source for qio input > source"") we've been observing the "iwp->src == NULL" assertion > triggering periodically during the initial capabilities querying by > libvirtd. One of possible backtraces: Hi Sergey, thanks for the analysis! I noticed however that this comment is really old; it was added from commit 2b316774f60 ("qemu-char: do not operate on sources from finalize callbacks", 2013-04-22): /* Due to a glib bug, removing the last reference to a source * inside a finalize callback causes recursive locking (and a * deadlock). This is not a problem inside other callbacks, * including dispatch callbacks, so we call io_remove_watch_poll * to remove this source. At this point, iwp->src must * be NULL, or we would leak it. * * This would be solved much more elegantly by child sources, * but we support older glib versions that do not have them. */ and the original mailing list message points to a problem on RHEL6 and Wheezy, which were both relatively old in 2013. And in fact the issue with finalize had been fixed in glib in 2010: commit b358202856682e5cdefb0b4b8aaed3a45d9a85fa Author: Dan Winship <danw@gnome.org> Date: Sat Nov 6 09:35:25 2010 -0400 gmain: move finalization of GSource outside of context lock This avoids ugly deadlock situations such as in https://bugzilla.gnome.org/show_bug.cgi?id=586432 https://bugzilla.gnome.org/show_bug.cgi?id=626702 https://bugzilla.gnome.org/show_bug.cgi?id=634239 diff --git a/glib/gmain.c b/glib/gmain.c index b182c6607..301adb0a7 100644 --- a/glib/gmain.c +++ b/glib/gmain.c @@ -1520,7 +1520,13 @@ g_source_unref_internal (GSource *source, g_source_list_remove (source, context); if (source->source_funcs->finalize) - source->source_funcs->finalize (source); + { + if (context) + UNLOCK_CONTEXT (context); + source->source_funcs->finalize (source); + if (context) + LOCK_CONTEXT (context); + } g_free (source->name); source->name = NULL; So I think we should just revert commit 2b316774f60, which is not hard to do (if it works) even if the code has since moved from qemu-char.c to chardev/char-io.c. Thanks, Paolo > Thread 1 (Thread 0x7f16cd4f0700 (LWP 43858)): > 0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50 > 1 0x00007f16c6c21e65 in __GI_abort () at abort.c:79 > 2 0x00007f16c6c21d39 in __assert_fail_base at assert.c:92 > 3 0x00007f16c6c46e86 in __GI___assert_fail (assertion=assertion@entry=0x562e9bcdaadd "iwp->src == NULL", file=file@entry=0x562e9bcdaac8 "../chardev/char-io.c", line=line@entry=99, function=function@entry=0x562e9bcdab10 <__PRETTY_FUNCTION__.20549> "io_watch_poll_finalize") at assert.c:101 > 4 0x0000562e9ba20c2c in io_watch_poll_finalize (source=<optimized out>) at ../chardev/char-io.c:99 > 5 io_watch_poll_finalize (source=<optimized out>) at ../chardev/char-io.c:88 > 6 0x00007f16c904aae0 in g_source_unref_internal () from /lib64/libglib-2.0.so.0 > 7 0x00007f16c904baf9 in g_source_destroy_internal () from /lib64/libglib-2.0.so.0 > 8 0x0000562e9ba20db0 in io_remove_watch_poll (source=0x562e9d6720b0) at ../chardev/char-io.c:147 > 9 remove_fd_in_watch (chr=chr@entry=0x562e9d5f3800) at ../chardev/char-io.c:153 > 10 0x0000562e9ba23ffb in update_ioc_handlers (s=0x562e9d5f3800) at ../chardev/char-socket.c:592 > 11 0x0000562e9ba2072f in qemu_chr_fe_set_handlers_full at ../chardev/char-fe.c:279 > 12 0x0000562e9ba207a9 in qemu_chr_fe_set_handlers at ../chardev/char-fe.c:304 > 13 0x0000562e9ba2ca75 in monitor_qmp_setup_handlers_bh (opaque=0x562e9d4c2c60) at ../monitor/qmp.c:509 > 14 0x0000562e9bb6222e in aio_bh_poll (ctx=ctx@entry=0x562e9d4c2f20) at ../util/async.c:216 > 15 0x0000562e9bb4de0a in aio_poll (ctx=0x562e9d4c2f20, blocking=blocking@entry=true) at ../util/aio-posix.c:722 > 16 0x0000562e9b99dfaa in iothread_run (opaque=0x562e9d4c26f0) at ../iothread.c:63 > 17 0x0000562e9bb505a4 in qemu_thread_start (args=0x562e9d4c7ea0) at ../util/qemu-thread-posix.c:543 > 18 0x00007f16c70081ca in start_thread (arg=<optimized out>) at pthread_create.c:479 > 19 0x00007f16c6c398d3 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95 > > io_remove_watch_poll(), which makes sure that iwp->src is NULL, calls > g_source_destroy() which finds that iwp->src is not NULL in the finalize > callback. This can only happen if another thread has managed to trigger > io_watch_poll_prepare() callback in the meantime. > > Introduce a mutex and a boolean variable to prevent other threads > creating a watch in io_watch_poll_prepare() in case that the IOWatchPoll > itself is about to get destroyed. > > Signed-off-by: Sergey Dyasli <sergey.dyasli@nutanix.com> > --- > chardev/char-io.c | 21 +++++++++++++++++++++ > 1 file changed, 21 insertions(+) > > diff --git a/chardev/char-io.c b/chardev/char-io.c > index dab77b112e35..b1edccf0cc85 100644 > --- a/chardev/char-io.c > +++ b/chardev/char-io.c > @@ -34,6 +34,9 @@ typedef struct IOWatchPoll { > GSourceFunc fd_read; > void *opaque; > GMainContext *context; > + > + QemuMutex mut; > + bool dead; > } IOWatchPoll; > > static IOWatchPoll *io_watch_poll_from_source(GSource *source) > @@ -62,10 +65,20 @@ static gboolean io_watch_poll_prepare(GSource *source, > * more data. > */ > if (now_active) { > + qemu_mutex_lock(&iwp->mut); > + > + /* Don't create a watch if we are about to be destroyed. */ > + if (iwp->dead) { > + qemu_mutex_unlock(&iwp->mut); > + return FALSE; > + } > + > iwp->src = qio_channel_create_watch( > iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL); > g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL); > g_source_attach(iwp->src, iwp->context); > + > + qemu_mutex_unlock(&iwp->mut); > } else { > g_source_destroy(iwp->src); > g_source_unref(iwp->src); > @@ -97,6 +110,7 @@ static void io_watch_poll_finalize(GSource *source) > */ > IOWatchPoll *iwp = io_watch_poll_from_source(source); > assert(iwp->src == NULL); > + qemu_mutex_destroy(&iwp->mut); > } > > static GSourceFuncs io_watch_poll_funcs = { > @@ -124,6 +138,8 @@ GSource *io_add_watch_poll(Chardev *chr, > iwp->fd_read = (GSourceFunc) fd_read; > iwp->src = NULL; > iwp->context = context; > + qemu_mutex_init(&iwp->mut); > + iwp->dead = false; > > name = g_strdup_printf("chardev-iowatch-%s", chr->label); > g_source_set_name((GSource *)iwp, name); > @@ -139,11 +155,16 @@ static void io_remove_watch_poll(GSource *source) > IOWatchPoll *iwp; > > iwp = io_watch_poll_from_source(source); > + > + qemu_mutex_lock(&iwp->mut); > + iwp->dead = true; > if (iwp->src) { > g_source_destroy(iwp->src); > g_source_unref(iwp->src); > iwp->src = NULL; > } > + qemu_mutex_unlock(&iwp->mut); > + > g_source_destroy(&iwp->parent); > } >
On 11/07/2024 12:23, Paolo Bonzini wrote: > So I think we should just revert commit 2b316774f60, which is not hard > to do (if it works) even if the code has since moved from qemu-char.c to > chardev/char-io.c. Hi Paolo, Thanks for the suggestion - I've tried it and it seems to work. I'll send out a new version of the patch shortly. Thanks, Sergey
© 2016 - 2024 Red Hat, Inc.