hw/block/dataplane/xen-block.c | 20 ++++++++++++++++++-- hw/xen/xen-bus.c | 27 +++++++++++++++++++++++---- include/hw/xen/xen-bus.h | 5 ++++- 3 files changed, 45 insertions(+), 7 deletions(-)
It is not safe to close an event channel from the QEMU main thread when
that channel's poller is running in IOThread context.
This patch adds a new xen_device_set_event_channel_context() function
to explicitly assign the channel AioContext, and modifies
xen_device_bind_event_channel() to initially assign the channel's poller
to the QEMU main thread context. The code in xen-block's dataplane is
then modified to assign the channel to IOThread context during
xen_block_dataplane_start() and de-assign it during in
xen_block_dataplane_stop(), such that the channel is always assigned
back to main thread context before it is closed. aio_set_fd_handler()
already deals with all the necessary synchronization when moving an fd
between AioContext-s so no extra code is needed to manage this.
Reported-by: Julien Grall <jgrall@amazon.com>
Signed-off-by: Paul Durrant <pdurrant@amazon.com>
---
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Anthony Perard <anthony.perard@citrix.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Max Reitz <mreitz@redhat.com>
Tested against an HVM debian guest with a QCOW2 image as system disk, and
as a hot-plugged/unplgged secondary disk.
---
hw/block/dataplane/xen-block.c | 20 ++++++++++++++++++--
hw/xen/xen-bus.c | 27 +++++++++++++++++++++++----
include/hw/xen/xen-bus.h | 5 ++++-
3 files changed, 45 insertions(+), 7 deletions(-)
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index 3b9caeb2fa..288a87a814 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -685,12 +685,24 @@ void xen_block_dataplane_stop(XenBlockDataPlane *dataplane)
return;
}
+ xendev = dataplane->xendev;
+
aio_context_acquire(dataplane->ctx);
+ if (dataplane->event_channel) {
+ /* Only reason for failure is a NULL channel */
+ xen_device_set_event_channel_context(xendev, dataplane->event_channel,
+ qemu_get_aio_context(),
+ &error_abort);
+ }
/* Xen doesn't have multiple users for nodes, so this can't fail */
blk_set_aio_context(dataplane->blk, qemu_get_aio_context(), &error_abort);
aio_context_release(dataplane->ctx);
- xendev = dataplane->xendev;
+ /*
+ * Now that the context has been moved onto the main thread, cancel
+ * further processing.
+ */
+ qemu_bh_cancel(dataplane->bh);
if (dataplane->event_channel) {
Error *local_err = NULL;
@@ -807,7 +819,7 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane,
}
dataplane->event_channel =
- xen_device_bind_event_channel(xendev, dataplane->ctx, event_channel,
+ xen_device_bind_event_channel(xendev, event_channel,
xen_block_dataplane_event, dataplane,
&local_err);
if (local_err) {
@@ -818,7 +830,11 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane,
aio_context_acquire(dataplane->ctx);
/* If other users keep the BlockBackend in the iothread, that's ok */
blk_set_aio_context(dataplane->blk, dataplane->ctx, NULL);
+ /* Only reason for failure is a NULL channel */
+ xen_device_set_event_channel_context(xendev, dataplane->event_channel,
+ dataplane->ctx, &error_abort);
aio_context_release(dataplane->ctx);
+
return;
stop:
diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
index c2ad22a42d..349856b32b 100644
--- a/hw/xen/xen-bus.c
+++ b/hw/xen/xen-bus.c
@@ -1089,8 +1089,26 @@ static void xen_device_event(void *opaque)
}
}
+void xen_device_set_event_channel_context(XenDevice *xendev,
+ XenEventChannel *channel,
+ AioContext *ctx,
+ Error **errp)
+{
+ if (!channel) {
+ error_setg(errp, "bad channel");
+ return;
+ }
+
+ if (channel->ctx)
+ aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true,
+ NULL, NULL, NULL, NULL);
+
+ channel->ctx = ctx;
+ aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true,
+ xen_device_event, NULL, xen_device_poll, channel);
+}
+
XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev,
- AioContext *ctx,
unsigned int port,
XenEventHandler handler,
void *opaque, Error **errp)
@@ -1116,9 +1134,10 @@ XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev,
channel->handler = handler;
channel->opaque = opaque;
- channel->ctx = ctx;
- aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true,
- xen_device_event, NULL, xen_device_poll, channel);
+ /* Only reason for failure is a NULL channel */
+ xen_device_set_event_channel_context(xendev, channel,
+ qemu_get_aio_context(),
+ &error_abort);
QLIST_INSERT_HEAD(&xendev->event_channels, channel, list);
diff --git a/include/hw/xen/xen-bus.h b/include/hw/xen/xen-bus.h
index 3d5532258d..c18c1372af 100644
--- a/include/hw/xen/xen-bus.h
+++ b/include/hw/xen/xen-bus.h
@@ -128,10 +128,13 @@ void xen_device_copy_grant_refs(XenDevice *xendev, bool to_domain,
typedef bool (*XenEventHandler)(void *opaque);
XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev,
- AioContext *ctx,
unsigned int port,
XenEventHandler handler,
void *opaque, Error **errp);
+void xen_device_set_event_channel_context(XenDevice *xendev,
+ XenEventChannel *channel,
+ AioContext *ctx,
+ Error **errp);
void xen_device_notify_event_channel(XenDevice *xendev,
XenEventChannel *channel,
Error **errp);
--
2.20.1
On Mon, Dec 16, 2019 at 02:34:51PM +0000, Paul Durrant wrote: > It is not safe to close an event channel from the QEMU main thread when > that channel's poller is running in IOThread context. > > This patch adds a new xen_device_set_event_channel_context() function > to explicitly assign the channel AioContext, and modifies > xen_device_bind_event_channel() to initially assign the channel's poller > to the QEMU main thread context. The code in xen-block's dataplane is > then modified to assign the channel to IOThread context during > xen_block_dataplane_start() and de-assign it during in > xen_block_dataplane_stop(), such that the channel is always assigned > back to main thread context before it is closed. aio_set_fd_handler() > already deals with all the necessary synchronization when moving an fd > between AioContext-s so no extra code is needed to manage this. > > Reported-by: Julien Grall <jgrall@amazon.com> > Signed-off-by: Paul Durrant <pdurrant@amazon.com> Reviewed-by: Anthony PERARD <anthony.perard@citrix.com> > Tested against an HVM debian guest with a QCOW2 image as system disk, and > as a hot-plugged/unplgged secondary disk. And I've run an osstest flight with the patch. Thanks, -- Anthony PERARD _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Hi Anthony, On 19/12/2019 17:11, Anthony PERARD wrote: > On Mon, Dec 16, 2019 at 02:34:51PM +0000, Paul Durrant wrote: >> It is not safe to close an event channel from the QEMU main thread when >> that channel's poller is running in IOThread context. >> >> This patch adds a new xen_device_set_event_channel_context() function >> to explicitly assign the channel AioContext, and modifies >> xen_device_bind_event_channel() to initially assign the channel's poller >> to the QEMU main thread context. The code in xen-block's dataplane is >> then modified to assign the channel to IOThread context during >> xen_block_dataplane_start() and de-assign it during in >> xen_block_dataplane_stop(), such that the channel is always assigned >> back to main thread context before it is closed. aio_set_fd_handler() >> already deals with all the necessary synchronization when moving an fd >> between AioContext-s so no extra code is needed to manage this. >> >> Reported-by: Julien Grall <jgrall@amazon.com> >> Signed-off-by: Paul Durrant <pdurrant@amazon.com> > > Reviewed-by: Anthony PERARD <anthony.perard@citrix.com> I can't find the patch in QEMU upstream. Are we missing any ack/review for this patch? > >> Tested against an HVM debian guest with a QCOW2 image as system disk, and >> as a hot-plugged/unplgged secondary disk. > > And I've run an osstest flight with the patch. > > Thanks, > Cheers, -- Julien Grall
On Wed, Jan 29, 2020 at 10:22:14PM +0000, Julien Grall wrote: > Hi Anthony, > > On 19/12/2019 17:11, Anthony PERARD wrote: > > On Mon, Dec 16, 2019 at 02:34:51PM +0000, Paul Durrant wrote: > > > It is not safe to close an event channel from the QEMU main thread when > > > that channel's poller is running in IOThread context. > > > > > > This patch adds a new xen_device_set_event_channel_context() function > > > to explicitly assign the channel AioContext, and modifies > > > xen_device_bind_event_channel() to initially assign the channel's poller > > > to the QEMU main thread context. The code in xen-block's dataplane is > > > then modified to assign the channel to IOThread context during > > > xen_block_dataplane_start() and de-assign it during in > > > xen_block_dataplane_stop(), such that the channel is always assigned > > > back to main thread context before it is closed. aio_set_fd_handler() > > > already deals with all the necessary synchronization when moving an fd > > > between AioContext-s so no extra code is needed to manage this. > > > > > > Reported-by: Julien Grall <jgrall@amazon.com> > > > Signed-off-by: Paul Durrant <pdurrant@amazon.com> > > > > Reviewed-by: Anthony PERARD <anthony.perard@citrix.com> > > I can't find the patch in QEMU upstream. Are we missing any ack/review for > this patch? No, I just need to prepare a pull request. It's in my list of patch for upstream, so there will be a pull request at some point before the next QEMU release. Cheers, -- Anthony PERARD
© 2016 - 2024 Red Hat, Inc.