Cleaning up offline XenDevice objects directly in
xen_device_backend_changed() is dangerous as xen_device_unrealize() will
modify the watch list that is being walked. Even the QLIST_FOREACH_SAFE()
used in notifier_list_notify() is insufficient as *two* notifiers (for
the frontend and backend watches) are removed, thus potentially rendering
the 'next' pointer unsafe.
The solution is to use the XenBus backend_watch handler to do the clean-up
instead, as it is invoked whilst walking a separate watch list.
This patch therefore adds a new 'offline_devices' list to XenBus, to which
offline devices are added by xen_device_backend_changed(). The XenBus
backend_watch registration is also changed to not only invoke
xen_bus_enumerate() but also a new xen_bus_cleanup() function, which will
walk 'offline_devices' and perform the necessary actions.
For safety an extra 'online' check is also added to
xen_bus_type_enumerate() to make sure that no attempt is made to create a
new XenDevice object for a backend that is offline.
NOTE: This patch also includes some cosmetic changes:
- substitute the local variable name 'backend_state'
in xen_bus_type_enumerate() with 'state', since there
is no ambiguity with any other state in that context.
- change xen_device_state_is_active() to
xen_device_frontend_is_active() (and pass a XenDevice directly)
since the state tests contained therein only apply to a frontend.
- use 'state' rather than 'xendev->backend_state' in
xen_device_backend_changed() to shorten the code.
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
---
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Anthony Perard <anthony.perard@citrix.com>
v2:
- Make sure we don't try to add a XenDevice to 'offline_devices' more than
once
---
hw/xen/trace-events | 2 +
hw/xen/xen-bus.c | 92 +++++++++++++++++++++++++++++-----------
include/hw/xen/xen-bus.h | 2 +
3 files changed, 71 insertions(+), 25 deletions(-)
diff --git a/hw/xen/trace-events b/hw/xen/trace-events
index 80ce3dafad..e6885bc751 100644
--- a/hw/xen/trace-events
+++ b/hw/xen/trace-events
@@ -17,8 +17,10 @@ xen_domid_restrict(int err) "err: %u"
xen_bus_realize(void) ""
xen_bus_unrealize(void) ""
xen_bus_enumerate(void) ""
+xen_bus_cleanup(void) ""
xen_bus_type_enumerate(const char *type) "type: %s"
xen_bus_backend_create(const char *type, const char *path) "type: %s path: %s"
+xen_bus_device_cleanup(const char *type, char *name) "type: %s name: %s"
xen_bus_add_watch(const char *node, const char *key) "node: %s key: %s"
xen_bus_remove_watch(const char *node, const char *key) "node: %s key: %s"
xen_device_realize(const char *type, char *name) "type: %s name: %s"
diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
index 810a4e2df3..43e29b8771 100644
--- a/hw/xen/xen-bus.c
+++ b/hw/xen/xen-bus.c
@@ -340,13 +340,18 @@ static void xen_bus_type_enumerate(XenBus *xenbus, const char *type)
for (i = 0; i < n; i++) {
char *backend_path = g_strdup_printf("%s/%s", domain_path,
backend[i]);
- enum xenbus_state backend_state;
+ enum xenbus_state state;
+ unsigned int online;
if (xs_node_scanf(xenbus->xsh, XBT_NULL, backend_path, "state",
- NULL, "%u", &backend_state) != 1)
- backend_state = XenbusStateUnknown;
+ NULL, "%u", &state) != 1)
+ state = XenbusStateUnknown;
- if (backend_state == XenbusStateInitialising) {
+ if (xs_node_scanf(xenbus->xsh, XBT_NULL, backend_path, "online",
+ NULL, "%u", &online) != 1)
+ online = 0;
+
+ if (online && state == XenbusStateInitialising) {
Error *local_err = NULL;
xen_bus_backend_create(xenbus, type, backend[i], backend_path,
@@ -365,9 +370,8 @@ out:
g_free(domain_path);
}
-static void xen_bus_enumerate(void *opaque)
+static void xen_bus_enumerate(XenBus *xenbus)
{
- XenBus *xenbus = opaque;
char **type;
unsigned int i, n;
@@ -385,6 +389,44 @@ static void xen_bus_enumerate(void *opaque)
free(type);
}
+static void xen_bus_device_cleanup(XenDevice *xendev)
+{
+ const char *type = object_get_typename(OBJECT(xendev));
+ Error *local_err = NULL;
+
+ trace_xen_bus_device_cleanup(type, xendev->name);
+
+ g_assert(!xendev->backend_online);
+
+ if (!xen_backend_try_device_destroy(xendev, &local_err)) {
+ object_unparent(OBJECT(xendev));
+ }
+
+ if (local_err) {
+ error_report_err(local_err);
+ }
+}
+
+static void xen_bus_cleanup(XenBus *xenbus)
+{
+ XenDevice *xendev, *next;
+
+ trace_xen_bus_cleanup();
+
+ QLIST_FOREACH_SAFE(xendev, &xenbus->offline_devices, list, next) {
+ QLIST_REMOVE(xendev, list);
+ xen_bus_device_cleanup(xendev);
+ }
+}
+
+static void xen_bus_backend_changed(void *opaque)
+{
+ XenBus *xenbus = opaque;
+
+ xen_bus_enumerate(xenbus);
+ xen_bus_cleanup(xenbus);
+}
+
static void xen_bus_unrealize(BusState *bus, Error **errp)
{
XenBus *xenbus = XEN_BUS(bus);
@@ -433,7 +475,7 @@ static void xen_bus_realize(BusState *bus, Error **errp)
xenbus->backend_watch =
xen_bus_add_watch(xenbus, "", /* domain root node */
- "backend", xen_bus_enumerate, &local_err);
+ "backend", xen_bus_backend_changed, &local_err);
if (local_err) {
/* This need not be treated as a hard error so don't propagate */
error_reportf_err(local_err,
@@ -555,9 +597,9 @@ static void xen_device_backend_set_online(XenDevice *xendev, bool online)
* Tell from the state whether the frontend is likely alive,
* i.e. it will react to a change of state of the backend.
*/
-static bool xen_device_state_is_active(enum xenbus_state state)
+static bool xen_device_frontend_is_active(XenDevice *xendev)
{
- switch (state) {
+ switch (xendev->frontend_state) {
case XenbusStateInitWait:
case XenbusStateInitialised:
case XenbusStateConnected:
@@ -594,30 +636,30 @@ static void xen_device_backend_changed(void *opaque)
* state to Closing, but there is no active frontend then set the
* backend state to Closed.
*/
- if (xendev->backend_state == XenbusStateClosing &&
- !xen_device_state_is_active(xendev->frontend_state)) {
+ if (state == XenbusStateClosing &&
+ !xen_device_frontend_is_active(xendev)) {
xen_device_backend_set_state(xendev, XenbusStateClosed);
}
/*
* If a backend is still 'online' then we should leave it alone but,
- * if a backend is not 'online', then the device should be destroyed
- * once the state is Closed.
+ * if a backend is not 'online', then the device is a candidate
+ * for destruction. Hence add it to the 'offline' list to be cleaned
+ * by xen_bus_cleanup().
*/
- if (!xendev->backend_online &&
- (xendev->backend_state == XenbusStateClosed ||
- xendev->backend_state == XenbusStateInitialising ||
- xendev->backend_state == XenbusStateInitWait ||
- xendev->backend_state == XenbusStateUnknown)) {
- Error *local_err = NULL;
+ if (!online &&
+ (state == XenbusStateClosed || state == XenbusStateInitialising ||
+ state == XenbusStateInitWait || state == XenbusStateUnknown) &&
+ !QLIST_NEXT(xendev, list)) {
+ XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev)));
- if (!xen_backend_try_device_destroy(xendev, &local_err)) {
- object_unparent(OBJECT(xendev));
- }
+ QLIST_INSERT_HEAD(&xenbus->offline_devices, xendev, list);
- if (local_err) {
- error_report_err(local_err);
- }
+ /*
+ * Re-write the state to cause a XenBus backend_watch notification,
+ * resulting in a call to xen_bus_cleanup().
+ */
+ xen_device_backend_printf(xendev, "state", "%u", state);
}
}
diff --git a/include/hw/xen/xen-bus.h b/include/hw/xen/xen-bus.h
index 0d198148f6..15d71aff70 100644
--- a/include/hw/xen/xen-bus.h
+++ b/include/hw/xen/xen-bus.h
@@ -33,6 +33,7 @@ typedef struct XenDevice {
xengnttab_handle *xgth;
bool feature_grant_copy;
QLIST_HEAD(, XenEventChannel) event_channels;
+ QLIST_ENTRY(XenDevice) list;
} XenDevice;
typedef char *(*XenDeviceGetName)(XenDevice *xendev, Error **errp);
@@ -68,6 +69,7 @@ typedef struct XenBus {
struct xs_handle *xsh;
XenWatchList *watch_list;
XenWatch *backend_watch;
+ QLIST_HEAD(, XenDevice) offline_devices;
} XenBus;
typedef struct XenBusClass {
--
2.20.1.2.gb21ebb6
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
On Thu, Sep 12, 2019 at 01:16:46PM +0100, Paul Durrant wrote:
> Cleaning up offine XenDevice objects directly in
^ offline
> xen_device_backend_changed() is dangerous as xen_device_unrealize() will
> modify the watch list that is being walked. Even the QLIST_FOREACH_SAFE()
> used in notifier_list_notify() is insufficient as *two* notifiers (for
> the frontend and backend watches) are removed, thus potentially rendering
> the 'next' pointer unsafe.
>
> The solution is to use the XenBus backend_watch handler to do the clean-up
> instead, as it is invoked whilst walking a separate watch list.
>
> This patch therefore adds a new 'offline_devices' list to XenBus, to which
> offline devices are added by xen_device_backend_changed(). The XenBus
> backend_watch registration is also changed to not only invoke
> xen_bus_enumerate() but also a new xen_bus_cleanup() function, which will
> walk 'offline_devices' and perform the necessary actions.
> For safety a an extra 'online' check is also added to
^ one 'a' too many?
> xen_bus_type_enumerate() to make sure that no attempt is made to create a
> new XenDevice object for a backend that is offline.
>
> NOTE: This patch also include some cosmetic changes:
> - substitute the local variable name 'backend_state'
> in xen_bus_type_enumerate() with 'state', since there
> is no ambiguity with any other state in that context.
> - change xen_device_state_is_active() to
> xen_device_frontend_is_active() (and pass a XenDevice directly)
> since the state tests contained therein only apply to a frontend.
> - use 'state' rather then 'xendev->backend_state' in
> xen_device_backend_changed() to shorten the code.
>
> Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> ---
>
> v2:
> - Make sure we don't try to add a XenDevice to 'offline_devices' more than
> once
> ---
>
> /*
> * If a backend is still 'online' then we should leave it alone but,
> - * if a backend is not 'online', then the device should be destroyed
> - * once the state is Closed.
> + * if a backend is not 'online', then the device is a candidate
> + * for destruction. Hence add it to the 'offline' list to be cleaned
> + * by xen_bus_cleanup().
> */
> - if (!xendev->backend_online &&
> - (xendev->backend_state == XenbusStateClosed ||
> - xendev->backend_state == XenbusStateInitialising ||
> - xendev->backend_state == XenbusStateInitWait ||
> - xendev->backend_state == XenbusStateUnknown)) {
> - Error *local_err = NULL;
> + if (!online &&
> + (state == XenbusStateClosed || state == XenbusStateInitialising ||
> + state == XenbusStateInitWait || state == XenbusStateUnknown) &&
> + !QLIST_NEXT(xendev, list)) {
Could you add a note about this QLIST_NEXT? I don't think it's going to
be obvious enough why we check that there is no next item. I might only
understand it just because of your reply to the v1 of the patch.
(Well, the changelog of the patch also points out what it's for.)
> + XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev)));
>
> - if (!xen_backend_try_device_destroy(xendev, &local_err)) {
> - object_unparent(OBJECT(xendev));
> - }
> + QLIST_INSERT_HEAD(&xenbus->offline_devices, xendev, list);
>
> - if (local_err) {
> - error_report_err(local_err);
> - }
> + /*
> + * Re-write the state to cause a XenBus backend_watch notification,
> + * resulting in a call to xen_bus_cleanup().
> + */
> + xen_device_backend_printf(xendev, "state", "%u", state);
It kind of feels wrong to rely on xenstore to notify QEMU's xenbus
driver that a device needs cleanup. But that does the job.
With a note about the use of QLIST_NEXT and the few typos fixed, the
patch will be good to go.
Thanks,
--
Anthony PERARD
> -----Original Message-----
> From: Anthony PERARD <anthony.perard@citrix.com>
> Sent: 12 September 2019 16:04
> To: Paul Durrant <Paul.Durrant@citrix.com>
> Cc: qemu-devel@nongnu.org; xen-devel@lists.xenproject.org; Stefano Stabellini <sstabellini@kernel.org>
> Subject: Re: [PATCH v2 3/3] xen: perform XenDevice clean-up in XenBus watch handler
>
> On Thu, Sep 12, 2019 at 01:16:46PM +0100, Paul Durrant wrote:
> > Cleaning up offine XenDevice objects directly in
> ^ offline
>
> > xen_device_backend_changed() is dangerous as xen_device_unrealize() will
> > modify the watch list that is being walked. Even the QLIST_FOREACH_SAFE()
> > used in notifier_list_notify() is insufficient as *two* notifiers (for
> > the frontend and backend watches) are removed, thus potentially rendering
> > the 'next' pointer unsafe.
> >
> > The solution is to use the XenBus backend_watch handler to do the clean-up
> > instead, as it is invoked whilst walking a separate watch list.
> >
> > This patch therefore adds a new 'offline_devices' list to XenBus, to which
> > offline devices are added by xen_device_backend_changed(). The XenBus
> > backend_watch registration is also changed to not only invoke
> > xen_bus_enumerate() but also a new xen_bus_cleanup() function, which will
> > walk 'offline_devices' and perform the necessary actions.
> > For safety a an extra 'online' check is also added to
> ^ one 'a' too many?
>
> > xen_bus_type_enumerate() to make sure that no attempt is made to create a
> > new XenDevice object for a backend that is offline.
> >
> > NOTE: This patch also include some cosmetic changes:
> > - substitute the local variable name 'backend_state'
> > in xen_bus_type_enumerate() with 'state', since there
> > is no ambiguity with any other state in that context.
> > - change xen_device_state_is_active() to
> > xen_device_frontend_is_active() (and pass a XenDevice directly)
> > since the state tests contained therein only apply to a frontend.
> > - use 'state' rather then 'xendev->backend_state' in
> > xen_device_backend_changed() to shorten the code.
> >
> > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> > ---
> >
> > v2:
> > - Make sure we don't try to add a XenDevice to 'offline_devices' more than
> > once
> > ---
> >
> > /*
> > * If a backend is still 'online' then we should leave it alone but,
> > - * if a backend is not 'online', then the device should be destroyed
> > - * once the state is Closed.
> > + * if a backend is not 'online', then the device is a candidate
> > + * for destruction. Hence add it to the 'offline' list to be cleaned
> > + * by xen_bus_cleanup().
> > */
> > - if (!xendev->backend_online &&
> > - (xendev->backend_state == XenbusStateClosed ||
> > - xendev->backend_state == XenbusStateInitialising ||
> > - xendev->backend_state == XenbusStateInitWait ||
> > - xendev->backend_state == XenbusStateUnknown)) {
> > - Error *local_err = NULL;
> > + if (!online &&
> > + (state == XenbusStateClosed || state == XenbusStateInitialising ||
> > + state == XenbusStateInitWait || state == XenbusStateUnknown) &&
> > + !QLIST_NEXT(xendev, list)) {
>
> Could you add a note about this QLIST_NEXT? I don't think it's going to
> be obvious enough why we check that there are no next item. I might only
> understand it just because of your reply to the v1 of the patch.
> (Well the changelog of the patch also point out what it's for.)
>
Sure, it's worth a comment.
> > + XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev)));
> >
> > - if (!xen_backend_try_device_destroy(xendev, &local_err)) {
> > - object_unparent(OBJECT(xendev));
> > - }
> > + QLIST_INSERT_HEAD(&xenbus->offline_devices, xendev, list);
> >
> > - if (local_err) {
> > - error_report_err(local_err);
> > - }
> > + /*
> > + * Re-write the state to cause a XenBus backend_watch notification,
> > + * resulting in a call to xen_bus_cleanup().
> > + */
> > + xen_device_backend_printf(xendev, "state", "%u", state);
>
> It kind of feels wrong to rely on xenstore to notify QEMU's xenbus
> driver that a device needs cleanup. But that does the job.
>
I had originally considered setting up an event notifier but that seemed like more fds than were strictly necessary ;-)
> With a note about the use of QLIST_NEXT and the few typo fixed, the
> patch will be good to go.
>
Cool. I'll tidy up and send a v3.
Paul
> Thanks,
>
> --
> Anthony PERARD
> -----Original Message-----
> From: Xen-devel <xen-devel-bounces@lists.xenproject.org> On Behalf Of Paul Durrant
> Sent: 12 September 2019 16:16
> To: Anthony Perard <anthony.perard@citrix.com>
> Cc: xen-devel@lists.xenproject.org; Stefano Stabellini <sstabellini@kernel.org>; qemu-devel@nongnu.org
> Subject: Re: [Xen-devel] [PATCH v2 3/3] xen: perform XenDevice clean-up in XenBus watch handler
>
> > -----Original Message-----
> > From: Anthony PERARD <anthony.perard@citrix.com>
> > Sent: 12 September 2019 16:04
> > To: Paul Durrant <Paul.Durrant@citrix.com>
> > Cc: qemu-devel@nongnu.org; xen-devel@lists.xenproject.org; Stefano Stabellini
> <sstabellini@kernel.org>
> > Subject: Re: [PATCH v2 3/3] xen: perform XenDevice clean-up in XenBus watch handler
> >
> > On Thu, Sep 12, 2019 at 01:16:46PM +0100, Paul Durrant wrote:
> > > Cleaning up offine XenDevice objects directly in
> > ^ offline
> >
> > > xen_device_backend_changed() is dangerous as xen_device_unrealize() will
> > > modify the watch list that is being walked. Even the QLIST_FOREACH_SAFE()
> > > used in notifier_list_notify() is insufficient as *two* notifiers (for
> > > the frontend and backend watches) are removed, thus potentially rendering
> > > the 'next' pointer unsafe.
> > >
> > > The solution is to use the XenBus backend_watch handler to do the clean-up
> > > instead, as it is invoked whilst walking a separate watch list.
> > >
> > > This patch therefore adds a new 'offline_devices' list to XenBus, to which
> > > offline devices are added by xen_device_backend_changed(). The XenBus
> > > backend_watch registration is also changed to not only invoke
> > > xen_bus_enumerate() but also a new xen_bus_cleanup() function, which will
> > > walk 'offline_devices' and perform the necessary actions.
> > > For safety a an extra 'online' check is also added to
> > ^ one 'a' too many?
> >
> > > xen_bus_type_enumerate() to make sure that no attempt is made to create a
> > > new XenDevice object for a backend that is offline.
> > >
> > > NOTE: This patch also include some cosmetic changes:
> > > - substitute the local variable name 'backend_state'
> > > in xen_bus_type_enumerate() with 'state', since there
> > > is no ambiguity with any other state in that context.
> > > - change xen_device_state_is_active() to
> > > xen_device_frontend_is_active() (and pass a XenDevice directly)
> > > since the state tests contained therein only apply to a frontend.
> > > - use 'state' rather then 'xendev->backend_state' in
> > > xen_device_backend_changed() to shorten the code.
> > >
> > > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> > > ---
> > >
> > > v2:
> > > - Make sure we don't try to add a XenDevice to 'offline_devices' more than
> > > once
> > > ---
> > >
> > > /*
> > > * If a backend is still 'online' then we should leave it alone but,
> > > - * if a backend is not 'online', then the device should be destroyed
> > > - * once the state is Closed.
> > > + * if a backend is not 'online', then the device is a candidate
> > > + * for destruction. Hence add it to the 'offline' list to be cleaned
> > > + * by xen_bus_cleanup().
> > > */
> > > - if (!xendev->backend_online &&
> > > - (xendev->backend_state == XenbusStateClosed ||
> > > - xendev->backend_state == XenbusStateInitialising ||
> > > - xendev->backend_state == XenbusStateInitWait ||
> > > - xendev->backend_state == XenbusStateUnknown)) {
> > > - Error *local_err = NULL;
> > > + if (!online &&
> > > + (state == XenbusStateClosed || state == XenbusStateInitialising ||
> > > + state == XenbusStateInitWait || state == XenbusStateUnknown) &&
> > > + !QLIST_NEXT(xendev, list)) {
> >
> > Could you add a note about this QLIST_NEXT? I don't think it's going to
> > be obvious enough why we check that there are no next item. I might only
> > understand it just because of your reply to the v1 of the patch.
> > (Well the changelog of the patch also point out what it's for.)
> >
>
> Sure, it's worth a comment.
Actually, on closer inspection !QLIST_NEXT() is an insufficient check. I had assumed that QLISTs were full doubly-linked lists but they are not; the last element on a list will still have a NULL next pointer. It will be sufficient, and also clearer to the reader, if I add a boolean to XenDevice which is set when it is added to the list.
Paul
© 2016 - 2025 Red Hat, Inc.