[Qemu-devel] [PATCH for-2.11] spapr: reset DRCs after devices

Greg Kurz posted 1 patch 6 years, 4 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/151092340873.11818.16813698420232981002.stgit@bahia
Test checkpatch passed
Test docker passed
Test ppc passed
Test s390x passed
hw/ppc/spapr.c     |   21 +++++++++++++++++++++
hw/ppc/spapr_drc.c |    7 -------
2 files changed, 21 insertions(+), 7 deletions(-)
[Qemu-devel] [PATCH for-2.11] spapr: reset DRCs after devices
Posted by Greg Kurz 6 years, 4 months ago
A DRC with a pending unplug request releases its associated device at
machine reset time.

In the case of LMB, when all DRCs for a DIMM device have been reset,
the DIMM gets unplugged, causing guest memory to disappear. This may
be very confusing for anything still using this memory.

This is exactly what happens with vhost backends, and QEMU aborts
with:

qemu-system-ppc64: used ring relocated for ring 2
qemu-system-ppc64: qemu/hw/virtio/vhost.c:649: vhost_commit: Assertion
 `r >= 0' failed.

The issue is that each DRC registers a QEMU reset handler, and we
don't control the order in which these handlers are called (ie,
a LMB DRC will unplug a DIMM before the virtio device using the
memory on this DIMM could stop its vhost backend).

To avoid such situations, let's reset DRCs after all devices
have been reset.

Reported-by: Mallesh N. Koti <mallesh@linux.vnet.ibm.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
---
 hw/ppc/spapr.c     |   21 +++++++++++++++++++++
 hw/ppc/spapr_drc.c |    7 -------
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 6841bd294b3c..6285f7211f9a 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1411,6 +1411,19 @@ static void find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque)
     }
 }
 
+static int spapr_reset_drcs(Object *child, void *opaque)
+{
+    sPAPRDRConnector *drc =
+        (sPAPRDRConnector *) object_dynamic_cast(child,
+                                                 TYPE_SPAPR_DR_CONNECTOR);
+
+    if (drc) {
+        spapr_drc_reset(drc);
+    }
+
+    return 0;
+}
+
 static void ppc_spapr_reset(void)
 {
     MachineState *machine = MACHINE(qdev_get_machine());
@@ -1434,6 +1447,14 @@ static void ppc_spapr_reset(void)
     }
 
     qemu_devices_reset();
+
+    /* DRC reset may cause a device to be unplugged. This will cause troubles
+     * if this device is used by another device (eg, a running vhost backend
+     * will crash QEMU if the DIMM holding the vring goes away). To avoid such
+     * situations, we reset DRCs after all devices have been reset.
+     */
+    object_child_foreach_recursive(object_get_root(), spapr_reset_drcs, NULL);
+
     spapr_clear_pending_events(spapr);
 
     /*
diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index 915e9b51c40c..e3b122968e89 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -455,11 +455,6 @@ void spapr_drc_reset(sPAPRDRConnector *drc)
     }
 }
 
-static void drc_reset(void *opaque)
-{
-    spapr_drc_reset(SPAPR_DR_CONNECTOR(opaque));
-}
-
 bool spapr_drc_needed(void *opaque)
 {
     sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque;
@@ -518,7 +513,6 @@ static void realize(DeviceState *d, Error **errp)
     }
     vmstate_register(DEVICE(drc), spapr_drc_index(drc), &vmstate_spapr_drc,
                      drc);
-    qemu_register_reset(drc_reset, drc);
     trace_spapr_drc_realize_complete(spapr_drc_index(drc));
 }
 
@@ -529,7 +523,6 @@ static void unrealize(DeviceState *d, Error **errp)
     gchar *name;
 
     trace_spapr_drc_unrealize(spapr_drc_index(drc));
-    qemu_unregister_reset(drc_reset, drc);
     vmstate_unregister(DEVICE(drc), &vmstate_spapr_drc, drc);
     root_container = container_get(object_get_root(), DRC_CONTAINER_PATH);
     name = g_strdup_printf("%x", spapr_drc_index(drc));


Re: [Qemu-devel] [Qemu-ppc] [PATCH for-2.11] spapr: reset DRCs after devices
Posted by Daniel Henrique Barboza 6 years, 4 months ago

On 11/17/2017 10:56 AM, Greg Kurz wrote:
> A DRC with a pending unplug request releases its associated device at
> machine reset time.
>
> In the case of LMB, when all DRCs for a DIMM device have been reset,
> the DIMM gets unplugged, causing guest memory to disappear. This may
> be very confusing for anything still using this memory.
>
> This is exactly what happens with vhost backends, and QEMU aborts
> with:
>
> qemu-system-ppc64: used ring relocated for ring 2
> qemu-system-ppc64: qemu/hw/virtio/vhost.c:649: vhost_commit: Assertion
>   `r >= 0' failed.
>
> The issue is that each DRC registers a QEMU reset handler, and we
> don't control the order in which these handlers are called (ie,
> a LMB DRC will unplug a DIMM before the virtio device using the
> memory on this DIMM could stop its vhost backend).
>
> To avoid such situations, let's reset DRCs after all devices
> have been reset.
>
> Reported-by: Mallesh N. Koti <mallesh@linux.vnet.ibm.com>
> Signed-off-by: Greg Kurz <groug@kaod.org>
> ---
I believe this has to do with the problem discussed at the memory
unplug reboot with vhost=on, right?

I don't see any problem with this patch and it's probably the best solution
short-term for the problem (and potentially other related LMB release
problems), but that said, how hard it is to forbid LMB unplug at all if the
memory is being used in vhost?

The way I see it, the root problem is that a device unplug failed at the 
guest
side and we don't know about it, leaving drc->unplug_requested flags set
around the code when it shouldn't. Reading that thread I see a comment from
Michael S. Tsirkin saying that this bug is somewhat expected, that vhost 
backend will
not behave well if memory is going away. Unlike other LMB unplug 
failures from
the guest side that might happen for any other reason, this one sees 
predicable
and we can avoid it.

I think this is something worth considering. But for now,


Reviewed-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com>

>   hw/ppc/spapr.c     |   21 +++++++++++++++++++++
>   hw/ppc/spapr_drc.c |    7 -------
>   2 files changed, 21 insertions(+), 7 deletions(-)
>
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 6841bd294b3c..6285f7211f9a 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1411,6 +1411,19 @@ static void find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque)
>       }
>   }
>
> +static int spapr_reset_drcs(Object *child, void *opaque)
> +{
> +    sPAPRDRConnector *drc =
> +        (sPAPRDRConnector *) object_dynamic_cast(child,
> +                                                 TYPE_SPAPR_DR_CONNECTOR);
> +
> +    if (drc) {
> +        spapr_drc_reset(drc);
> +    }
> +
> +    return 0;
> +}
> +
>   static void ppc_spapr_reset(void)
>   {
>       MachineState *machine = MACHINE(qdev_get_machine());
> @@ -1434,6 +1447,14 @@ static void ppc_spapr_reset(void)
>       }
>
>       qemu_devices_reset();
> +
> +    /* DRC reset may cause a device to be unplugged. This will cause troubles
> +     * if this device is used by another device (eg, a running vhost backend
> +     * will crash QEMU if the DIMM holding the vring goes away). To avoid such
> +     * situations, we reset DRCs after all devices have been reset.
> +     */
> +    object_child_foreach_recursive(object_get_root(), spapr_reset_drcs, NULL);
> +
>       spapr_clear_pending_events(spapr);
>
>       /*
> diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> index 915e9b51c40c..e3b122968e89 100644
> --- a/hw/ppc/spapr_drc.c
> +++ b/hw/ppc/spapr_drc.c
> @@ -455,11 +455,6 @@ void spapr_drc_reset(sPAPRDRConnector *drc)
>       }
>   }
>
> -static void drc_reset(void *opaque)
> -{
> -    spapr_drc_reset(SPAPR_DR_CONNECTOR(opaque));
> -}
> -
>   bool spapr_drc_needed(void *opaque)
>   {
>       sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque;
> @@ -518,7 +513,6 @@ static void realize(DeviceState *d, Error **errp)
>       }
>       vmstate_register(DEVICE(drc), spapr_drc_index(drc), &vmstate_spapr_drc,
>                        drc);
> -    qemu_register_reset(drc_reset, drc);
>       trace_spapr_drc_realize_complete(spapr_drc_index(drc));
>   }
>
> @@ -529,7 +523,6 @@ static void unrealize(DeviceState *d, Error **errp)
>       gchar *name;
>
>       trace_spapr_drc_unrealize(spapr_drc_index(drc));
> -    qemu_unregister_reset(drc_reset, drc);
>       vmstate_unregister(DEVICE(drc), &vmstate_spapr_drc, drc);
>       root_container = container_get(object_get_root(), DRC_CONTAINER_PATH);
>       name = g_strdup_printf("%x", spapr_drc_index(drc));
>
>


Re: [Qemu-devel] [Qemu-ppc] [PATCH for-2.11] spapr: reset DRCs after devices
Posted by Greg Kurz 6 years, 4 months ago
On Fri, 17 Nov 2017 14:21:27 -0200
Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com> wrote:

> On 11/17/2017 10:56 AM, Greg Kurz wrote:
> > A DRC with a pending unplug request releases its associated device at
> > machine reset time.
> >
> > In the case of LMB, when all DRCs for a DIMM device have been reset,
> > the DIMM gets unplugged, causing guest memory to disappear. This may
> > be very confusing for anything still using this memory.
> >
> > This is exactly what happens with vhost backends, and QEMU aborts
> > with:
> >
> > qemu-system-ppc64: used ring relocated for ring 2
> > qemu-system-ppc64: qemu/hw/virtio/vhost.c:649: vhost_commit: Assertion
> >   `r >= 0' failed.
> >
> > The issue is that each DRC registers a QEMU reset handler, and we
> > don't control the order in which these handlers are called (ie,
> > a LMB DRC will unplug a DIMM before the virtio device using the
> > memory on this DIMM could stop its vhost backend).
> >
> > To avoid such situations, let's reset DRCs after all devices
> > have been reset.
> >
> > Reported-by: Mallesh N. Koti <mallesh@linux.vnet.ibm.com>
> > Signed-off-by: Greg Kurz <groug@kaod.org>
> > ---  
> I believe this has to do with the problem discussed at the memory
> unplug reboot with vhost=on, right?
> 
> I don't see any problem with this patch and it's probably the best solution
> short-term for the problem (and potentially other related LMB release
> problems), but that said, how hard it is to forbid LMB unplug at all if the
> memory is being used in vhost?
> 

I don't know yet but I'll dig some more.

> The way I see it, the root problem is that a device unplug failed at the 
> guest
> side and we don't know about it, leaving drc->unplug_requested flags set
> around the code when it shouldn't. Reading that thread I see a comment from
> Michael S. Tsirkin saying that this bug is somewhat expected, that vhost 
> backend will
> not behave well if memory is going away. Unlike other LMB unplug 
> failures from
> the guest side that might happen for any other reason, this one sees 
> predicable
> and we can avoid it.
> 
> I think this is something worth considering. But for now,
> 
> 
> Reviewed-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com>
> 

Thanks!

> >   hw/ppc/spapr.c     |   21 +++++++++++++++++++++
> >   hw/ppc/spapr_drc.c |    7 -------
> >   2 files changed, 21 insertions(+), 7 deletions(-)
> >
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 6841bd294b3c..6285f7211f9a 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -1411,6 +1411,19 @@ static void find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque)
> >       }
> >   }
> >
> > +static int spapr_reset_drcs(Object *child, void *opaque)
> > +{
> > +    sPAPRDRConnector *drc =
> > +        (sPAPRDRConnector *) object_dynamic_cast(child,
> > +                                                 TYPE_SPAPR_DR_CONNECTOR);
> > +
> > +    if (drc) {
> > +        spapr_drc_reset(drc);
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> >   static void ppc_spapr_reset(void)
> >   {
> >       MachineState *machine = MACHINE(qdev_get_machine());
> > @@ -1434,6 +1447,14 @@ static void ppc_spapr_reset(void)
> >       }
> >
> >       qemu_devices_reset();
> > +
> > +    /* DRC reset may cause a device to be unplugged. This will cause troubles
> > +     * if this device is used by another device (eg, a running vhost backend
> > +     * will crash QEMU if the DIMM holding the vring goes away). To avoid such
> > +     * situations, we reset DRCs after all devices have been reset.
> > +     */
> > +    object_child_foreach_recursive(object_get_root(), spapr_reset_drcs, NULL);
> > +
> >       spapr_clear_pending_events(spapr);
> >
> >       /*
> > diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> > index 915e9b51c40c..e3b122968e89 100644
> > --- a/hw/ppc/spapr_drc.c
> > +++ b/hw/ppc/spapr_drc.c
> > @@ -455,11 +455,6 @@ void spapr_drc_reset(sPAPRDRConnector *drc)
> >       }
> >   }
> >
> > -static void drc_reset(void *opaque)
> > -{
> > -    spapr_drc_reset(SPAPR_DR_CONNECTOR(opaque));
> > -}
> > -
> >   bool spapr_drc_needed(void *opaque)
> >   {
> >       sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque;
> > @@ -518,7 +513,6 @@ static void realize(DeviceState *d, Error **errp)
> >       }
> >       vmstate_register(DEVICE(drc), spapr_drc_index(drc), &vmstate_spapr_drc,
> >                        drc);
> > -    qemu_register_reset(drc_reset, drc);
> >       trace_spapr_drc_realize_complete(spapr_drc_index(drc));
> >   }
> >
> > @@ -529,7 +523,6 @@ static void unrealize(DeviceState *d, Error **errp)
> >       gchar *name;
> >
> >       trace_spapr_drc_unrealize(spapr_drc_index(drc));
> > -    qemu_unregister_reset(drc_reset, drc);
> >       vmstate_unregister(DEVICE(drc), &vmstate_spapr_drc, drc);
> >       root_container = container_get(object_get_root(), DRC_CONTAINER_PATH);
> >       name = g_strdup_printf("%x", spapr_drc_index(drc));
> >
> >  
> 


Re: [Qemu-devel] [Qemu-ppc] [PATCH for-2.11] spapr: reset DRCs after devices
Posted by Michael Roth 6 years, 4 months ago
Quoting Daniel Henrique Barboza (2017-11-17 10:21:27)
> 
> 
> On 11/17/2017 10:56 AM, Greg Kurz wrote:
> > A DRC with a pending unplug request releases its associated device at
> > machine reset time.
> >
> > In the case of LMB, when all DRCs for a DIMM device have been reset,
> > the DIMM gets unplugged, causing guest memory to disappear. This may
> > be very confusing for anything still using this memory.
> >
> > This is exactly what happens with vhost backends, and QEMU aborts
> > with:
> >
> > qemu-system-ppc64: used ring relocated for ring 2
> > qemu-system-ppc64: qemu/hw/virtio/vhost.c:649: vhost_commit: Assertion
> >   `r >= 0' failed.
> >
> > The issue is that each DRC registers a QEMU reset handler, and we
> > don't control the order in which these handlers are called (ie,
> > a LMB DRC will unplug a DIMM before the virtio device using the
> > memory on this DIMM could stop its vhost backend).
> >
> > To avoid such situations, let's reset DRCs after all devices
> > have been reset.
> >
> > Reported-by: Mallesh N. Koti <mallesh@linux.vnet.ibm.com>
> > Signed-off-by: Greg Kurz <groug@kaod.org>
> > ---
> I believe this has to do with the problem discussed at the memory
> unplug reboot with vhost=on, right?
> 
> I don't see any problem with this patch and it's probably the best solution
> short-term for the problem (and potentially other related LMB release
> problems), but that said, how hard it is to forbid LMB unplug at all if the
> memory is being used in vhost?
> 
> The way I see it, the root problem is that a device unplug failed at the 
> guest
> side and we don't know about it, leaving drc->unplug_requested flags set
> around the code when it shouldn't. Reading that thread I see a comment from

I'm not sure we want to go down the road of preemptively aborting
specific cases where we think unplug will fail in the guest. We already
need to deal with the cases where we don't know whether they'll fail
ahead of time, so it seems like more of a headache to add special cases
on top of that that management would need to deal with.

And in this particular case we'd never be able to unplug the DIMM if
the guest happens to use it for the vring each boot (unless maybe the
unplug request occured during early boot), even though there's no
technical reason we shouldn't be able to at least handle it on the
next reset.

> Michael S. Tsirkin saying that this bug is somewhat expected, that vhost 
> backend will
> not behave well if memory is going away. Unlike other LMB unplug 
> failures from
> the guest side that might happen for any other reason, this one sees 
> predicable
> and we can avoid it.
> 
> I think this is something worth considering. But for now,
> 
> 
> Reviewed-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com>
> 
> >   hw/ppc/spapr.c     |   21 +++++++++++++++++++++
> >   hw/ppc/spapr_drc.c |    7 -------
> >   2 files changed, 21 insertions(+), 7 deletions(-)
> >
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 6841bd294b3c..6285f7211f9a 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -1411,6 +1411,19 @@ static void find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque)
> >       }
> >   }
> >
> > +static int spapr_reset_drcs(Object *child, void *opaque)
> > +{
> > +    sPAPRDRConnector *drc =
> > +        (sPAPRDRConnector *) object_dynamic_cast(child,
> > +                                                 TYPE_SPAPR_DR_CONNECTOR);
> > +
> > +    if (drc) {
> > +        spapr_drc_reset(drc);
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> >   static void ppc_spapr_reset(void)
> >   {
> >       MachineState *machine = MACHINE(qdev_get_machine());
> > @@ -1434,6 +1447,14 @@ static void ppc_spapr_reset(void)
> >       }
> >
> >       qemu_devices_reset();
> > +
> > +    /* DRC reset may cause a device to be unplugged. This will cause troubles
> > +     * if this device is used by another device (eg, a running vhost backend
> > +     * will crash QEMU if the DIMM holding the vring goes away). To avoid such
> > +     * situations, we reset DRCs after all devices have been reset.
> > +     */
> > +    object_child_foreach_recursive(object_get_root(), spapr_reset_drcs, NULL);
> > +
> >       spapr_clear_pending_events(spapr);
> >
> >       /*
> > diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> > index 915e9b51c40c..e3b122968e89 100644
> > --- a/hw/ppc/spapr_drc.c
> > +++ b/hw/ppc/spapr_drc.c
> > @@ -455,11 +455,6 @@ void spapr_drc_reset(sPAPRDRConnector *drc)
> >       }
> >   }
> >
> > -static void drc_reset(void *opaque)
> > -{
> > -    spapr_drc_reset(SPAPR_DR_CONNECTOR(opaque));
> > -}
> > -
> >   bool spapr_drc_needed(void *opaque)
> >   {
> >       sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque;
> > @@ -518,7 +513,6 @@ static void realize(DeviceState *d, Error **errp)
> >       }
> >       vmstate_register(DEVICE(drc), spapr_drc_index(drc), &vmstate_spapr_drc,
> >                        drc);
> > -    qemu_register_reset(drc_reset, drc);
> >       trace_spapr_drc_realize_complete(spapr_drc_index(drc));
> >   }
> >
> > @@ -529,7 +523,6 @@ static void unrealize(DeviceState *d, Error **errp)
> >       gchar *name;
> >
> >       trace_spapr_drc_unrealize(spapr_drc_index(drc));
> > -    qemu_unregister_reset(drc_reset, drc);
> >       vmstate_unregister(DEVICE(drc), &vmstate_spapr_drc, drc);
> >       root_container = container_get(object_get_root(), DRC_CONTAINER_PATH);
> >       name = g_strdup_printf("%x", spapr_drc_index(drc));
> >
> >
> 


Re: [Qemu-devel] [Qemu-ppc] [PATCH for-2.11] spapr: reset DRCs after devices
Posted by Daniel Henrique Barboza 6 years, 4 months ago

On 11/17/2017 04:19 PM, Michael Roth wrote:
> Quoting Daniel Henrique Barboza (2017-11-17 10:21:27)
>>
>> On 11/17/2017 10:56 AM, Greg Kurz wrote:
>>> A DRC with a pending unplug request releases its associated device at
>>> machine reset time.
>>>
>>> In the case of LMB, when all DRCs for a DIMM device have been reset,
>>> the DIMM gets unplugged, causing guest memory to disappear. This may
>>> be very confusing for anything still using this memory.
>>>
>>> This is exactly what happens with vhost backends, and QEMU aborts
>>> with:
>>>
>>> qemu-system-ppc64: used ring relocated for ring 2
>>> qemu-system-ppc64: qemu/hw/virtio/vhost.c:649: vhost_commit: Assertion
>>>    `r >= 0' failed.
>>>
>>> The issue is that each DRC registers a QEMU reset handler, and we
>>> don't control the order in which these handlers are called (ie,
>>> a LMB DRC will unplug a DIMM before the virtio device using the
>>> memory on this DIMM could stop its vhost backend).
>>>
>>> To avoid such situations, let's reset DRCs after all devices
>>> have been reset.
>>>
>>> Reported-by: Mallesh N. Koti <mallesh@linux.vnet.ibm.com>
>>> Signed-off-by: Greg Kurz <groug@kaod.org>
>>> ---
>> I believe this has to do with the problem discussed at the memory
>> unplug reboot with vhost=on, right?
>>
>> I don't see any problem with this patch and it's probably the best solution
>> short-term for the problem (and potentially other related LMB release
>> problems), but that said, how hard it is to forbid LMB unplug at all if the
>> memory is being used in vhost?
>>
>> The way I see it, the root problem is that a device unplug failed at the
>> guest
>> side and we don't know about it, leaving drc->unplug_requested flags set
>> around the code when it shouldn't. Reading that thread I see a comment from
> I'm not sure we want to go down the road of preemptively aborting
> specific cases where we think unplug will fail in the guest. We already
> need to deal with the cases where we don't know whether they'll fail
> ahead of time, so it seems like more of a headache to add special cases
> on top of that that management would need to deal with.

Just to be clear, I only advocate this kind of constraint if the LMB unplug,
at this specific scenario, has a 100% chance of fail. Otherwise we're
better of rolling the dice.


>
> And in this particular case we'd never be able to unplug the DIMM if
> the guest happens to use it for the vring each boot (unless maybe the
> unplug request occured during early boot), even though there's no
> technical reason we shouldn't be able to at least handle it on the
> next reset.

I am ok going this route if we have our bases covered. Does this
behavior of unplugging the LMBs pending unplug at reset documented
in the PAPR specs? If not, we would need to document it somewhere else
(or ask for a PAPR revision to add it).

Otherwise, I am not sure if management will be OK with LMBs being
"inadvertently" hot unplugged in a reset due to a failed LMB hot unplug that
might have occurred days ago and the user doesn't even remember it
anymore :)


Thanks,


Daniel
>
>> Michael S. Tsirkin saying that this bug is somewhat expected, that vhost
>> backend will
>> not behave well if memory is going away. Unlike other LMB unplug
>> failures from
>> the guest side that might happen for any other reason, this one sees
>> predicable
>> and we can avoid it.
>>
>> I think this is something worth considering. But for now,
>>
>>
>> Reviewed-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com>
>>
>>>    hw/ppc/spapr.c     |   21 +++++++++++++++++++++
>>>    hw/ppc/spapr_drc.c |    7 -------
>>>    2 files changed, 21 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>> index 6841bd294b3c..6285f7211f9a 100644
>>> --- a/hw/ppc/spapr.c
>>> +++ b/hw/ppc/spapr.c
>>> @@ -1411,6 +1411,19 @@ static void find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque)
>>>        }
>>>    }
>>>
>>> +static int spapr_reset_drcs(Object *child, void *opaque)
>>> +{
>>> +    sPAPRDRConnector *drc =
>>> +        (sPAPRDRConnector *) object_dynamic_cast(child,
>>> +                                                 TYPE_SPAPR_DR_CONNECTOR);
>>> +
>>> +    if (drc) {
>>> +        spapr_drc_reset(drc);
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>    static void ppc_spapr_reset(void)
>>>    {
>>>        MachineState *machine = MACHINE(qdev_get_machine());
>>> @@ -1434,6 +1447,14 @@ static void ppc_spapr_reset(void)
>>>        }
>>>
>>>        qemu_devices_reset();
>>> +
>>> +    /* DRC reset may cause a device to be unplugged. This will cause troubles
>>> +     * if this device is used by another device (eg, a running vhost backend
>>> +     * will crash QEMU if the DIMM holding the vring goes away). To avoid such
>>> +     * situations, we reset DRCs after all devices have been reset.
>>> +     */
>>> +    object_child_foreach_recursive(object_get_root(), spapr_reset_drcs, NULL);
>>> +
>>>        spapr_clear_pending_events(spapr);
>>>
>>>        /*
>>> diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
>>> index 915e9b51c40c..e3b122968e89 100644
>>> --- a/hw/ppc/spapr_drc.c
>>> +++ b/hw/ppc/spapr_drc.c
>>> @@ -455,11 +455,6 @@ void spapr_drc_reset(sPAPRDRConnector *drc)
>>>        }
>>>    }
>>>
>>> -static void drc_reset(void *opaque)
>>> -{
>>> -    spapr_drc_reset(SPAPR_DR_CONNECTOR(opaque));
>>> -}
>>> -
>>>    bool spapr_drc_needed(void *opaque)
>>>    {
>>>        sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque;
>>> @@ -518,7 +513,6 @@ static void realize(DeviceState *d, Error **errp)
>>>        }
>>>        vmstate_register(DEVICE(drc), spapr_drc_index(drc), &vmstate_spapr_drc,
>>>                         drc);
>>> -    qemu_register_reset(drc_reset, drc);
>>>        trace_spapr_drc_realize_complete(spapr_drc_index(drc));
>>>    }
>>>
>>> @@ -529,7 +523,6 @@ static void unrealize(DeviceState *d, Error **errp)
>>>        gchar *name;
>>>
>>>        trace_spapr_drc_unrealize(spapr_drc_index(drc));
>>> -    qemu_unregister_reset(drc_reset, drc);
>>>        vmstate_unregister(DEVICE(drc), &vmstate_spapr_drc, drc);
>>>        root_container = container_get(object_get_root(), DRC_CONTAINER_PATH);
>>>        name = g_strdup_printf("%x", spapr_drc_index(drc));
>>>
>>>


Re: [Qemu-devel] [Qemu-ppc] [PATCH for-2.11] spapr: reset DRCs after devices
Posted by David Gibson 6 years, 4 months ago
On Fri, Nov 17, 2017 at 06:42:25PM -0200, Daniel Henrique Barboza wrote:
> 
> 
> On 11/17/2017 04:19 PM, Michael Roth wrote:
> > Quoting Daniel Henrique Barboza (2017-11-17 10:21:27)
> > > 
> > > On 11/17/2017 10:56 AM, Greg Kurz wrote:
> > > > A DRC with a pending unplug request releases its associated device at
> > > > machine reset time.
> > > > 
> > > > In the case of LMB, when all DRCs for a DIMM device have been reset,
> > > > the DIMM gets unplugged, causing guest memory to disappear. This may
> > > > be very confusing for anything still using this memory.
> > > > 
> > > > This is exactly what happens with vhost backends, and QEMU aborts
> > > > with:
> > > > 
> > > > qemu-system-ppc64: used ring relocated for ring 2
> > > > qemu-system-ppc64: qemu/hw/virtio/vhost.c:649: vhost_commit: Assertion
> > > >    `r >= 0' failed.
> > > > 
> > > > The issue is that each DRC registers a QEMU reset handler, and we
> > > > don't control the order in which these handlers are called (ie,
> > > > a LMB DRC will unplug a DIMM before the virtio device using the
> > > > memory on this DIMM could stop its vhost backend).
> > > > 
> > > > To avoid such situations, let's reset DRCs after all devices
> > > > have been reset.
> > > > 
> > > > Reported-by: Mallesh N. Koti <mallesh@linux.vnet.ibm.com>
> > > > Signed-off-by: Greg Kurz <groug@kaod.org>
> > > > ---
> > > I believe this has to do with the problem discussed at the memory
> > > unplug reboot with vhost=on, right?
> > > 
> > > I don't see any problem with this patch and it's probably the best solution
> > > short-term for the problem (and potentially other related LMB release
> > > problems), but that said, how hard it is to forbid LMB unplug at all if the
> > > memory is being used in vhost?
> > > 
> > > The way I see it, the root problem is that a device unplug failed at the
> > > guest
> > > side and we don't know about it, leaving drc->unplug_requested flags set
> > > around the code when it shouldn't. Reading that thread I see a comment from
> > I'm not sure we want to go down the road of preemptively aborting
> > specific cases where we think unplug will fail in the guest. We already
> > need to deal with the cases where we don't know whether they'll fail
> > ahead of time, so it seems like more of a headache to add special cases
> > on top of that that management would need to deal with.
> 
> Just to be clear, I only advocate this kind of constraint if the LMB unplug,
> at this specific scenario, has a 100% chance of fail. Otherwise we're
> better of rolling the dice.
> 
> 
> > 
> > And in this particular case we'd never be able to unplug the DIMM if
> > the guest happens to use it for the vring each boot (unless maybe the
> > unplug request occured during early boot), even though there's no
> > technical reason we shouldn't be able to at least handle it on the
> > next reset.
> 
> I am ok going this route if we have our bases covered. Does this
> behavior of unplugging the LMBs pending unplug at reset documented
> in the PAPR specs? If not, we would need to document it somewhere else
> (or ask for a PAPR revision to add it).

Well, the LMB is supposed to be released once the guest has vacated
it.  At reset, the guest can't be using it any more, so..

> Otherwise, I am not sure if management will be OK with LMBs being
> "inadvertently" hot unplugged in a reset due to a failed LMB hot unplug that
> might have occurred days ago and the user doesn't even remember it
> anymore :)
> 
> 
> Thanks,
> 
> 
> Daniel
> > 
> > > Michael S. Tsirkin saying that this bug is somewhat expected, that vhost
> > > backend will
> > > not behave well if memory is going away. Unlike other LMB unplug
> > > failures from
> > > the guest side that might happen for any other reason, this one sees
> > > predicable
> > > and we can avoid it.
> > > 
> > > I think this is something worth considering. But for now,
> > > 
> > > 
> > > Reviewed-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com>
> > > 
> > > >    hw/ppc/spapr.c     |   21 +++++++++++++++++++++
> > > >    hw/ppc/spapr_drc.c |    7 -------
> > > >    2 files changed, 21 insertions(+), 7 deletions(-)
> > > > 
> > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > > index 6841bd294b3c..6285f7211f9a 100644
> > > > --- a/hw/ppc/spapr.c
> > > > +++ b/hw/ppc/spapr.c
> > > > @@ -1411,6 +1411,19 @@ static void find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque)
> > > >        }
> > > >    }
> > > > 
> > > > +static int spapr_reset_drcs(Object *child, void *opaque)
> > > > +{
> > > > +    sPAPRDRConnector *drc =
> > > > +        (sPAPRDRConnector *) object_dynamic_cast(child,
> > > > +                                                 TYPE_SPAPR_DR_CONNECTOR);
> > > > +
> > > > +    if (drc) {
> > > > +        spapr_drc_reset(drc);
> > > > +    }
> > > > +
> > > > +    return 0;
> > > > +}
> > > > +
> > > >    static void ppc_spapr_reset(void)
> > > >    {
> > > >        MachineState *machine = MACHINE(qdev_get_machine());
> > > > @@ -1434,6 +1447,14 @@ static void ppc_spapr_reset(void)
> > > >        }
> > > > 
> > > >        qemu_devices_reset();
> > > > +
> > > > +    /* DRC reset may cause a device to be unplugged. This will cause troubles
> > > > +     * if this device is used by another device (eg, a running vhost backend
> > > > +     * will crash QEMU if the DIMM holding the vring goes away). To avoid such
> > > > +     * situations, we reset DRCs after all devices have been reset.
> > > > +     */
> > > > +    object_child_foreach_recursive(object_get_root(), spapr_reset_drcs, NULL);
> > > > +
> > > >        spapr_clear_pending_events(spapr);
> > > > 
> > > >        /*
> > > > diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> > > > index 915e9b51c40c..e3b122968e89 100644
> > > > --- a/hw/ppc/spapr_drc.c
> > > > +++ b/hw/ppc/spapr_drc.c
> > > > @@ -455,11 +455,6 @@ void spapr_drc_reset(sPAPRDRConnector *drc)
> > > >        }
> > > >    }
> > > > 
> > > > -static void drc_reset(void *opaque)
> > > > -{
> > > > -    spapr_drc_reset(SPAPR_DR_CONNECTOR(opaque));
> > > > -}
> > > > -
> > > >    bool spapr_drc_needed(void *opaque)
> > > >    {
> > > >        sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque;
> > > > @@ -518,7 +513,6 @@ static void realize(DeviceState *d, Error **errp)
> > > >        }
> > > >        vmstate_register(DEVICE(drc), spapr_drc_index(drc), &vmstate_spapr_drc,
> > > >                         drc);
> > > > -    qemu_register_reset(drc_reset, drc);
> > > >        trace_spapr_drc_realize_complete(spapr_drc_index(drc));
> > > >    }
> > > > 
> > > > @@ -529,7 +523,6 @@ static void unrealize(DeviceState *d, Error **errp)
> > > >        gchar *name;
> > > > 
> > > >        trace_spapr_drc_unrealize(spapr_drc_index(drc));
> > > > -    qemu_unregister_reset(drc_reset, drc);
> > > >        vmstate_unregister(DEVICE(drc), &vmstate_spapr_drc, drc);
> > > >        root_container = container_get(object_get_root(), DRC_CONTAINER_PATH);
> > > >        name = g_strdup_printf("%x", spapr_drc_index(drc));
> > > > 
> > > > 
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
Re: [Qemu-devel] [PATCH for-2.11] spapr: reset DRCs after devices
Posted by Michael Roth 6 years, 4 months ago
Quoting Greg Kurz (2017-11-17 06:56:48)
> A DRC with a pending unplug request releases its associated device at
> machine reset time.
> 
> In the case of LMB, when all DRCs for a DIMM device have been reset,
> the DIMM gets unplugged, causing guest memory to disappear. This may
> be very confusing for anything still using this memory.
> 
> This is exactly what happens with vhost backends, and QEMU aborts
> with:
> 
> qemu-system-ppc64: used ring relocated for ring 2
> qemu-system-ppc64: qemu/hw/virtio/vhost.c:649: vhost_commit: Assertion
>  `r >= 0' failed.
> 
> The issue is that each DRC registers a QEMU reset handler, and we
> don't control the order in which these handlers are called (ie,
> a LMB DRC will unplug a DIMM before the virtio device using the
> memory on this DIMM could stop its vhost backend).
> 
> To avoid such situations, let's reset DRCs after all devices
> have been reset.
> 
> Reported-by: Mallesh N. Koti <mallesh@linux.vnet.ibm.com>
> Signed-off-by: Greg Kurz <groug@kaod.org>

Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>

> ---
>  hw/ppc/spapr.c     |   21 +++++++++++++++++++++
>  hw/ppc/spapr_drc.c |    7 -------
>  2 files changed, 21 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 6841bd294b3c..6285f7211f9a 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1411,6 +1411,19 @@ static void find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque)
>      }
>  }
> 
> +static int spapr_reset_drcs(Object *child, void *opaque)
> +{
> +    sPAPRDRConnector *drc =
> +        (sPAPRDRConnector *) object_dynamic_cast(child,
> +                                                 TYPE_SPAPR_DR_CONNECTOR);
> +
> +    if (drc) {
> +        spapr_drc_reset(drc);
> +    }
> +
> +    return 0;
> +}
> +
>  static void ppc_spapr_reset(void)
>  {
>      MachineState *machine = MACHINE(qdev_get_machine());
> @@ -1434,6 +1447,14 @@ static void ppc_spapr_reset(void)
>      }
> 
>      qemu_devices_reset();
> +
> +    /* DRC reset may cause a device to be unplugged. This will cause troubles
> +     * if this device is used by another device (eg, a running vhost backend
> +     * will crash QEMU if the DIMM holding the vring goes away). To avoid such
> +     * situations, we reset DRCs after all devices have been reset.
> +     */
> +    object_child_foreach_recursive(object_get_root(), spapr_reset_drcs, NULL);
> +
>      spapr_clear_pending_events(spapr);
> 
>      /*
> diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> index 915e9b51c40c..e3b122968e89 100644
> --- a/hw/ppc/spapr_drc.c
> +++ b/hw/ppc/spapr_drc.c
> @@ -455,11 +455,6 @@ void spapr_drc_reset(sPAPRDRConnector *drc)
>      }
>  }
> 
> -static void drc_reset(void *opaque)
> -{
> -    spapr_drc_reset(SPAPR_DR_CONNECTOR(opaque));
> -}
> -
>  bool spapr_drc_needed(void *opaque)
>  {
>      sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque;
> @@ -518,7 +513,6 @@ static void realize(DeviceState *d, Error **errp)
>      }
>      vmstate_register(DEVICE(drc), spapr_drc_index(drc), &vmstate_spapr_drc,
>                       drc);
> -    qemu_register_reset(drc_reset, drc);
>      trace_spapr_drc_realize_complete(spapr_drc_index(drc));
>  }
> 
> @@ -529,7 +523,6 @@ static void unrealize(DeviceState *d, Error **errp)
>      gchar *name;
> 
>      trace_spapr_drc_unrealize(spapr_drc_index(drc));
> -    qemu_unregister_reset(drc_reset, drc);
>      vmstate_unregister(DEVICE(drc), &vmstate_spapr_drc, drc);
>      root_container = container_get(object_get_root(), DRC_CONTAINER_PATH);
>      name = g_strdup_printf("%x", spapr_drc_index(drc));
> 


Re: [Qemu-devel] [PATCH for-2.11] spapr: reset DRCs after devices
Posted by David Gibson 6 years, 4 months ago
On Fri, Nov 17, 2017 at 01:56:48PM +0100, Greg Kurz wrote:
> A DRC with a pending unplug request releases its associated device at
> machine reset time.
> 
> In the case of LMB, when all DRCs for a DIMM device have been reset,
> the DIMM gets unplugged, causing guest memory to disappear. This may
> be very confusing for anything still using this memory.
> 
> This is exactly what happens with vhost backends, and QEMU aborts
> with:
> 
> qemu-system-ppc64: used ring relocated for ring 2
> qemu-system-ppc64: qemu/hw/virtio/vhost.c:649: vhost_commit: Assertion
>  `r >= 0' failed.
> 
> The issue is that each DRC registers a QEMU reset handler, and we
> don't control the order in which these handlers are called (ie,
> a LMB DRC will unplug a DIMM before the virtio device using the
> memory on this DIMM could stop its vhost backend).
> 
> To avoid such situations, let's reset DRCs after all devices
> have been reset.
> 
> Reported-by: Mallesh N. Koti <mallesh@linux.vnet.ibm.com>
> Signed-off-by: Greg Kurz <groug@kaod.org>

Applied to ppc-for-2.11.  I'll be looking at preparing a pull request
today.

> ---
>  hw/ppc/spapr.c     |   21 +++++++++++++++++++++
>  hw/ppc/spapr_drc.c |    7 -------
>  2 files changed, 21 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 6841bd294b3c..6285f7211f9a 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1411,6 +1411,19 @@ static void find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque)
>      }
>  }
>  
> +static int spapr_reset_drcs(Object *child, void *opaque)
> +{
> +    sPAPRDRConnector *drc =
> +        (sPAPRDRConnector *) object_dynamic_cast(child,
> +                                                 TYPE_SPAPR_DR_CONNECTOR);
> +
> +    if (drc) {
> +        spapr_drc_reset(drc);
> +    }
> +
> +    return 0;
> +}
> +
>  static void ppc_spapr_reset(void)
>  {
>      MachineState *machine = MACHINE(qdev_get_machine());
> @@ -1434,6 +1447,14 @@ static void ppc_spapr_reset(void)
>      }
>  
>      qemu_devices_reset();
> +
> +    /* DRC reset may cause a device to be unplugged. This will cause troubles
> +     * if this device is used by another device (eg, a running vhost backend
> +     * will crash QEMU if the DIMM holding the vring goes away). To avoid such
> +     * situations, we reset DRCs after all devices have been reset.
> +     */
> +    object_child_foreach_recursive(object_get_root(), spapr_reset_drcs, NULL);
> +
>      spapr_clear_pending_events(spapr);
>  
>      /*
> diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> index 915e9b51c40c..e3b122968e89 100644
> --- a/hw/ppc/spapr_drc.c
> +++ b/hw/ppc/spapr_drc.c
> @@ -455,11 +455,6 @@ void spapr_drc_reset(sPAPRDRConnector *drc)
>      }
>  }
>  
> -static void drc_reset(void *opaque)
> -{
> -    spapr_drc_reset(SPAPR_DR_CONNECTOR(opaque));
> -}
> -
>  bool spapr_drc_needed(void *opaque)
>  {
>      sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque;
> @@ -518,7 +513,6 @@ static void realize(DeviceState *d, Error **errp)
>      }
>      vmstate_register(DEVICE(drc), spapr_drc_index(drc), &vmstate_spapr_drc,
>                       drc);
> -    qemu_register_reset(drc_reset, drc);
>      trace_spapr_drc_realize_complete(spapr_drc_index(drc));
>  }
>  
> @@ -529,7 +523,6 @@ static void unrealize(DeviceState *d, Error **errp)
>      gchar *name;
>  
>      trace_spapr_drc_unrealize(spapr_drc_index(drc));
> -    qemu_unregister_reset(drc_reset, drc);
>      vmstate_unregister(DEVICE(drc), &vmstate_spapr_drc, drc);
>      root_container = container_get(object_get_root(), DRC_CONTAINER_PATH);
>      name = g_strdup_printf("%x", spapr_drc_index(drc));
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson