[Qemu-devel] [PATCH] spapr: fix memory hotplug error path

Greg Kurz posted 1 patch 6 years, 9 months ago
Failed in applying to current master (apply log)
There is a newer version of this series
hw/ppc/spapr.c |   10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
[Qemu-devel] [PATCH] spapr: fix memory hotplug error path
Posted by Greg Kurz 6 years, 9 months ago
QEMU shouldn't abort if spapr_add_lmbs()->spapr_drc_attach() fails.
Let's propagate the error instead, like it is done everywhere else
where spapr_drc_attach() is called.

Signed-off-by: Greg Kurz <groug@kaod.org>
---
 hw/ppc/spapr.c |   10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 70b3fd374e2b..e103be500189 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2601,6 +2601,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
     int i, fdt_offset, fdt_size;
     void *fdt;
     uint64_t addr = addr_start;
+    Error *local_err = NULL;
 
     for (i = 0; i < nr_lmbs; i++) {
         drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
@@ -2611,7 +2612,12 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
         fdt_offset = spapr_populate_memory_node(fdt, node, addr,
                                                 SPAPR_MEMORY_BLOCK_SIZE);
 
-        spapr_drc_attach(drc, dev, fdt, fdt_offset, errp);
+        spapr_drc_attach(drc, dev, fdt, fdt_offset, &local_err);
+        if (local_err) {
+            g_free(fdt);
+            error_propagate(errp, local_err);
+            return;
+        }
         addr += SPAPR_MEMORY_BLOCK_SIZE;
     }
     /* send hotplug notification to the
@@ -2657,7 +2663,7 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
 
     spapr_add_lmbs(dev, addr, size, node,
                    spapr_ovec_test(ms->ov5_cas, OV5_HP_EVT),
-                   &error_abort);
+                   &local_err);
 
 out:
     error_propagate(errp, local_err);


Re: [Qemu-devel] [Qemu-ppc] [PATCH] spapr: fix memory hotplug error path
Posted by Daniel Henrique Barboza 6 years, 9 months ago

On 07/03/2017 09:21 AM, Greg Kurz wrote:
> QEMU shouldn't abort if spapr_add_lmbs()->spapr_drc_attach() fails.
> Let's propagate the error instead, like it is done everywhere else
> where spapr_drc_attach() is called.
>
> Signed-off-by: Greg Kurz <groug@kaod.org>
> ---
>   hw/ppc/spapr.c |   10 ++++++++--
>   1 file changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 70b3fd374e2b..e103be500189 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2601,6 +2601,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
>       int i, fdt_offset, fdt_size;
>       void *fdt;
>       uint64_t addr = addr_start;
> +    Error *local_err = NULL;
>
>       for (i = 0; i < nr_lmbs; i++) {
>           drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
> @@ -2611,7 +2612,12 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
>           fdt_offset = spapr_populate_memory_node(fdt, node, addr,
>                                                   SPAPR_MEMORY_BLOCK_SIZE);
>
> -        spapr_drc_attach(drc, dev, fdt, fdt_offset, errp);
> +        spapr_drc_attach(drc, dev, fdt, fdt_offset, &local_err);
> +        if (local_err) {
> +            g_free(fdt);
> +            error_propagate(errp, local_err);
> +            return;
> +        }
>           addr += SPAPR_MEMORY_BLOCK_SIZE;
>       }
>       /* send hotplug notification to the
> @@ -2657,7 +2663,7 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
>
>       spapr_add_lmbs(dev, addr, size, node,
>                      spapr_ovec_test(ms->ov5_cas, OV5_HP_EVT),
> -                   &error_abort);
> +                   &local_err);
>
>   out:
>       error_propagate(errp, local_err);
>
>
Reviewed-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>


Re: [Qemu-devel] [PATCH] spapr: fix memory hotplug error path
Posted by Igor Mammedov 6 years, 9 months ago
On Mon, 03 Jul 2017 14:21:31 +0200
Greg Kurz <groug@kaod.org> wrote:

> QEMU shouldn't abort if spapr_add_lmbs()->spapr_drc_attach() fails.
> Let's propagate the error instead, like it is done everywhere else
> where spapr_drc_attach() is called.
> 
> Signed-off-by: Greg Kurz <groug@kaod.org>
> ---
>  hw/ppc/spapr.c |   10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 70b3fd374e2b..e103be500189 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2601,6 +2601,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
>      int i, fdt_offset, fdt_size;
>      void *fdt;
>      uint64_t addr = addr_start;
> +    Error *local_err = NULL;
>  
>      for (i = 0; i < nr_lmbs; i++) {
>          drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
> @@ -2611,7 +2612,12 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
>          fdt_offset = spapr_populate_memory_node(fdt, node, addr,
>                                                  SPAPR_MEMORY_BLOCK_SIZE);
>  
> -        spapr_drc_attach(drc, dev, fdt, fdt_offset, errp);
> +        spapr_drc_attach(drc, dev, fdt, fdt_offset, &local_err);
> +        if (local_err) {
> +            g_free(fdt);
> +            error_propagate(errp, local_err);
> +            return;
> +        }
>          addr += SPAPR_MEMORY_BLOCK_SIZE;
>      }
>      /* send hotplug notification to the
> @@ -2657,7 +2663,7 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
>  
>      spapr_add_lmbs(dev, addr, size, node,
>                     spapr_ovec_test(ms->ov5_cas, OV5_HP_EVT),
> -                   &error_abort);
> +                   &local_err);
Where is the code that handles the error condition,
i.e. that undoes what pc_dimm_memory_plug() has done?

>  
>  out:
>      error_propagate(errp, local_err);
> 
> 


Re: [Qemu-devel] [PATCH] spapr: fix memory hotplug error path
Posted by Greg Kurz 6 years, 9 months ago
On Mon, 3 Jul 2017 15:43:26 +0200
Igor Mammedov <imammedo@redhat.com> wrote:

> On Mon, 03 Jul 2017 14:21:31 +0200
> Greg Kurz <groug@kaod.org> wrote:
> 
> > QEMU shouldn't abort if spapr_add_lmbs()->spapr_drc_attach() fails.
> > Let's propagate the error instead, like it is done everywhere else
> > where spapr_drc_attach() is called.
> > 
> > Signed-off-by: Greg Kurz <groug@kaod.org>
> > ---
> >  hw/ppc/spapr.c |   10 ++++++++--
> >  1 file changed, 8 insertions(+), 2 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 70b3fd374e2b..e103be500189 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -2601,6 +2601,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
> >      int i, fdt_offset, fdt_size;
> >      void *fdt;
> >      uint64_t addr = addr_start;
> > +    Error *local_err = NULL;
> >  
> >      for (i = 0; i < nr_lmbs; i++) {
> >          drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
> > @@ -2611,7 +2612,12 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
> >          fdt_offset = spapr_populate_memory_node(fdt, node, addr,
> >                                                  SPAPR_MEMORY_BLOCK_SIZE);
> >  
> > -        spapr_drc_attach(drc, dev, fdt, fdt_offset, errp);
> > +        spapr_drc_attach(drc, dev, fdt, fdt_offset, &local_err);
> > +        if (local_err) {
> > +            g_free(fdt);
> > +            error_propagate(errp, local_err);
> > +            return;
> > +        }
> >          addr += SPAPR_MEMORY_BLOCK_SIZE;
> >      }
> >      /* send hotplug notification to the
> > @@ -2657,7 +2663,7 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
> >  
> >      spapr_add_lmbs(dev, addr, size, node,
> >                     spapr_ovec_test(ms->ov5_cas, OV5_HP_EVT),
> > -                   &error_abort);
> > +                   &local_err);  
> where is code that handles error condition?
>  i.e. undo what pc_dimm_memory_plug() has done
> 

Oops, you're right and I also need to add some rollback in
spapr_add_lmbs() as well... :-\

> >  
> >  out:
> >      error_propagate(errp, local_err);
> > 
> >   
> 

Re: [Qemu-devel] [PATCH] spapr: fix memory hotplug error path
Posted by Bharata B Rao 6 years, 9 months ago
On Mon, Jul 03, 2017 at 02:21:31PM +0200, Greg Kurz wrote:
> QEMU shouldn't abort if spapr_add_lmbs()->spapr_drc_attach() fails.
> Let's propagate the error instead, like it is done everywhere else
> where spapr_drc_attach() is called.
> 
> Signed-off-by: Greg Kurz <groug@kaod.org>
> ---
>  hw/ppc/spapr.c |   10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 70b3fd374e2b..e103be500189 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2601,6 +2601,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
>      int i, fdt_offset, fdt_size;
>      void *fdt;
>      uint64_t addr = addr_start;
> +    Error *local_err = NULL;
> 
>      for (i = 0; i < nr_lmbs; i++) {
>          drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
> @@ -2611,7 +2612,12 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
>          fdt_offset = spapr_populate_memory_node(fdt, node, addr,
>                                                  SPAPR_MEMORY_BLOCK_SIZE);
> 
> -        spapr_drc_attach(drc, dev, fdt, fdt_offset, errp);
> +        spapr_drc_attach(drc, dev, fdt, fdt_offset, &local_err);
> +        if (local_err) {
> +            g_free(fdt);
> +            error_propagate(errp, local_err);
> +            return;
> +        }

There is some history to this. I was doing error recovery and propagation
here similarly during memory hotplug development phase until Igor
suggested that we shouldn't try to recover after we have done guest
visible changes.

Refer to "changes in v6" section in this post:
https://lists.gnu.org/archive/html/qemu-ppc/2015-06/msg00296.html

However at that time we were doing memory add by DRC index method
and hence would attach and online one LMB at a time.
In that method, if an intermediate attach fails we would end up with a few
LMBs being onlined by the guest already. However subsequently
we have switched (optionally, based on dedicated_hp_event_source) to
count-indexed method of hotplug where we do attach of all LMBs one by one
and then request the guest to hotplug all of them at once using count-indexed
method.

So it will be a bit tricky to abort for index based case and recover
correctly for count-indexed case.

Regards,
Bharata.


Re: [Qemu-devel] [PATCH] spapr: fix memory hotplug error path
Posted by Bharata B Rao 6 years, 9 months ago
On Tue, Jul 04, 2017 at 09:01:43AM +0530, Bharata B Rao wrote:
> On Mon, Jul 03, 2017 at 02:21:31PM +0200, Greg Kurz wrote:
> > QEMU shouldn't abort if spapr_add_lmbs()->spapr_drc_attach() fails.
> > Let's propagate the error instead, like it is done everywhere else
> > where spapr_drc_attach() is called.
> > 
> > Signed-off-by: Greg Kurz <groug@kaod.org>
> > ---
> >  hw/ppc/spapr.c |   10 ++++++++--
> >  1 file changed, 8 insertions(+), 2 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 70b3fd374e2b..e103be500189 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -2601,6 +2601,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
> >      int i, fdt_offset, fdt_size;
> >      void *fdt;
> >      uint64_t addr = addr_start;
> > +    Error *local_err = NULL;
> > 
> >      for (i = 0; i < nr_lmbs; i++) {
> >          drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
> > @@ -2611,7 +2612,12 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
> >          fdt_offset = spapr_populate_memory_node(fdt, node, addr,
> >                                                  SPAPR_MEMORY_BLOCK_SIZE);
> > 
> > -        spapr_drc_attach(drc, dev, fdt, fdt_offset, errp);
> > +        spapr_drc_attach(drc, dev, fdt, fdt_offset, &local_err);
> > +        if (local_err) {
> > +            g_free(fdt);
> > +            error_propagate(errp, local_err);
> > +            return;
> > +        }
> 
> There is some history to this. I was doing error recovery and propagation
> here similarly during memory hotplug development phase until Igor
> suggested that we shouldn't try to recover after we have done guest
> visible changes.
> 
> Refer to "changes in v6" section in this post:
> https://lists.gnu.org/archive/html/qemu-ppc/2015-06/msg00296.html
> 
> However at that time we were doing memory add by DRC index method
> and hence would attach and online one LMB at a time.
> In that method, if an intermediate attach fails we would end up with a few
> LMBs being onlined by the guest already. However subsequently
> we have switched (optionally, based on dedicated_hp_event_source) to
> count-indexed method of hotplug where we do attach of all LMBs one by one
> and then request the guest to hotplug all of them at once using count-indexed
> method.
> 
> So it will be a bit tricky to abort for index based case and recover
> correctly for count-indexed case.

Looked at the code again and realized that though we started with
index based LMB addition, we later switched to count based addition. Then
we added support for count-indexed type subject to the presence
of dedicated hotplug event source while still retaining the support
for count based addition.

So presently we do attach of all LMBs one by one and then do onlining
(count based or count-indexed based) once. Hence error recovery
for both cases would be similar now. So I guess you should take care of
undoing pc_dimm_memory_plug() like Igor mentioned and also undo the
effects of partial successful attaches.

> 
> Regards,
> Bharata.


Re: [Qemu-devel] [PATCH] spapr: fix memory hotplug error path
Posted by Greg Kurz 6 years, 9 months ago
On Tue, 4 Jul 2017 09:20:50 +0530
Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:

> On Tue, Jul 04, 2017 at 09:01:43AM +0530, Bharata B Rao wrote:
> > On Mon, Jul 03, 2017 at 02:21:31PM +0200, Greg Kurz wrote:  
> > > QEMU shouldn't abort if spapr_add_lmbs()->spapr_drc_attach() fails.
> > > Let's propagate the error instead, like it is done everywhere else
> > > where spapr_drc_attach() is called.
> > > 
> > > Signed-off-by: Greg Kurz <groug@kaod.org>
> > > ---
> > >  hw/ppc/spapr.c |   10 ++++++++--
> > >  1 file changed, 8 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > index 70b3fd374e2b..e103be500189 100644
> > > --- a/hw/ppc/spapr.c
> > > +++ b/hw/ppc/spapr.c
> > > @@ -2601,6 +2601,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
> > >      int i, fdt_offset, fdt_size;
> > >      void *fdt;
> > >      uint64_t addr = addr_start;
> > > +    Error *local_err = NULL;
> > > 
> > >      for (i = 0; i < nr_lmbs; i++) {
> > >          drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
> > > @@ -2611,7 +2612,12 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
> > >          fdt_offset = spapr_populate_memory_node(fdt, node, addr,
> > >                                                  SPAPR_MEMORY_BLOCK_SIZE);
> > > 
> > > -        spapr_drc_attach(drc, dev, fdt, fdt_offset, errp);
> > > +        spapr_drc_attach(drc, dev, fdt, fdt_offset, &local_err);
> > > +        if (local_err) {
> > > +            g_free(fdt);
> > > +            error_propagate(errp, local_err);
> > > +            return;
> > > +        }  
> > 
> > There is some history to this. I was doing error recovery and propagation
> > here similarly during memory hotplug development phase until Igor
> > suggested that we shouldn't try to recover after we have done guest
> > visible changes.
> > 
> > Refer to "changes in v6" section in this post:
> > https://lists.gnu.org/archive/html/qemu-ppc/2015-06/msg00296.html
> > 
> > However at that time we were doing memory add by DRC index method
> > and hence would attach and online one LMB at a time.
> > In that method, if an intermediate attach fails we would end up with a few
> > LMBs being onlined by the guest already. However subsequently
> > we have switched (optionally, based on dedicated_hp_event_source) to
> > count-indexed method of hotplug where we do attach of all LMBs one by one
> > and then request the guest to hotplug all of them at once using count-indexed
> > method.
> > 
> > So it will be a bit tricky to abort for index based case and recover
> > correctly for count-indexed case.  
> 
> Looked at the code again and realized that though we started with
> index based LMB addition, we later switched to count based addition. Then
> we added support for count-indexed type subject to the presence
> of dedicated hotplug event source while still retaining the support
> for count based addition.
> 
> So presently we do attach of all LMBs one by one and then do onlining
> (count based or count-indexed based) once. Hence error recovery
> for both cases would be similar now. So I guess you should take care of
> undoing pc_dimm_memory_plug() like Igor mentioned and also undo the
> effects of partial successful attaches.
> 

I've sent a v2 that adds rollback.

Cheers,

--
Greg


> > 
> > Regards,
> > Bharata.  
> 

Re: [Qemu-devel] [PATCH] spapr: fix memory hotplug error path
Posted by Bharata B Rao 6 years, 9 months ago
On Tue, Jul 04, 2017 at 10:02:46AM +0200, Greg Kurz wrote:
> > > There is some history to this. I was doing error recovery and propagation
> > > here similarly during memory hotplug development phase until Igor
> > > suggested that we shouldn't try to recover after we have done guest
> > > visible changes.
> > > 
> > > Refer to "changes in v6" section in this post:
> > > https://lists.gnu.org/archive/html/qemu-ppc/2015-06/msg00296.html
> > > 
> > > However at that time we were doing memory add by DRC index method
> > > and hence would attach and online one LMB at a time.
> > > In that method, if an intermediate attach fails we would end up with a few
> > > LMBs being onlined by the guest already. However subsequently
> > > we have switched (optionally, based on dedicated_hp_event_source) to
> > > count-indexed method of hotplug where we do attach of all LMBs one by one
> > > and then request the guest to hotplug all of them at once using count-indexed
> > > method.
> > > 
> > > So it will be a bit tricky to abort for index based case and recover
> > > correctly for count-indexed case.  
> > 
> > Looked at the code again and realized that though we started with
> > index based LMB addition, we later switched to count based addition. Then
> > we added support for count-indexed type subject to the presence
> > of dedicated hotplug event source while still retaining the support
> > for count based addition.
> > 
> > So presently we do attach of all LMBs one by one and then do onlining
> > (count based or count-indexed based) once. Hence error recovery
> > for both cases would be similar now. So I guess you should take care of
> > undoing pc_dimm_memory_plug() like Igor mentioned and also undo the
> > effects of partial successful attaches.
> > 
> 
> I've sent a v2 that adds rollback.

oh ok, somehow v2 didn't reach me at all and I saw the v2 in archives only
now. So just noting that my above replies were sent w/o being aware of v2 :)

> > > 
> > > Regards,
> > > Bharata.  


Re: [Qemu-devel] [PATCH] spapr: fix memory hotplug error path
Posted by Greg Kurz 6 years, 9 months ago
On Tue, 4 Jul 2017 14:41:33 +0530
Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:

> On Tue, Jul 04, 2017 at 10:02:46AM +0200, Greg Kurz wrote:
> > > > There is some history to this. I was doing error recovery and propagation
> > > > here similarly during memory hotplug development phase until Igor
> > > > suggested that we shouldn't try to recover after we have done guest
> > > > visible changes.
> > > > 
> > > > Refer to "changes in v6" section in this post:
> > > > https://lists.gnu.org/archive/html/qemu-ppc/2015-06/msg00296.html
> > > > 
> > > > However at that time we were doing memory add by DRC index method
> > > > and hence would attach and online one LMB at a time.
> > > > In that method, if an intermediate attach fails we would end up with a few
> > > > LMBs being onlined by the guest already. However subsequently
> > > > we have switched (optionally, based on dedicated_hp_event_source) to
> > > > count-indexed method of hotplug where we do attach of all LMBs one by one
> > > > and then request the guest to hotplug all of them at once using count-indexed
> > > > method.
> > > > 
> > > > So it will be a bit tricky to abort for index based case and recover
> > > > correctly for count-indexed case.    
> > > 
> > > Looked at the code again and realized that though we started with
> > > index based LMB addition, we later switched to count based addition. Then
> > > we added support for count-indexed type subject to the presence
> > > of dedicated hotplug event source while still retaining the support
> > > for count based addition.
> > > 
> > > So presently we do attach of all LMBs one by one and then do onlining
> > > (count based or count-indexed based) once. Hence error recovery
> > > for both cases would be similar now. So I guess you should take care of
> > > undoing pc_dimm_memory_plug() like Igor mentioned and also undo the
> > > effects of partial successful attaches.
> > >   
> > 
> > I've sent a v2 that adds rollback.  
> 
> oh ok, somehow v2 didn't reach me at all and I saw the v2 in archives only
> now. So just noting that my above replies were sent w/o being aware of v2 :)
> 

No problem at all. It confirms that v2 was indeed needed. Also, it exposes
some details I wasn't aware of. Thanks for the explanation! :)

Cheers,

--
Greg

> > > > 
> > > > Regards,
> > > > Bharata.    
>