hw/ppc/spapr.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-)
QEMU shouldn't abort if spapr_add_lmbs()->spapr_drc_attach() fails.
Let's propagate the error instead, like it is done everywhere else
where spapr_drc_attach() is called.
Signed-off-by: Greg Kurz <groug@kaod.org>
---
Changes in v2: - added rollback code
---
hw/ppc/spapr.c | 26 ++++++++++++++++++++++----
1 file changed, 22 insertions(+), 4 deletions(-)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 70b3fd374e2b..ba8f57a5a054 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2601,6 +2601,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
int i, fdt_offset, fdt_size;
void *fdt;
uint64_t addr = addr_start;
+ Error *local_err = NULL;
for (i = 0; i < nr_lmbs; i++) {
drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
@@ -2611,7 +2612,18 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
fdt_offset = spapr_populate_memory_node(fdt, node, addr,
SPAPR_MEMORY_BLOCK_SIZE);
- spapr_drc_attach(drc, dev, fdt, fdt_offset, errp);
+ spapr_drc_attach(drc, dev, fdt, fdt_offset, &local_err);
+ if (local_err) {
+ while (addr > addr_start) {
+ addr -= SPAPR_MEMORY_BLOCK_SIZE;
+ drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
+ addr / SPAPR_MEMORY_BLOCK_SIZE);
+ spapr_drc_detach(drc, dev, NULL);
+ }
+ g_free(fdt);
+ error_propagate(errp, local_err);
+ return;
+ }
addr += SPAPR_MEMORY_BLOCK_SIZE;
}
/* send hotplug notification to the
@@ -2651,14 +2663,20 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
addr = object_property_get_uint(OBJECT(dimm),
PC_DIMM_ADDR_PROP, &local_err);
if (local_err) {
- pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr);
- goto out;
+ goto out_unplug;
}
spapr_add_lmbs(dev, addr, size, node,
spapr_ovec_test(ms->ov5_cas, OV5_HP_EVT),
- &error_abort);
+ &local_err);
+ if (local_err) {
+ goto out_unplug;
+ }
+
+ return;
+out_unplug:
+ pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr);
out:
error_propagate(errp, local_err);
}
On 07/03/2017 11:54 AM, Greg Kurz wrote: > QEMU shouldn't abort if spapr_add_lmbs()->spapr_drc_attach() fails. > Let's propagate the error instead, like it is done everywhere else > where spapr_drc_attach() is called. > > Signed-off-by: Greg Kurz <groug@kaod.org> > --- > Changes in v2: - added rollback code > --- > hw/ppc/spapr.c | 26 ++++++++++++++++++++++---- > 1 file changed, 22 insertions(+), 4 deletions(-) > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index 70b3fd374e2b..ba8f57a5a054 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -2601,6 +2601,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, > int i, fdt_offset, fdt_size; > void *fdt; > uint64_t addr = addr_start; > + Error *local_err = NULL; > > for (i = 0; i < nr_lmbs; i++) { > drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, > @@ -2611,7 +2612,18 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, > fdt_offset = spapr_populate_memory_node(fdt, node, addr, > SPAPR_MEMORY_BLOCK_SIZE); > > - spapr_drc_attach(drc, dev, fdt, fdt_offset, errp); > + spapr_drc_attach(drc, dev, fdt, fdt_offset, &local_err); > + if (local_err) { > + while (addr > addr_start) { > + addr -= SPAPR_MEMORY_BLOCK_SIZE; > + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, > + addr / SPAPR_MEMORY_BLOCK_SIZE); > + spapr_drc_detach(drc, dev, NULL); Question: why pass NULL instead of '&local_err' in detach() ? Can we ignore any any error that happens in detach() because you're propagating the local_err set in spapr_drc_attach() already? ps: I am aware that ATM detach() does not propagate anything to the errp being passed. I don't think it's wise to write new code based on this assumption though. Thanks, Daniel > + } > + g_free(fdt); > + error_propagate(errp, local_err); > + return; > + } > addr += SPAPR_MEMORY_BLOCK_SIZE; > } > /* send hotplug notification to the > @@ -2651,14 +2663,20 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, > addr = object_property_get_uint(OBJECT(dimm), > PC_DIMM_ADDR_PROP, &local_err); > if (local_err) { > - pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr); > - goto out; > + goto out_unplug; > } > > spapr_add_lmbs(dev, addr, size, node, > spapr_ovec_test(ms->ov5_cas, OV5_HP_EVT), > - &error_abort); > + &local_err); > + if (local_err) { > + goto out_unplug; > + } > + > + return; > > +out_unplug: > + pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr); > out: > error_propagate(errp, local_err); > } > >
On Mon, 3 Jul 2017 14:03:07 -0300 Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com> wrote: > On 07/03/2017 11:54 AM, Greg Kurz wrote: > > QEMU shouldn't abort if spapr_add_lmbs()->spapr_drc_attach() fails. > > Let's propagate the error instead, like it is done everywhere else > > where spapr_drc_attach() is called. > > > > Signed-off-by: Greg Kurz <groug@kaod.org> > > --- > > Changes in v2: - added rollback code > > --- > > hw/ppc/spapr.c | 26 ++++++++++++++++++++++---- > > 1 file changed, 22 insertions(+), 4 deletions(-) > > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > > index 70b3fd374e2b..ba8f57a5a054 100644 > > --- a/hw/ppc/spapr.c > > +++ b/hw/ppc/spapr.c > > @@ -2601,6 +2601,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, > > int i, fdt_offset, fdt_size; > > void *fdt; > > uint64_t addr = addr_start; > > + Error *local_err = NULL; > > > > for (i = 0; i < nr_lmbs; i++) { > > drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, > > @@ -2611,7 +2612,18 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, > > fdt_offset = spapr_populate_memory_node(fdt, node, addr, > > SPAPR_MEMORY_BLOCK_SIZE); > > > > - spapr_drc_attach(drc, dev, fdt, fdt_offset, errp); > > + spapr_drc_attach(drc, dev, fdt, fdt_offset, &local_err); > > + if (local_err) { > > + while (addr > addr_start) { > > + addr -= SPAPR_MEMORY_BLOCK_SIZE; > > + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, > > + addr / SPAPR_MEMORY_BLOCK_SIZE); > > + spapr_drc_detach(drc, dev, NULL); > > Question: why pass NULL instead of '&local_err' in detach() ? local_err already contains the error returned by spapr_drc_attach(), it cannot be used anymore. > Can we ignore any > any error that happens in detach() because you're propagating the > local_err set > in spapr_drc_attach() already? > This is a rollback path: the best we can do is to try to undo the already attached DRCs... > ps: I am aware that ATM detach() does not propagate anything to the errp > being > passed. I don't think it's wise to write new code based on this > assumption though. > ... and indeed detach() cannot fail with the current code base. :) David was even planning to drop the errp argument in the "spapr: Refactor spapr_drc_detach()" patch. Otherwise, maybe pass &error_abort since failing to rollback is likely to be a bug ? Cheers, -- Greg > > Thanks, > > > Daniel > > > + } > > + g_free(fdt); > > + error_propagate(errp, local_err); > > + return; > > + } > > addr += SPAPR_MEMORY_BLOCK_SIZE; > > } > > /* send hotplug notification to the > > @@ -2651,14 +2663,20 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, > > addr = object_property_get_uint(OBJECT(dimm), > > PC_DIMM_ADDR_PROP, &local_err); > > if (local_err) { > > - pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr); > > - goto out; > > + goto out_unplug; > > } > > > > spapr_add_lmbs(dev, addr, size, node, > > spapr_ovec_test(ms->ov5_cas, OV5_HP_EVT), > > - &error_abort); > > + &local_err); > > + if (local_err) { > > + goto out_unplug; > > + } > > + > > + return; > > > > +out_unplug: > > + pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr); > > out: > > error_propagate(errp, local_err); > > } > > > > >
On 07/04/2017 04:42 AM, Greg Kurz wrote: > On Mon, 3 Jul 2017 14:03:07 -0300 > Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com> wrote: > >> On 07/03/2017 11:54 AM, Greg Kurz wrote: >>> QEMU shouldn't abort if spapr_add_lmbs()->spapr_drc_attach() fails. >>> Let's propagate the error instead, like it is done everywhere else >>> where spapr_drc_attach() is called. >>> >>> Signed-off-by: Greg Kurz <groug@kaod.org> >>> --- >>> Changes in v2: - added rollback code >>> --- >>> hw/ppc/spapr.c | 26 ++++++++++++++++++++++---- >>> 1 file changed, 22 insertions(+), 4 deletions(-) >>> >>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c >>> index 70b3fd374e2b..ba8f57a5a054 100644 >>> --- a/hw/ppc/spapr.c >>> +++ b/hw/ppc/spapr.c >>> @@ -2601,6 +2601,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, >>> int i, fdt_offset, fdt_size; >>> void *fdt; >>> uint64_t addr = addr_start; >>> + Error *local_err = NULL; >>> >>> for (i = 0; i < nr_lmbs; i++) { >>> drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, >>> @@ -2611,7 +2612,18 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, >>> fdt_offset = spapr_populate_memory_node(fdt, node, addr, >>> SPAPR_MEMORY_BLOCK_SIZE); >>> >>> - spapr_drc_attach(drc, dev, fdt, fdt_offset, errp); >>> + spapr_drc_attach(drc, dev, fdt, fdt_offset, &local_err); >>> + if (local_err) { >>> + while (addr > addr_start) { >>> + addr -= SPAPR_MEMORY_BLOCK_SIZE; >>> + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, >>> + addr / SPAPR_MEMORY_BLOCK_SIZE); >>> + spapr_drc_detach(drc, dev, NULL); >> Question: why pass NULL instead of '&local_err' in detach() ? > local_err already contains the error returned by spapr_drc_attach(), it > cannot be used anymore. > >> Can we ignore any >> any error that happens in detach() because you're propagating the >> local_err set >> in spapr_drc_attach() already? >> > This is a rollback path: the best we can do is to try to undo the > already attached DRCs... > >> ps: I am aware that ATM detach() does not propagate anything to the errp >> being >> passed. I don't think it's wise to write new code based on this >> assumption though. >> > ... and indeed detach() cannot fail with the current code base. :) > > David was even planning to drop the errp argument in the "spapr: > Refactor spapr_drc_detach()" patch. Dropping it would be good. There are places in the code that are passing non-null pointers tof the Errp parameter of detach(). > > Otherwise, maybe pass &error_abort since failing to rollback is > likely to be a bug ? It would be saner for the reader but in reality it would be of no avail, given this info that detach() will probably never make use of it. AFAIC you can leave it as NULL. If you end up sending a v3 for other reasons then you can change it for &error_abort in the process. Reviewed-by: Daniel Barboza <danielhb@linux.vnet.ibm.com> > > Cheers, > > -- > Greg > >> Thanks, >> >> >> Daniel >> >>> + } >>> + g_free(fdt); >>> + error_propagate(errp, local_err); >>> + return; >>> + } >>> addr += SPAPR_MEMORY_BLOCK_SIZE; >>> } >>> /* send hotplug notification to the >>> @@ -2651,14 +2663,20 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, >>> addr = object_property_get_uint(OBJECT(dimm), >>> PC_DIMM_ADDR_PROP, &local_err); >>> if (local_err) { >>> - pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr); >>> - goto out; >>> + goto out_unplug; >>> } >>> >>> spapr_add_lmbs(dev, addr, size, node, >>> spapr_ovec_test(ms->ov5_cas, OV5_HP_EVT), >>> - &error_abort); >>> + &local_err); >>> + if (local_err) { >>> + goto out_unplug; >>> + } >>> + >>> + return; >>> >>> +out_unplug: >>> + pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr); >>> out: >>> error_propagate(errp, local_err); >>> } >>> >>>
On Tue, Jul 04, 2017 at 09:42:52AM +0200, Greg Kurz wrote: > On Mon, 3 Jul 2017 14:03:07 -0300 > Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com> wrote: > > > On 07/03/2017 11:54 AM, Greg Kurz wrote: > > > QEMU shouldn't abort if spapr_add_lmbs()->spapr_drc_attach() fails. > > > Let's propagate the error instead, like it is done everywhere else > > > where spapr_drc_attach() is called. > > > > > > Signed-off-by: Greg Kurz <groug@kaod.org> > > > --- > > > Changes in v2: - added rollback code > > > --- > > > hw/ppc/spapr.c | 26 ++++++++++++++++++++++---- > > > 1 file changed, 22 insertions(+), 4 deletions(-) > > > > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > > > index 70b3fd374e2b..ba8f57a5a054 100644 > > > --- a/hw/ppc/spapr.c > > > +++ b/hw/ppc/spapr.c > > > @@ -2601,6 +2601,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, > > > int i, fdt_offset, fdt_size; > > > void *fdt; > > > uint64_t addr = addr_start; > > > + Error *local_err = NULL; > > > > > > for (i = 0; i < nr_lmbs; i++) { > > > drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, > > > @@ -2611,7 +2612,18 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, > > > fdt_offset = spapr_populate_memory_node(fdt, node, addr, > > > SPAPR_MEMORY_BLOCK_SIZE); > > > > > > - spapr_drc_attach(drc, dev, fdt, fdt_offset, errp); > > > + spapr_drc_attach(drc, dev, fdt, fdt_offset, &local_err); > > > + if (local_err) { > > > + while (addr > addr_start) { > > > + addr -= SPAPR_MEMORY_BLOCK_SIZE; > > > + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, > > > + addr / SPAPR_MEMORY_BLOCK_SIZE); > > > + spapr_drc_detach(drc, dev, NULL); > > > > Question: why pass NULL instead of '&local_err' in detach() ? > > local_err already contains the error returned by spapr_drc_attach(), it > cannot be used anymore. > > > Can we ignore any > > any error that happens in detach() because you're propagating the > > local_err set > > in spapr_drc_attach() already? > > This is a rollback path: the best we can do is to try to undo the > already attached DRCs... Yeah, this is basically the classic exception-during-destructor problem. Dealing with an error condition while in the middle of rolling back another error can get really messy. > > ps: I am aware that ATM detach() does not propagate anything to the errp > > being > > passed. I don't think it's wise to write new code based on this > > assumption though. > > > > ... and indeed detach() cannot fail with the current code base. :) > David was even planning to drop the errp argument in the "spapr: > Refactor spapr_drc_detach()" patch. Yeah, but as you pointed out in comments on that patch, that's probably not a good idea. Except.. then I looked deeper still and the story gets more complex again. I'll revisit that when I respin those DRC patches. > Otherwise, maybe pass &error_abort since failing to rollback is > likely to be a bug ? Yeah, I think the only sane options here are either to ignore (second order) errors, or abort. My usual inclination would be to abort, but I'm a bit concerned that provides a means by which the guest can crash qemu. I'm going to apply the patch as is for now, since I'm pretty confident that it makes things better than they weren. I hope we can continue to improve the detach error handling as a follow up. -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
© 2016 - 2024 Red Hat, Inc.