hw/i386/pc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-)
QEMU currently crashes when you try to hot-plug an "nvdimm" device
on older machine types:
$ qemu-system-x86_64 -monitor stdio -M pc-1.1
QEMU 3.1.92 monitor - type 'help' for more information
(qemu) device_add nvdimm,id=nvdimmn1
qemu-system-x86_64: /home/thuth/devel/qemu/util/error.c:57: error_setv:
Assertion `*errp == ((void *)0)' failed.
Aborted (core dumped)
The call to hotplug_handler_pre_plug() in pc_memory_pre_plug() has been
added recently before the check whether nvdimm is enabled. It should
be done after the check. And while we're at it, also check the errp
after the hotplug_handler_pre_plug(), otherwise errors are silently
ignored here.
Fixes: 9040e6dfa8c3fed87695a3de555d2c775727bb51
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
hw/i386/pc.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 6077d27361..f2c15bf1f2 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -2078,6 +2078,7 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
const MachineState *ms = MACHINE(hotplug_dev);
const bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM);
const uint64_t legacy_align = TARGET_PAGE_SIZE;
+ Error *local_err = NULL;
/*
* When -no-acpi is used with Q35 machine type, no ACPI is built,
@@ -2090,13 +2091,17 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
return;
}
- hotplug_handler_pre_plug(pcms->acpi_dev, dev, errp);
-
if (is_nvdimm && !ms->nvdimms_state->is_enabled) {
error_setg(errp, "nvdimm is not enabled: missing 'nvdimm' in '-M'");
return;
}
+ hotplug_handler_pre_plug(pcms->acpi_dev, dev, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
pc_dimm_pre_plug(PC_DIMM(dev), MACHINE(hotplug_dev),
pcmc->enforce_aligned_dimm ? NULL : &legacy_align, errp);
}
--
2.21.0
On 07/04/19 11:23, Thomas Huth wrote: > QEMU currently crashes when you try to hot-plug an "nvdimm" device > on older machine types: > > $ qemu-system-x86_64 -monitor stdio -M pc-1.1 > QEMU 3.1.92 monitor - type 'help' for more information > (qemu) device_add nvdimm,id=nvdimmn1 > qemu-system-x86_64: /home/thuth/devel/qemu/util/error.c:57: error_setv: > Assertion `*errp == ((void *)0)' failed. > Aborted (core dumped) > > The call to hotplug_handler_pre_plug() in pc_memory_pre_plug() has been > added recently before the check whether nvdimm is enabled. It should > be done after the check. And while we're at it, also check the errp > after the hotplug_handler_pre_plug(), otherwise errors are silently > ignored here. > > Fixes: 9040e6dfa8c3fed87695a3de555d2c775727bb51 > Signed-off-by: Thomas Huth <thuth@redhat.com> > --- > hw/i386/pc.c | 9 +++++++-- > 1 file changed, 7 insertions(+), 2 deletions(-) > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c > index 6077d27361..f2c15bf1f2 100644 > --- a/hw/i386/pc.c > +++ b/hw/i386/pc.c > @@ -2078,6 +2078,7 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, > const MachineState *ms = MACHINE(hotplug_dev); > const bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); > const uint64_t legacy_align = TARGET_PAGE_SIZE; > + Error *local_err = NULL; > > /* > * When -no-acpi is used with Q35 machine type, no ACPI is built, > @@ -2090,13 +2091,17 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, > return; > } > > - hotplug_handler_pre_plug(pcms->acpi_dev, dev, errp); > - > if (is_nvdimm && !ms->nvdimms_state->is_enabled) { > error_setg(errp, "nvdimm is not enabled: missing 'nvdimm' in '-M'"); > return; > } > > + hotplug_handler_pre_plug(pcms->acpi_dev, dev, &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + return; > + } > + > pc_dimm_pre_plug(PC_DIMM(dev), MACHINE(hotplug_dev), > pcmc->enforce_aligned_dimm ? NULL : &legacy_align, errp); > } > Queued, thanks. Paolo
On Sun, Apr 07, 2019 at 11:23:14AM +0200, Thomas Huth wrote: >QEMU currently crashes when you try to hot-plug an "nvdimm" device >on older machine types: > >$ qemu-system-x86_64 -monitor stdio -M pc-1.1 >QEMU 3.1.92 monitor - type 'help' for more information >(qemu) device_add nvdimm,id=nvdimmn1 >qemu-system-x86_64: /home/thuth/devel/qemu/util/error.c:57: error_setv: > Assertion `*errp == ((void *)0)' failed. >Aborted (core dumped) > >The call to hotplug_handler_pre_plug() in pc_memory_pre_plug() has been >added recently before the check whether nvdimm is enabled. It should >be done after the check. And while we're at it, also check the errp >after the hotplug_handler_pre_plug(), otherwise errors are silently >ignored here. Thomas, Thanks for pointing this out, while I have some different idea on how to fix this. The reason of the core dump is errp already been set in hotplug_handler_pre_plug(), and this function check acpi hotplug capability. The order of this check is correct, while we should return when errp is set in hotplug_handler_pre_plug(). I got a fix like this, which I have tested and looks good to me. diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 6077d27361..b11f3b15c1 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -2091,6 +2091,9 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, } hotplug_handler_pre_plug(pcms->acpi_dev, dev, errp); + if (*errp) { + return; + } if (is_nvdimm && !ms->nvdimms_state->is_enabled) { error_setg(errp, "nvdimm is not enabled: missing 'nvdimm' in '-M'"); > >Fixes: 9040e6dfa8c3fed87695a3de555d2c775727bb51 >Signed-off-by: Thomas Huth <thuth@redhat.com> >--- > hw/i386/pc.c | 9 +++++++-- > 1 file changed, 7 insertions(+), 2 deletions(-) > >diff --git a/hw/i386/pc.c b/hw/i386/pc.c >index 6077d27361..f2c15bf1f2 100644 >--- a/hw/i386/pc.c >+++ b/hw/i386/pc.c >@@ -2078,6 +2078,7 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, > const MachineState *ms = MACHINE(hotplug_dev); > const bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); > const uint64_t legacy_align = TARGET_PAGE_SIZE; >+ Error *local_err = NULL; > > /* > * When -no-acpi is used with Q35 machine type, no ACPI is built, >@@ -2090,13 +2091,17 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, > return; > } > >- hotplug_handler_pre_plug(pcms->acpi_dev, dev, errp); >- > if (is_nvdimm && !ms->nvdimms_state->is_enabled) { > error_setg(errp, "nvdimm is not enabled: missing 'nvdimm' in '-M'"); > return; > } > >+ hotplug_handler_pre_plug(pcms->acpi_dev, dev, &local_err); >+ if (local_err) { >+ error_propagate(errp, local_err); >+ return; >+ } >+ > pc_dimm_pre_plug(PC_DIMM(dev), MACHINE(hotplug_dev), > pcmc->enforce_aligned_dimm ? NULL : &legacy_align, errp); > } >-- >2.21.0 -- Wei Yang Help you, Help me
On 08/04/2019 15.45, Wei Yang wrote: > On Sun, Apr 07, 2019 at 11:23:14AM +0200, Thomas Huth wrote: >> QEMU currently crashes when you try to hot-plug an "nvdimm" device >> on older machine types: >> >> $ qemu-system-x86_64 -monitor stdio -M pc-1.1 >> QEMU 3.1.92 monitor - type 'help' for more information >> (qemu) device_add nvdimm,id=nvdimmn1 >> qemu-system-x86_64: /home/thuth/devel/qemu/util/error.c:57: error_setv: >> Assertion `*errp == ((void *)0)' failed. >> Aborted (core dumped) >> >> The call to hotplug_handler_pre_plug() in pc_memory_pre_plug() has been >> added recently before the check whether nvdimm is enabled. It should >> be done after the check. And while we're at it, also check the errp >> after the hotplug_handler_pre_plug(), otherwise errors are silently >> ignored here. > > Thomas, > > Thanks for pointing this out, while I have some different idea on how to fix > this. > > The reason of the core dump is errp already been set in > hotplug_handler_pre_plug(), and this function check acpi hotplug capability. > The order of this check is correct, while we should return when errp is set > in hotplug_handler_pre_plug(). > > I got a fix like this, which I have tested and looks good to me. > > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c > index 6077d27361..b11f3b15c1 100644 > --- a/hw/i386/pc.c > +++ b/hw/i386/pc.c > @@ -2091,6 +2091,9 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, > } > > hotplug_handler_pre_plug(pcms->acpi_dev, dev, errp); > + if (*errp) { > + return; > + } Not sure, but I think you can not rely on the fact that the caller set *errp = NULL already... that's why it is more common to use a local_err variable and error_propagate() for such cases (which is what I did in my patch). Also, why don't you want the "nvdimm is not enabled: missing 'nvdimm' in '-M'" check to be done first? Thomas > if (is_nvdimm && !ms->nvdimms_state->is_enabled) { > error_setg(errp, "nvdimm is not enabled: missing 'nvdimm' in '-M'"); > >> >> Fixes: 9040e6dfa8c3fed87695a3de555d2c775727bb51 >> Signed-off-by: Thomas Huth <thuth@redhat.com> >> --- >> hw/i386/pc.c | 9 +++++++-- >> 1 file changed, 7 insertions(+), 2 deletions(-) >> >> diff --git a/hw/i386/pc.c b/hw/i386/pc.c >> index 6077d27361..f2c15bf1f2 100644 >> --- a/hw/i386/pc.c >> +++ b/hw/i386/pc.c >> @@ -2078,6 +2078,7 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, >> const MachineState *ms = MACHINE(hotplug_dev); >> const bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); >> const uint64_t legacy_align = TARGET_PAGE_SIZE; >> + Error *local_err = NULL; >> >> /* >> * When -no-acpi is used with Q35 machine type, no ACPI is built, >> @@ -2090,13 +2091,17 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, >> return; >> } >> >> - hotplug_handler_pre_plug(pcms->acpi_dev, dev, errp); >> - >> if (is_nvdimm && !ms->nvdimms_state->is_enabled) { >> error_setg(errp, "nvdimm is not enabled: missing 'nvdimm' in '-M'"); >> return; >> } >> >> + hotplug_handler_pre_plug(pcms->acpi_dev, dev, &local_err); >> + if (local_err) { >> + error_propagate(errp, local_err); >> + return; >> + } >> + >> pc_dimm_pre_plug(PC_DIMM(dev), MACHINE(hotplug_dev), >> pcmc->enforce_aligned_dimm ? NULL : &legacy_align, errp); >> } >> -- >> 2.21.0 >
On Mon, Apr 08, 2019 at 05:06:49PM +0200, Thomas Huth wrote: >On 08/04/2019 15.45, Wei Yang wrote: >> On Sun, Apr 07, 2019 at 11:23:14AM +0200, Thomas Huth wrote: >>> QEMU currently crashes when you try to hot-plug an "nvdimm" device >>> on older machine types: >>> >>> $ qemu-system-x86_64 -monitor stdio -M pc-1.1 >>> QEMU 3.1.92 monitor - type 'help' for more information >>> (qemu) device_add nvdimm,id=nvdimmn1 >>> qemu-system-x86_64: /home/thuth/devel/qemu/util/error.c:57: error_setv: >>> Assertion `*errp == ((void *)0)' failed. >>> Aborted (core dumped) >>> >>> The call to hotplug_handler_pre_plug() in pc_memory_pre_plug() has been >>> added recently before the check whether nvdimm is enabled. It should >>> be done after the check. And while we're at it, also check the errp >>> after the hotplug_handler_pre_plug(), otherwise errors are silently >>> ignored here. >> >> Thomas, >> >> Thanks for pointing this out, while I have some different idea on how to fix >> this. >> >> The reason of the core dump is errp already been set in >> hotplug_handler_pre_plug(), and this function check acpi hotplug capability. >> The order of this check is correct, while we should return when errp is set >> in hotplug_handler_pre_plug(). >> >> I got a fix like this, which I have tested and looks good to me. >> >> >> diff --git a/hw/i386/pc.c b/hw/i386/pc.c >> index 6077d27361..b11f3b15c1 100644 >> --- a/hw/i386/pc.c >> +++ b/hw/i386/pc.c >> @@ -2091,6 +2091,9 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, >> } >> >> hotplug_handler_pre_plug(pcms->acpi_dev, dev, errp); >> + if (*errp) { >> + return; >> + } > >Not sure, but I think you can not rely on the fact that the caller set >*errp = NULL already... that's why it is more common to use a local_err >variable and error_propagate() for such cases (which is what I did in my >patch). > Ok, that's fine for me. >Also, why don't you want the "nvdimm is not enabled: missing 'nvdimm' in >'-M'" check to be done first? > Because this function pc_memory_pre_plug() will be called not only when nvdimm is hot-plugged but also dimm is hot-plugged. And hotplug_handler_pre_plug() here is to check the acpi(if it has) hot-plug capability. So the check in pc_memory_pre_plug() is from generic to specific: 1. Do we have capability to hot-plug? 2. If the device is nvdimm, do we enabled nvdimm? > Thomas > > > >> if (is_nvdimm && !ms->nvdimms_state->is_enabled) { >> error_setg(errp, "nvdimm is not enabled: missing 'nvdimm' in '-M'"); >> >>> >>> Fixes: 9040e6dfa8c3fed87695a3de555d2c775727bb51 >>> Signed-off-by: Thomas Huth <thuth@redhat.com> >>> --- >>> hw/i386/pc.c | 9 +++++++-- >>> 1 file changed, 7 insertions(+), 2 deletions(-) >>> >>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c >>> index 6077d27361..f2c15bf1f2 100644 >>> --- a/hw/i386/pc.c >>> +++ b/hw/i386/pc.c >>> @@ -2078,6 +2078,7 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, >>> const MachineState *ms = MACHINE(hotplug_dev); >>> const bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); >>> const uint64_t legacy_align = TARGET_PAGE_SIZE; >>> + Error *local_err = NULL; >>> >>> /* >>> * When -no-acpi is used with Q35 machine type, no ACPI is built, >>> @@ -2090,13 +2091,17 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, >>> return; >>> } >>> >>> - hotplug_handler_pre_plug(pcms->acpi_dev, dev, errp); >>> - >>> if (is_nvdimm && !ms->nvdimms_state->is_enabled) { >>> error_setg(errp, "nvdimm is not enabled: missing 'nvdimm' in '-M'"); >>> return; >>> } >>> >>> + hotplug_handler_pre_plug(pcms->acpi_dev, dev, &local_err); >>> + if (local_err) { >>> + error_propagate(errp, local_err); >>> + return; >>> + } >>> + >>> pc_dimm_pre_plug(PC_DIMM(dev), MACHINE(hotplug_dev), >>> pcmc->enforce_aligned_dimm ? NULL : &legacy_align, errp); >>> } >>> -- >>> 2.21.0 >> > -- Wei Yang Help you, Help me
On Mon, Apr 08, 2019 at 09:29:11PM +0000, Wei Yang wrote: >>> >>> Thomas, >>> >>> Thanks for pointing this out, while I have some different idea on how to fix >>> this. >>> >>> The reason of the core dump is errp already been set in >>> hotplug_handler_pre_plug(), and this function check acpi hotplug capability. >>> The order of this check is correct, while we should return when errp is set >>> in hotplug_handler_pre_plug(). >>> >>> I got a fix like this, which I have tested and looks good to me. >>> >>> >>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c >>> index 6077d27361..b11f3b15c1 100644 >>> --- a/hw/i386/pc.c >>> +++ b/hw/i386/pc.c >>> @@ -2091,6 +2091,9 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, >>> } >>> >>> hotplug_handler_pre_plug(pcms->acpi_dev, dev, errp); >>> + if (*errp) { >>> + return; >>> + } >> >>Not sure, but I think you can not rely on the fact that the caller set >>*errp = NULL already... that's why it is more common to use a local_err >>variable and error_propagate() for such cases (which is what I did in my >>patch). >> > >Ok, that's fine for me. > >>Also, why don't you want the "nvdimm is not enabled: missing 'nvdimm' in >>'-M'" check to be done first? >> > >Because this function pc_memory_pre_plug() will be called not only when >nvdimm is hot-plugged but also dimm is hot-plugged. And >hotplug_handler_pre_plug() here is to check the acpi(if it has) hot-plug >capability. > >So the check in pc_memory_pre_plug() is from generic to specific: > 1. Do we have capability to hot-plug? > 2. If the device is nvdimm, do we enabled nvdimm? > Thomas Do you think this is a reasonable explanation? -- Wei Yang Help you, Help me
Hi, On 11/04/2019 03.56, Wei Yang wrote: > On Mon, Apr 08, 2019 at 09:29:11PM +0000, Wei Yang wrote: >>>> >>>> Thomas, >>>> >>>> Thanks for pointing this out, while I have some different idea on how to fix >>>> this. >>>> >>>> The reason of the core dump is errp already been set in >>>> hotplug_handler_pre_plug(), and this function check acpi hotplug capability. >>>> The order of this check is correct, while we should return when errp is set >>>> in hotplug_handler_pre_plug(). >>>> >>>> I got a fix like this, which I have tested and looks good to me. >>>> >>>> >>>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c >>>> index 6077d27361..b11f3b15c1 100644 >>>> --- a/hw/i386/pc.c >>>> +++ b/hw/i386/pc.c >>>> @@ -2091,6 +2091,9 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, >>>> } >>>> >>>> hotplug_handler_pre_plug(pcms->acpi_dev, dev, errp); >>>> + if (*errp) { >>>> + return; >>>> + } >>> >>> Not sure, but I think you can not rely on the fact that the caller set >>> *errp = NULL already... that's why it is more common to use a local_err >>> variable and error_propagate() for such cases (which is what I did in my >>> patch). >>> >> >> Ok, that's fine for me. >> >>> Also, why don't you want the "nvdimm is not enabled: missing 'nvdimm' in >>> '-M'" check to be done first? >>> >> >> Because this function pc_memory_pre_plug() will be called not only when >> nvdimm is hot-plugged but also dimm is hot-plugged. And >> hotplug_handler_pre_plug() here is to check the acpi(if it has) hot-plug >> capability. >> >> So the check in pc_memory_pre_plug() is from generic to specific: >> 1. Do we have capability to hot-plug? >> 2. If the device is nvdimm, do we enabled nvdimm? >> > > Thomas > > Do you think this is a reasonable explanation? Fine for me, I don't mind either way as long as the crash is fixed. Feel free to send a patch that restores the desired order. Thomas
On Mon, Apr 08, 2019 at 05:06:49PM +0200, Thomas Huth wrote: > On 08/04/2019 15.45, Wei Yang wrote: [...] > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c > > index 6077d27361..b11f3b15c1 100644 > > --- a/hw/i386/pc.c > > +++ b/hw/i386/pc.c > > @@ -2091,6 +2091,9 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, > > } > > > > hotplug_handler_pre_plug(pcms->acpi_dev, dev, errp); > > + if (*errp) { > > + return; > > + } > > Not sure, but I think you can not rely on the fact that the caller set > *errp = NULL already... that's why it is more common to use a local_err > variable and error_propagate() for such cases (which is what I did in my > patch). *errp can't be non-NULL (otherwise functions calling error_setg() would crash). errp can be NULL, though, and that's why you need a local_err variable. I'd love to eliminate NULL errp from our codebase, but I couldn't find a way to do it that is safe and simple (i.e. not letting us pass NULL errp by mistake and not requiring a macro wrapping every `&local_err` expression). -- Eduardo
Eduardo Habkost <ehabkost@redhat.com> writes: > On Mon, Apr 08, 2019 at 05:06:49PM +0200, Thomas Huth wrote: >> On 08/04/2019 15.45, Wei Yang wrote: > [...] >> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c >> > index 6077d27361..b11f3b15c1 100644 >> > --- a/hw/i386/pc.c >> > +++ b/hw/i386/pc.c >> > @@ -2091,6 +2091,9 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, >> > } >> > >> > hotplug_handler_pre_plug(pcms->acpi_dev, dev, errp); >> > + if (*errp) { >> > + return; >> > + } >> >> Not sure, but I think you can not rely on the fact that the caller set >> *errp = NULL already... that's why it is more common to use a local_err >> variable and error_propagate() for such cases (which is what I did in my >> patch). > > *errp can't be non-NULL (otherwise functions calling error_setg() > would crash). errp can be NULL, though, and that's why you need > a local_err variable. Correct. The big comment in error.h advises: * Receive an error and pass it on to the caller: * Error *err = NULL; * foo(arg, &err); * if (err) { * handle the error... * error_propagate(errp, err); * } * where Error **errp is a parameter, by convention the last one. * * Do *not* "optimize" this to * foo(arg, errp); * if (*errp) { // WRONG! * handle the error... * } * because errp may be NULL! * * But when all you do with the error is pass it on, please use * foo(arg, errp); * for readability. > I'd love to eliminate NULL errp from our codebase, but I couldn't > find a way to do it that is safe and simple (i.e. not letting us > pass NULL errp by mistake and not requiring a macro wrapping > every `&local_err` expression). Also, I'd prefer not not deviate even more from GError. Apropos NULL, we often pass NULL where we really ought to pass &error_abort.
© 2016 - 2024 Red Hat, Inc.