hw/acpi/acpi-pci-hotplug-stub.c | 4 ---- hw/acpi/pcihp.c | 6 ------ hw/acpi/piix4.c | 11 ++++++++++- include/hw/acpi/pcihp.h | 2 -- 4 files changed, 10 insertions(+), 13 deletions(-)
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
The 'acpi_index' field is a statically configured field, which for
some reason is migrated; this never makes much sense because it's
command line static.
However, on piix4 it's conditional, and the condition/test function
ends up having the wrong pointer passed to it (it gets a PIIX4PMState
not the AcpiPciHpState it was expecting, because VMSTATE_PCI_HOTPLUG
is a macro and not another struct). This means the field is randomly
loaded/saved based on a random pointer. In 6.x this random pointer
randomly seems to get 0 for everyone (!); in 7.0rc it's getting junk
and trying to load a field that the source didn't send. The migration
stream gets out of line and hits the section footer.
The bodge is on piix4 never to load the field:
a) Most 6.x builds never send it, so most of the time the migration
will work.
b) We can backport this fix to 6.x to remove the boobytrap.
c) It should never have made a difference anyway since the acpi-index
is command line configured and should be correct on the destination
anyway
d) ich9 is still sending/receiving this (unconditionally all the time)
but due to (c) should never notice. We could follow up to make it
skip.
It worries me just when (a) actually happens.
Fixes: b32bd76 ("pci: introduce acpi-index property for PCI device")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/932
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
hw/acpi/acpi-pci-hotplug-stub.c | 4 ----
hw/acpi/pcihp.c | 6 ------
hw/acpi/piix4.c | 11 ++++++++++-
include/hw/acpi/pcihp.h | 2 --
4 files changed, 10 insertions(+), 13 deletions(-)
diff --git a/hw/acpi/acpi-pci-hotplug-stub.c b/hw/acpi/acpi-pci-hotplug-stub.c
index 734e4c5986..a43f6dafc9 100644
--- a/hw/acpi/acpi-pci-hotplug-stub.c
+++ b/hw/acpi/acpi-pci-hotplug-stub.c
@@ -41,7 +41,3 @@ void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off)
return;
}
-bool vmstate_acpi_pcihp_use_acpi_index(void *opaque, int version_id)
-{
- return false;
-}
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
index 6351bd3424..bf65bbea49 100644
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -554,12 +554,6 @@ void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
OBJ_PROP_FLAG_READ);
}
-bool vmstate_acpi_pcihp_use_acpi_index(void *opaque, int version_id)
-{
- AcpiPciHpState *s = opaque;
- return s->acpi_index;
-}
-
const VMStateDescription vmstate_acpi_pcihp_pci_status = {
.name = "acpi_pcihp_pci_status",
.version_id = 1,
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index cc37fa3416..48aeedd5f0 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -267,6 +267,15 @@ static bool piix4_vmstate_need_smbus(void *opaque, int version_id)
return pm_smbus_vmstate_needed();
}
+/*
+ * This is a fudge to turn off the acpi_index field, whose
+ * test was always broken on piix4.
+ */
+static bool vmstate_test_never(void *opaque, int version_id)
+{
+ return false;
+}
+
/* qemu-kvm 1.2 uses version 3 but advertised as 2
* To support incoming qemu-kvm 1.2 migration, change version_id
* and minimum_version_id to 2 below (which breaks migration from
@@ -297,7 +306,7 @@ static const VMStateDescription vmstate_acpi = {
struct AcpiPciHpPciStatus),
VMSTATE_PCI_HOTPLUG(acpi_pci_hotplug, PIIX4PMState,
vmstate_test_use_acpi_hotplug_bridge,
- vmstate_acpi_pcihp_use_acpi_index),
+ vmstate_test_never),
VMSTATE_END_OF_LIST()
},
.subsections = (const VMStateDescription*[]) {
diff --git a/include/hw/acpi/pcihp.h b/include/hw/acpi/pcihp.h
index af1a169fc3..7e268c2c9c 100644
--- a/include/hw/acpi/pcihp.h
+++ b/include/hw/acpi/pcihp.h
@@ -73,8 +73,6 @@ void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off);
extern const VMStateDescription vmstate_acpi_pcihp_pci_status;
-bool vmstate_acpi_pcihp_use_acpi_index(void *opaque, int version_id);
-
#define VMSTATE_PCI_HOTPLUG(pcihp, state, test_pcihp, test_acpi_index) \
VMSTATE_UINT32_TEST(pcihp.hotplug_select, state, \
test_pcihp), \
--
2.35.1
Am 05.04.22 um 21:06 schrieb Dr. David Alan Gilbert (git): > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com> > > The 'acpi_index' field is a statically configured field, which for > some reason is migrated; this never makes much sense because it's > command line static. > > However, on piix4 it's conditional, and the condition/test function > ends up having the wrong pointer passed to it (it gets a PIIX4PMState > not the AcpiPciHpState it was expecting, because VMSTATE_PCI_HOTPLUG > is a macro and not another struct). This means the field is randomly > loaded/saved based on a random pointer. In 6.x this random pointer > randomly seems to get 0 for everyone (!); in 7.0rc it's getting junk > and trying to load a field that the source didn't send. The migration > stream gets out of line and hits the section footer. > > The bodge is on piix4 never to load the field: > a) Most 6.x builds never send it, so most of the time the migration > will work. > b) We can backport this fix to 6.x to remove the boobytrap. > c) It should never have made a difference anyway since the acpi-index > is command line configured and should be correct on the destination > anyway > d) ich9 is still sending/receiving this (unconditionally all the time) > but due to (c) should never notice. We could follow up to make it > skip. > > It worries me just when (a) actually happens. > > Fixes: b32bd76 ("pci: introduce acpi-index property for PCI device") > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/932 > > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > --- > hw/acpi/acpi-pci-hotplug-stub.c | 4 ---- > hw/acpi/pcihp.c | 6 ------ > hw/acpi/piix4.c | 11 ++++++++++- > include/hw/acpi/pcihp.h | 2 -- > 4 files changed, 10 insertions(+), 13 deletions(-) > Fixes the issue for me, thanks! Tested-by: Fabian Ebner <f.ebner@proxmox.com>
Good work! :) Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com> On Tue, Apr 5, 2022 at 11:07 PM Dr. David Alan Gilbert (git) < dgilbert@redhat.com> wrote: > -- Marc-André Lureau
On Tue, Apr 05, 2022 at 08:06:58PM +0100, Dr. David Alan Gilbert (git) wrote: The patch is fine but pls repost as text not as application/octet-stream. Thanks! -- MST
* Michael S. Tsirkin (mst@redhat.com) wrote: > On Tue, Apr 05, 2022 at 08:06:58PM +0100, Dr. David Alan Gilbert (git) wrote: > > The patch is fine but pls repost as text not as > application/octet-stream. I've just reposted it with a fudged header that should be fine. ( 20220406083531.10217-1-dgilbert@redhat.com ) Dave > Thanks! > > -- > MST > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
* Michael S. Tsirkin (mst@redhat.com) wrote: > On Tue, Apr 05, 2022 at 08:06:58PM +0100, Dr. David Alan Gilbert (git) wrote: > > The patch is fine but pls repost as text not as > application/octet-stream. Let me try and figure out how; this is the same git send-email I've used for years; it's our corporate mail setup that's screwing the header up. Dave > Thanks! > > -- > MST > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Tue, 5 Apr 2022 20:06:58 +0100 "Dr. David Alan Gilbert (git)" <dgilbert@redhat.com> wrote: > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com> > > The 'acpi_index' field is a statically configured field, which for > some reason is migrated; this never makes much sense because it's > command line static. > > However, on piix4 it's conditional, and the condition/test function > ends up having the wrong pointer passed to it (it gets a PIIX4PMState > not the AcpiPciHpState it was expecting, because VMSTATE_PCI_HOTPLUG > is a macro and not another struct). This means the field is randomly > loaded/saved based on a random pointer. In 6.x this random pointer > randomly seems to get 0 for everyone (!); in 7.0rc it's getting junk > and trying to load a field that the source didn't send. FWIW, after some hunting and pecking, 6.2 (64bit): (gdb) p &((struct AcpiPciHpState *)0)->acpi_index $1 = (uint32_t *) 0xc04 (gdb) p &((struct PIIX4PMState *)0)->ar.tmr.io.addr $2 = (hwaddr *) 0xc00 f53faa70bb63: (gdb) p &((struct AcpiPciHpState *)0)->acpi_index $1 = (uint32_t *) 0xc04 (gdb) p &((struct PIIX4PMState *)0)->io_gpe.coalesced.tqh_circ.tql_prev $2 = (struct QTailQLink **) 0xc00 So yeah, it seems 0xc04 will always be part of a pointer on current mainline. I can't really speak to the ACPIPMTimer MemoryRegion in the PIIX4PMState, maybe if there's a hwaddr it's always 32bit and the upper dword is reliably zero? Thanks, Alex > The migration > stream gets out of line and hits the section footer. > > The bodge is on piix4 never to load the field: > a) Most 6.x builds never send it, so most of the time the migration > will work. > b) We can backport this fix to 6.x to remove the boobytrap. > c) It should never have made a difference anyway since the acpi-index > is command line configured and should be correct on the destination > anyway > d) ich9 is still sending/receiving this (unconditionally all the time) > but due to (c) should never notice. We could follow up to make it > skip. > > It worries me just when (a) actually happens. > > Fixes: b32bd76 ("pci: introduce acpi-index property for PCI device") > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/932 > > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > --- > hw/acpi/acpi-pci-hotplug-stub.c | 4 ---- > hw/acpi/pcihp.c | 6 ------ > hw/acpi/piix4.c | 11 ++++++++++- > include/hw/acpi/pcihp.h | 2 -- > 4 files changed, 10 insertions(+), 13 deletions(-) > > diff --git a/hw/acpi/acpi-pci-hotplug-stub.c b/hw/acpi/acpi-pci-hotplug-stub.c > index 734e4c5986..a43f6dafc9 100644 > --- a/hw/acpi/acpi-pci-hotplug-stub.c > +++ b/hw/acpi/acpi-pci-hotplug-stub.c > @@ -41,7 +41,3 @@ void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off) > return; > } > > -bool vmstate_acpi_pcihp_use_acpi_index(void *opaque, int version_id) > -{ > - return false; > -} > diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c > index 6351bd3424..bf65bbea49 100644 > --- a/hw/acpi/pcihp.c > +++ b/hw/acpi/pcihp.c > @@ -554,12 +554,6 @@ void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus, > OBJ_PROP_FLAG_READ); > } > > -bool vmstate_acpi_pcihp_use_acpi_index(void *opaque, int version_id) > -{ > - AcpiPciHpState *s = opaque; > - return s->acpi_index; > -} > - > const VMStateDescription vmstate_acpi_pcihp_pci_status = { > .name = "acpi_pcihp_pci_status", > .version_id = 1, > diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c > index cc37fa3416..48aeedd5f0 100644 > --- a/hw/acpi/piix4.c > +++ b/hw/acpi/piix4.c > @@ -267,6 +267,15 @@ static bool piix4_vmstate_need_smbus(void *opaque, int version_id) > return pm_smbus_vmstate_needed(); > } > > +/* > + * This is a fudge to turn off the acpi_index field, whose > + * test was always broken on piix4. > + */ > +static bool vmstate_test_never(void *opaque, int version_id) > +{ > + return false; > +} > + > /* qemu-kvm 1.2 uses version 3 but advertised as 2 > * To support incoming qemu-kvm 1.2 migration, change version_id > * and minimum_version_id to 2 below (which breaks migration from > @@ -297,7 +306,7 @@ static const VMStateDescription vmstate_acpi = { > struct AcpiPciHpPciStatus), > VMSTATE_PCI_HOTPLUG(acpi_pci_hotplug, PIIX4PMState, > vmstate_test_use_acpi_hotplug_bridge, > - vmstate_acpi_pcihp_use_acpi_index), > + vmstate_test_never), > VMSTATE_END_OF_LIST() > }, > .subsections = (const VMStateDescription*[]) { > diff --git a/include/hw/acpi/pcihp.h b/include/hw/acpi/pcihp.h > index af1a169fc3..7e268c2c9c 100644 > --- a/include/hw/acpi/pcihp.h > +++ b/include/hw/acpi/pcihp.h > @@ -73,8 +73,6 @@ void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off); > > extern const VMStateDescription vmstate_acpi_pcihp_pci_status; > > -bool vmstate_acpi_pcihp_use_acpi_index(void *opaque, int version_id); > - > #define VMSTATE_PCI_HOTPLUG(pcihp, state, test_pcihp, test_acpi_index) \ > VMSTATE_UINT32_TEST(pcihp.hotplug_select, state, \ > test_pcihp), \
* Alex Williamson (alex.williamson@redhat.com) wrote: > On Tue, 5 Apr 2022 20:06:58 +0100 > "Dr. David Alan Gilbert (git)" <dgilbert@redhat.com> wrote: > > > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com> > > > > The 'acpi_index' field is a statically configured field, which for > > some reason is migrated; this never makes much sense because it's > > command line static. > > > > However, on piix4 it's conditional, and the condition/test function > > ends up having the wrong pointer passed to it (it gets a PIIX4PMState > > not the AcpiPciHpState it was expecting, because VMSTATE_PCI_HOTPLUG > > is a macro and not another struct). This means the field is randomly > > loaded/saved based on a random pointer. In 6.x this random pointer > > randomly seems to get 0 for everyone (!); in 7.0rc it's getting junk > > and trying to load a field that the source didn't send. > > FWIW, after some hunting and pecking, 6.2 (64bit): Ah thanks for doing that. > (gdb) p &((struct AcpiPciHpState *)0)->acpi_index > $1 = (uint32_t *) 0xc04 > > (gdb) p &((struct PIIX4PMState *)0)->ar.tmr.io.addr > $2 = (hwaddr *) 0xc00 > > f53faa70bb63: > > (gdb) p &((struct AcpiPciHpState *)0)->acpi_index > $1 = (uint32_t *) 0xc04 > > (gdb) p &((struct PIIX4PMState *)0)->io_gpe.coalesced.tqh_circ.tql_prev > $2 = (struct QTailQLink **) 0xc00 > > So yeah, it seems 0xc04 will always be part of a pointer on current > mainline. I can't really speak to the ACPIPMTimer MemoryRegion in the > PIIX4PMState, maybe if there's a hwaddr it's always 32bit and the upper > dword is reliably zero? Thanks, I'm a bit confused by that: memory_region_init_io(&ar->tmr.io, memory_region_owner(parent), &acpi_pm_tmr_ops, ar, "acpi-tmr", 4); memory_region_add_subregion(parent, 8, &ar->tmr.io); .write = acpi_pm_tmr_write, static void acpi_pm_tmr_write(void *opaque, hwaddr addr, uint64_t val, unsigned width) { /* nothing */ } so, hmm I don't see it writing to that? Dave > Alex > > > The migration > > stream gets out of line and hits the section footer. > > > > The bodge is on piix4 never to load the field: > > a) Most 6.x builds never send it, so most of the time the migration > > will work. > > b) We can backport this fix to 6.x to remove the boobytrap. > > c) It should never have made a difference anyway since the acpi-index > > is command line configured and should be correct on the destination > > anyway > > d) ich9 is still sending/receiving this (unconditionally all the time) > > but due to (c) should never notice. We could follow up to make it > > skip. > > > > It worries me just when (a) actually happens. > > > > Fixes: b32bd76 ("pci: introduce acpi-index property for PCI device") > > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/932 > > > > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > > --- > > hw/acpi/acpi-pci-hotplug-stub.c | 4 ---- > > hw/acpi/pcihp.c | 6 ------ > > hw/acpi/piix4.c | 11 ++++++++++- > > include/hw/acpi/pcihp.h | 2 -- > > 4 files changed, 10 insertions(+), 13 deletions(-) > > > > diff --git a/hw/acpi/acpi-pci-hotplug-stub.c b/hw/acpi/acpi-pci-hotplug-stub.c > > index 734e4c5986..a43f6dafc9 100644 > > --- a/hw/acpi/acpi-pci-hotplug-stub.c > > +++ b/hw/acpi/acpi-pci-hotplug-stub.c > > @@ -41,7 +41,3 @@ void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off) > > return; > > } > > > > -bool vmstate_acpi_pcihp_use_acpi_index(void *opaque, int version_id) > > -{ > > - return false; > > -} > > diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c > > index 6351bd3424..bf65bbea49 100644 > > --- a/hw/acpi/pcihp.c > > +++ b/hw/acpi/pcihp.c > > @@ -554,12 +554,6 @@ void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus, > > OBJ_PROP_FLAG_READ); > > } > > > > -bool vmstate_acpi_pcihp_use_acpi_index(void *opaque, int version_id) > > -{ > > - AcpiPciHpState *s = opaque; > > - return s->acpi_index; > > -} > > - > > const VMStateDescription vmstate_acpi_pcihp_pci_status = { > > .name = "acpi_pcihp_pci_status", > > .version_id = 1, > > diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c > > index cc37fa3416..48aeedd5f0 100644 > > --- a/hw/acpi/piix4.c > > +++ b/hw/acpi/piix4.c > > @@ -267,6 +267,15 @@ static bool piix4_vmstate_need_smbus(void *opaque, int version_id) > > return pm_smbus_vmstate_needed(); > > } > > > > +/* > > + * This is a fudge to turn off the acpi_index field, whose > > + * test was always broken on piix4. > > + */ > > +static bool vmstate_test_never(void *opaque, int version_id) > > +{ > > + return false; > > +} > > + > > /* qemu-kvm 1.2 uses version 3 but advertised as 2 > > * To support incoming qemu-kvm 1.2 migration, change version_id > > * and minimum_version_id to 2 below (which breaks migration from > > @@ -297,7 +306,7 @@ static const VMStateDescription vmstate_acpi = { > > struct AcpiPciHpPciStatus), > > VMSTATE_PCI_HOTPLUG(acpi_pci_hotplug, PIIX4PMState, > > vmstate_test_use_acpi_hotplug_bridge, > > - vmstate_acpi_pcihp_use_acpi_index), > > + vmstate_test_never), > > VMSTATE_END_OF_LIST() > > }, > > .subsections = (const VMStateDescription*[]) { > > diff --git a/include/hw/acpi/pcihp.h b/include/hw/acpi/pcihp.h > > index af1a169fc3..7e268c2c9c 100644 > > --- a/include/hw/acpi/pcihp.h > > +++ b/include/hw/acpi/pcihp.h > > @@ -73,8 +73,6 @@ void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off); > > > > extern const VMStateDescription vmstate_acpi_pcihp_pci_status; > > > > -bool vmstate_acpi_pcihp_use_acpi_index(void *opaque, int version_id); > > - > > #define VMSTATE_PCI_HOTPLUG(pcihp, state, test_pcihp, test_acpi_index) \ > > VMSTATE_UINT32_TEST(pcihp.hotplug_select, state, \ > > test_pcihp), \ > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
© 2016 - 2024 Red Hat, Inc.