[PATCH v10 02/21] acpi/generic_event_device: Update GHES migration to cover hest addr

Mauro Carvalho Chehab posted 21 patches 2 months, 1 week ago
[PATCH v10 02/21] acpi/generic_event_device: Update GHES migration to cover hest addr
Posted by Mauro Carvalho Chehab 2 months, 1 week ago
The GHES migration logic at GED should now support HEST table
location too.

Increase migration version and change needed to check for both
ghes_addr_le and hest_addr_le.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 hw/acpi/generic_event_device.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
index 15b4c3ebbf24..4e5e387ee2df 100644
--- a/hw/acpi/generic_event_device.c
+++ b/hw/acpi/generic_event_device.c
@@ -343,10 +343,11 @@ static const VMStateDescription vmstate_ged_state = {
 
 static const VMStateDescription vmstate_ghes = {
     .name = "acpi-ghes",
-    .version_id = 1,
-    .minimum_version_id = 1,
+    .version_id = 2,
+    .minimum_version_id = 2,
     .fields = (const VMStateField[]) {
         VMSTATE_UINT64(ghes_addr_le, AcpiGhesState),
+        VMSTATE_UINT64(hest_addr_le, AcpiGhesState),
         VMSTATE_END_OF_LIST()
     },
 };
@@ -354,13 +355,13 @@ static const VMStateDescription vmstate_ghes = {
 static bool ghes_needed(void *opaque)
 {
     AcpiGedState *s = opaque;
-    return s->ghes_state.ghes_addr_le;
+    return s->ghes_state.ghes_addr_le && s->ghes_state.hest_addr_le;
 }
 
 static const VMStateDescription vmstate_ghes_state = {
     .name = "acpi-ged/ghes",
-    .version_id = 1,
-    .minimum_version_id = 1,
+    .version_id = 2,
+    .minimum_version_id = 2,
     .needed = ghes_needed,
     .fields = (const VMStateField[]) {
         VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,
-- 
2.46.0
Re: [PATCH v10 02/21] acpi/generic_event_device: Update GHES migration to cover hest addr
Posted by Igor Mammedov 2 months, 1 week ago
On Sat, 14 Sep 2024 08:13:23 +0200
Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:

> The GHES migration logic at GED should now support HEST table
> location too.
> 
> Increase migration version and change needed to check for both
> ghes_addr_le and hest_addr_le.
> 
> Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
> ---
>  hw/acpi/generic_event_device.c | 11 ++++++-----
>  1 file changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
> index 15b4c3ebbf24..4e5e387ee2df 100644
> --- a/hw/acpi/generic_event_device.c
> +++ b/hw/acpi/generic_event_device.c
> @@ -343,10 +343,11 @@ static const VMStateDescription vmstate_ged_state = {
>  
>  static const VMStateDescription vmstate_ghes = {
>      .name = "acpi-ghes",
> -    .version_id = 1,
> -    .minimum_version_id = 1,
> +    .version_id = 2,
> +    .minimum_version_id = 2,
>      .fields = (const VMStateField[]) {
>          VMSTATE_UINT64(ghes_addr_le, AcpiGhesState),
> +        VMSTATE_UINT64(hest_addr_le, AcpiGhesState),
>          VMSTATE_END_OF_LIST()
>      },
>  };
> @@ -354,13 +355,13 @@ static const VMStateDescription vmstate_ghes = {
>  static bool ghes_needed(void *opaque)
>  {
>      AcpiGedState *s = opaque;
> -    return s->ghes_state.ghes_addr_le;
                            ^^^^^^^^^^^^
another thing, perhaps we should rename it to 'hardware_errors_addr'
to make it less confusing 

> +    return s->ghes_state.ghes_addr_le && s->ghes_state.hest_addr_le;
>  }
>  
>  static const VMStateDescription vmstate_ghes_state = {
>      .name = "acpi-ged/ghes",
> -    .version_id = 1,
> -    .minimum_version_id = 1,
> +    .version_id = 2,
> +    .minimum_version_id = 2,
>      .needed = ghes_needed,
>      .fields = (const VMStateField[]) {
>          VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,
Re: [PATCH v10 02/21] acpi/generic_event_device: Update GHES migration to cover hest addr
Posted by Mauro Carvalho Chehab 1 month, 4 weeks ago
Em Tue, 17 Sep 2024 14:01:46 +0200
Igor Mammedov <imammedo@redhat.com> escreveu:

> > @@ -354,13 +355,13 @@ static const VMStateDescription vmstate_ghes = {
> >  static bool ghes_needed(void *opaque)
> >  {
> >      AcpiGedState *s = opaque;
> > -    return s->ghes_state.ghes_addr_le;  
>                             ^^^^^^^^^^^^
> another thing, perhaps we should rename it to 'hardware_errors_addr'
> to make it less confusing 

At the cleanups, I added a rename patch. I opted to a shorter name:
	hw_error_le.

Thanks,
Mauro
Re: [PATCH v10 02/21] acpi/generic_event_device: Update GHES migration to cover hest addr
Posted by Igor Mammedov 2 months, 1 week ago
On Sat, 14 Sep 2024 08:13:23 +0200
Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:

> The GHES migration logic at GED should now support HEST table
> location too.
> 
> Increase migration version and change needed to check for both
> ghes_addr_le and hest_addr_le
But I don't think it will work like this (but I might be easily wrong)
However I don't know enough to properly review this patch, CCing Peter & Fabiano

> Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
> ---
>  hw/acpi/generic_event_device.c | 11 ++++++-----
>  1 file changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
> index 15b4c3ebbf24..4e5e387ee2df 100644
> --- a/hw/acpi/generic_event_device.c
> +++ b/hw/acpi/generic_event_device.c
> @@ -343,10 +343,11 @@ static const VMStateDescription vmstate_ged_state = {
>  
>  static const VMStateDescription vmstate_ghes = {
>      .name = "acpi-ghes",
> -    .version_id = 1,
> -    .minimum_version_id = 1,
> +    .version_id = 2,
> +    .minimum_version_id = 2,
>      .fields = (const VMStateField[]) {
>          VMSTATE_UINT64(ghes_addr_le, AcpiGhesState),
> +        VMSTATE_UINT64(hest_addr_le, AcpiGhesState),
>          VMSTATE_END_OF_LIST()
>      },
>  };
> @@ -354,13 +355,13 @@ static const VMStateDescription vmstate_ghes = {
>  static bool ghes_needed(void *opaque)
>  {
>      AcpiGedState *s = opaque;
> -    return s->ghes_state.ghes_addr_le;
> +    return s->ghes_state.ghes_addr_le && s->ghes_state.hest_addr_le;
>  }

what I would do:
  add ghes_needed_v2(): return  s->ghes_state.hest_addr_le;

and then instead of reusing vmstate_ghes_state would add new
vmstate_ghes_v2_state subsection that migrates only 
  VMSTATE_UINT64(hest_addr_le, AcpiGhesState)
field.

btw: we probably don't need ghes_addr_le for new code that
uses HEST to lookup relevant error status block.
but we should still keep it for 9.1 and older machine types
as they expect/use it. Separate subsections would work with
this req just fine.

>  static const VMStateDescription vmstate_ghes_state = {
>      .name = "acpi-ged/ghes",
> -    .version_id = 1,
> -    .minimum_version_id = 1,
> +    .version_id = 2,
> +    .minimum_version_id = 2,
>      .needed = ghes_needed,
>      .fields = (const VMStateField[]) {
>          VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,
Re: [PATCH v10 02/21] acpi/generic_event_device: Update GHES migration to cover hest addr
Posted by Peter Xu 2 months, 1 week ago
On Tue, Sep 17, 2024 at 11:19:21AM +0200, Igor Mammedov wrote:
> On Sat, 14 Sep 2024 08:13:23 +0200
> Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:
> 
> > The GHES migration logic at GED should now support HEST table
> > location too.
> > 
> > Increase migration version and change needed to check for both
> > ghes_addr_le and hest_addr_le
> But I don't think it will work like this (but I might be easily wrong)
> However I don't know enough to properly review this patch, CCing Peter & Fabiano
> 
> > Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
> > ---
> >  hw/acpi/generic_event_device.c | 11 ++++++-----
> >  1 file changed, 6 insertions(+), 5 deletions(-)
> > 
> > diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
> > index 15b4c3ebbf24..4e5e387ee2df 100644
> > --- a/hw/acpi/generic_event_device.c
> > +++ b/hw/acpi/generic_event_device.c
> > @@ -343,10 +343,11 @@ static const VMStateDescription vmstate_ged_state = {
> >  
> >  static const VMStateDescription vmstate_ghes = {
> >      .name = "acpi-ghes",
> > -    .version_id = 1,
> > -    .minimum_version_id = 1,
> > +    .version_id = 2,
> > +    .minimum_version_id = 2,
> >      .fields = (const VMStateField[]) {
> >          VMSTATE_UINT64(ghes_addr_le, AcpiGhesState),
> > +        VMSTATE_UINT64(hest_addr_le, AcpiGhesState),
> >          VMSTATE_END_OF_LIST()
> >      },
> >  };
> > @@ -354,13 +355,13 @@ static const VMStateDescription vmstate_ghes = {
> >  static bool ghes_needed(void *opaque)
> >  {
> >      AcpiGedState *s = opaque;
> > -    return s->ghes_state.ghes_addr_le;
> > +    return s->ghes_state.ghes_addr_le && s->ghes_state.hest_addr_le;
> >  }
> 
> what I would do:
>   add ghes_needed_v2(): return  s->ghes_state.hest_addr_le;
> 
> and then instead of reusing vmstate_ghes_state would add new
> vmstate_ghes_v2_state subsection that migrates only 
>   VMSTATE_UINT64(hest_addr_le, AcpiGhesState)
> field.
> 
> btw: we probably don't need ghes_addr_le for new code that
> uses HEST to lookup relevant error status block.
> but we should still keep it for 9.1 and older machine types
> as they expect/use it. Separate subsections would work with
> this req just fine.

Right, if we need bi-directional migration we need above and a compat
property (or VMSTATE_UINT64_TEST() would work too, iiuc).

OTOH VMSD versioning only works for forward migration, not backward.

> 
> >  static const VMStateDescription vmstate_ghes_state = {
> >      .name = "acpi-ged/ghes",
> > -    .version_id = 1,
> > -    .minimum_version_id = 1,
> > +    .version_id = 2,
> > +    .minimum_version_id = 2,

(and IIUC if we set min ver=2, even forward migration should fail.. better
 test it with an old binary, migrating back and forth)

> >      .needed = ghes_needed,
> >      .fields = (const VMStateField[]) {
> >          VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,
> 

Thanks,

-- 
Peter Xu
Re: [PATCH v10 02/21] acpi/generic_event_device: Update GHES migration to cover hest addr
Posted by Mauro Carvalho Chehab 1 month, 4 weeks ago
Em Tue, 17 Sep 2024 11:22:31 -0400
Peter Xu <peterx@redhat.com> escreveu:

> On Tue, Sep 17, 2024 at 11:19:21AM +0200, Igor Mammedov wrote:
> > On Sat, 14 Sep 2024 08:13:23 +0200
> > Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:
> >   

> > what I would do:
> >   add ghes_needed_v2(): return  s->ghes_state.hest_addr_le;
> > 
> > and then instead of reusing vmstate_ghes_state would add new
> > vmstate_ghes_v2_state subsection that migrates only 
> >   VMSTATE_UINT64(hest_addr_le, AcpiGhesState)
> > field.
> > 
> > btw: we probably don't need ghes_addr_le for new code that
> > uses HEST to lookup relevant error status block.
> > but we should still keep it for 9.1 and older machine types
> > as they expect/use it. Separate subsections would work with
> > this req just fine.  

Ok, so, if I understood it right, the enclosed patch should do the
job, right?

> Right, if we need bi-directional migration we need above and a compat
> property (or VMSTATE_UINT64_TEST() would work too, iiuc).
> 
> OTOH VMSD versioning only works for forward migration, not backward.

I don't think bi-directional migration would work. See, with
old version, we have:

- Just one Error source block structure, only for ARMv8 using synchronous
  notification (SEA).
- The offsets to access the error block structure and the memory
  position where the OSPM will acknowledge the error assumes a single
  error source;
- such offsets come from ghes_addr_le bios pointer (we will rename it to
  hw_addr_le at the cleanup patch series).

With the new versions, we'll have:

- at least two notification sources on ARMv8 (SEA and GPIO). We may
  end adding more with time;
- other architectures may also have support for bios-first hardware
  errors;
- the number of error block structures is now read from HEST table
  (so it needs that hest_addr_le will be stored at AcpiGedState;
- the offsets to retrieve the addresses are now relative to the offset
  at hest_addr_le.

The new error injection code, which uses either hest_addr_le or
ghes_addr_le (now hw_addr_le) should be able to properly generate
errors from a VM created on 9.1, as it will check if hest_addr_le
is not zero. If it is zero, it will call a backward-compatible
code:

    acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED,
                                                       NULL));
    if (!acpi_ged_state) {
        error_setg(errp, "Can't find ACPI_GED object");
        return;
    }
    ags = &acpi_ged_state->ghes_state;

    if (!ags->hest_addr_le) {
	/* Assumes just a single source_id */
        get_ghes_offsets(le64_to_cpu(ags->hw_error_le),
                         &cper_addr, &read_ack_register_addr);
    } else {
	/* do a for at the HEST table seeking for an specific source_id */
        get_hest_offsets(source_id, le64_to_cpu(ags->hest_addr_le),
                         &cper_addr, &read_ack_register_addr, errp);
    }

Now, a VM created with 9.2 will have multiple sources. The location of the
read_ack_register_addr depends on the number of sources, which can't be
retrieved without a VIOS pointer to the location of the HEST table
(e. g. ags->hest_addr_le).

So, a 9.1 QEMU with a VM created on 9.2 won't be doing the right thing
with regards to the vaule of the ack offset, thus with RAS errors not
working. I can't see any way to make it work.

> >   
> > >  static const VMStateDescription vmstate_ghes_state = {
> > >      .name = "acpi-ged/ghes",
> > > -    .version_id = 1,
> > > -    .minimum_version_id = 1,
> > > +    .version_id = 2,
> > > +    .minimum_version_id = 2,  
> 
> (and IIUC if we set min ver=2, even forward migration should fail.. better
>  test it with an old binary, migrating back and forth)
> 
> > >      .needed = ghes_needed,
> > >      .fields = (const VMStateField[]) {
> > >          VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,  
> >   
> 
> Thanks,
> 

Thanks,
Mauro

---

[PATCH] acpi/generic_event_device: Update GHES migration to cover hest addr

The GHES migration logic at GED should now support HEST table
location too.

Increase migration version and change needed to check for both
ghes_addr_le and hest_addr_le.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
index c5acfb204e5f..bd996d01390c 100644
--- a/hw/acpi/generic_event_device.c
+++ b/hw/acpi/generic_event_device.c
@@ -377,6 +377,34 @@ static const VMStateDescription vmstate_ghes_state = {
     }
 };
 
+static const VMStateDescription vmstate_hest = {
+    .name = "acpi-ghes",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (const VMStateField[]) {
+        VMSTATE_UINT64(hest_addr_le, AcpiGhesState),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static bool hest_needed(void *opaque)
+{
+    AcpiGedState *s = opaque;
+    return s->ghes_state.hest_addr_le;
+}
+
+static const VMStateDescription vmstate_hest_state = {
+    .name = "acpi-ged/ghes",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = hest_needed,
+    .fields = (const VMStateField[]) {
+        VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,
+                       vmstate_hest, AcpiGhesState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_acpi_ged = {
     .name = "acpi-ged",
     .version_id = 1,
@@ -388,6 +416,7 @@ static const VMStateDescription vmstate_acpi_ged = {
     .subsections = (const VMStateDescription * const []) {
         &vmstate_memhp_state,
         &vmstate_ghes_state,
+        &vmstate_hest_state,
         NULL
     }
 };