[PATCH RESEND v2 1/3] acpi/ghes: Extend acpi_ghes_memory_errors() to support multiple CPERs

Gavin Shan posted 3 patches 1 month, 1 week ago
Maintainers: "Michael S. Tsirkin" <mst@redhat.com>, Igor Mammedov <imammedo@redhat.com>, Ani Sinha <anisinha@redhat.com>, Dongjiu Geng <gengdongjiu1@gmail.com>, Peter Maydell <peter.maydell@linaro.org>, Paolo Bonzini <pbonzini@redhat.com>
There is a newer version of this series
[PATCH RESEND v2 1/3] acpi/ghes: Extend acpi_ghes_memory_errors() to support multiple CPERs
Posted by Gavin Shan 1 month, 1 week ago
In the situation where the host and guest have 64KB and 4KB page sizes
respectively, one error on a host page affects 16 guest pages. We need
to send 16 consecutive errors in this specific case.

Extend acpi_ghes_memory_errors() to support multiple CPERs after the
hunk of code to generate the GHES error status is pulled out from
ghes_gen_err_data_uncorrectable_recoverable().

No functional changes intended.

Signed-off-by: Gavin Shan <gshan@redhat.com>
---
 hw/acpi/ghes-stub.c    |  2 +-
 hw/acpi/ghes.c         | 27 ++++++++++++++-------------
 include/hw/acpi/ghes.h |  2 +-
 target/arm/kvm.c       |  7 ++++++-
 4 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c
index 40f660c246..c1f8c9bec9 100644
--- a/hw/acpi/ghes-stub.c
+++ b/hw/acpi/ghes-stub.c
@@ -12,7 +12,7 @@
 #include "hw/acpi/ghes.h"
 
 int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
-                            uint64_t physical_address)
+                            GArray *addresses)
 {
     return -1;
 }
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
index 06555905ce..045b77715f 100644
--- a/hw/acpi/ghes.c
+++ b/hw/acpi/ghes.c
@@ -214,18 +214,13 @@ static void acpi_ghes_build_append_mem_cper(GArray *table,
 
 static void
 ghes_gen_err_data_uncorrectable_recoverable(GArray *block,
-                                            const uint8_t *section_type,
-                                            int data_length)
+                                            const uint8_t *section_type)
 {
     /* invalid fru id: ACPI 4.0: 17.3.2.6.1 Generic Error Data,
      * Table 17-13 Generic Error Data Entry
      */
     QemuUUID fru_id = {};
 
-    /* Build the new generic error status block header */
-    acpi_ghes_generic_error_status(block, ACPI_GEBS_UNCORRECTABLE,
-        0, 0, data_length, ACPI_CPER_SEV_RECOVERABLE);
-
     /* Build this new generic error data entry header */
     acpi_ghes_generic_error_data(block, section_type,
         ACPI_CPER_SEV_RECOVERABLE, 0, 0,
@@ -557,19 +552,20 @@ void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
 }
 
 int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
-                            uint64_t physical_address)
+                            GArray *addresses)
 {
     /* Memory Error Section Type */
     const uint8_t guid[] =
           UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83, \
                   0xED, 0x7C, 0x83, 0xB1);
     Error *errp = NULL;
-    int data_length;
+    int data_length, i;
     GArray *block;
 
     block = g_array_new(false, true /* clear */, 1);
 
-    data_length = ACPI_GHES_DATA_LENGTH + ACPI_GHES_MEM_CPER_LENGTH;
+    data_length = addresses->len *
+                  (ACPI_GHES_DATA_LENGTH + ACPI_GHES_MEM_CPER_LENGTH);
     /*
      * It should not run out of the preallocated memory if adding a new generic
      * error data entry
@@ -577,10 +573,15 @@ int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
     assert((data_length + ACPI_GHES_GESB_SIZE) <=
             ACPI_GHES_MAX_RAW_DATA_LENGTH);
 
-    ghes_gen_err_data_uncorrectable_recoverable(block, guid, data_length);
-
-    /* Build the memory section CPER for above new generic error data entry */
-    acpi_ghes_build_append_mem_cper(block, physical_address);
+    /* Build the new generic error status block header */
+    acpi_ghes_generic_error_status(block, ACPI_GEBS_UNCORRECTABLE, 0, 0,
+                                   data_length, ACPI_CPER_SEV_RECOVERABLE);
+    for (i = 0; i < addresses->len; i++) {
+        ghes_gen_err_data_uncorrectable_recoverable(block, guid);
+        /* Memory section CPER on top of the generic error data entry */
+        acpi_ghes_build_append_mem_cper(block,
+                                        g_array_index(addresses, uint64_t, i));
+    }
 
     /* Report the error */
     ghes_record_cper_errors(ags, block->data, block->len, source_id, &errp);
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
index df2ecbf6e4..a8cbc520d5 100644
--- a/include/hw/acpi/ghes.h
+++ b/include/hw/acpi/ghes.h
@@ -99,7 +99,7 @@ void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
 void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
                           GArray *hardware_errors);
 int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
-                            uint64_t error_physical_addr);
+                            GArray *addresses);
 void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
                              uint16_t source_id, Error **errp);
 
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 4f769d69b3..9a47ac9e3a 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -2434,6 +2434,7 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
     ram_addr_t ram_addr;
     hwaddr paddr;
     AcpiGhesState *ags;
+    GArray *addresses;
 
     assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);
 
@@ -2442,6 +2443,7 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
         ram_addr = qemu_ram_addr_from_host(addr);
         if (ram_addr != RAM_ADDR_INVALID &&
             kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
+            addresses = g_array_new(false, false, sizeof(paddr));
             kvm_hwpoison_page_add(ram_addr);
             /*
              * If this is a BUS_MCEERR_AR, we know we have been called
@@ -2454,16 +2456,19 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
              * later from the main thread, so doing the injection of
              * the error would be more complicated.
              */
+            g_array_append_vals(addresses, &paddr, 1);
             if (code == BUS_MCEERR_AR) {
                 kvm_cpu_synchronize_state(c);
                 if (!acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
-                                             paddr)) {
+                                             addresses)) {
                     kvm_inject_arm_sea(c);
                 } else {
                     error_report("failed to record the error");
                     abort();
                 }
             }
+
+            g_array_free(addresses, true);
             return;
         }
         if (code == BUS_MCEERR_AO) {
-- 
2.51.0
Re: [PATCH RESEND v2 1/3] acpi/ghes: Extend acpi_ghes_memory_errors() to support multiple CPERs
Posted by Igor Mammedov 2 weeks ago
On Tue,  7 Oct 2025 16:08:08 +1000
Gavin Shan <gshan@redhat.com> wrote:

> In the situation where host and guest has 64KB and 4KB page sizes, one
> error on the host's page affects 16 guest's pages. we need to send 16
> consective errors in this specific case.
> 
> Extend acpi_ghes_memory_errors() to support multiple CPERs after the
> hunk of code to generate the GHES error status is pulled out from
> ghes_gen_err_data_uncorrectable_recoverable().
> 
> No functional changes intended.
> 
> Signed-off-by: Gavin Shan <gshan@redhat.com>
> ---
>  hw/acpi/ghes-stub.c    |  2 +-
>  hw/acpi/ghes.c         | 27 ++++++++++++++-------------
>  include/hw/acpi/ghes.h |  2 +-
>  target/arm/kvm.c       |  7 ++++++-
>  4 files changed, 22 insertions(+), 16 deletions(-)
> 
> diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c
> index 40f660c246..c1f8c9bec9 100644
> --- a/hw/acpi/ghes-stub.c
> +++ b/hw/acpi/ghes-stub.c
> @@ -12,7 +12,7 @@
>  #include "hw/acpi/ghes.h"
>  
>  int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> -                            uint64_t physical_address)
> +                            GArray *addresses)
>  {
>      return -1;
>  }
> diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
> index 06555905ce..045b77715f 100644
> --- a/hw/acpi/ghes.c
> +++ b/hw/acpi/ghes.c
> @@ -214,18 +214,13 @@ static void acpi_ghes_build_append_mem_cper(GArray *table,
>  
>  static void
>  ghes_gen_err_data_uncorrectable_recoverable(GArray *block,
> -                                            const uint8_t *section_type,
> -                                            int data_length)
> +                                            const uint8_t *section_type)
>  {
>      /* invalid fru id: ACPI 4.0: 17.3.2.6.1 Generic Error Data,
>       * Table 17-13 Generic Error Data Entry
>       */
>      QemuUUID fru_id = {};
>  
> -    /* Build the new generic error status block header */
> -    acpi_ghes_generic_error_status(block, ACPI_GEBS_UNCORRECTABLE,
> -        0, 0, data_length, ACPI_CPER_SEV_RECOVERABLE);
> -
>      /* Build this new generic error data entry header */
>      acpi_ghes_generic_error_data(block, section_type,
>          ACPI_CPER_SEV_RECOVERABLE, 0, 0,
> @@ -557,19 +552,20 @@ void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
>  }
>  
>  int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> -                            uint64_t physical_address)
> +                            GArray *addresses)
>  {
>      /* Memory Error Section Type */
>      const uint8_t guid[] =
>            UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83, \
>                    0xED, 0x7C, 0x83, 0xB1);
>      Error *errp = NULL;
> -    int data_length;
> +    int data_length, i;
>      GArray *block;
>  
>      block = g_array_new(false, true /* clear */, 1);
>  
> -    data_length = ACPI_GHES_DATA_LENGTH + ACPI_GHES_MEM_CPER_LENGTH;
> +    data_length = addresses->len *
> +                  (ACPI_GHES_DATA_LENGTH + ACPI_GHES_MEM_CPER_LENGTH);
>      /*
>       * It should not run out of the preallocated memory if adding a new generic
>       * error data entry
> @@ -577,10 +573,15 @@ int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>      assert((data_length + ACPI_GHES_GESB_SIZE) <=
>              ACPI_GHES_MAX_RAW_DATA_LENGTH);
>  
> -    ghes_gen_err_data_uncorrectable_recoverable(block, guid, data_length);
> -
> -    /* Build the memory section CPER for above new generic error data entry */
> -    acpi_ghes_build_append_mem_cper(block, physical_address);
> +    /* Build the new generic error status block header */
> +    acpi_ghes_generic_error_status(block, ACPI_GEBS_UNCORRECTABLE, 0, 0,
                                             ^^^^^
With the following loop, it might not be enough. See ACPI 6.5, Table 18.11: Status field

Don't we need to set multiple_foo bit(s) and also fix 'Error Data Entry Count' bits?


> +                                   data_length, ACPI_CPER_SEV_RECOVERABLE);
> +    for (i = 0; i < addresses->len; i++) {
> +        ghes_gen_err_data_uncorrectable_recoverable(block, guid);
> +        /* Memory section CPER on top of the generic error data entry */
> +        acpi_ghes_build_append_mem_cper(block,
> +                                        g_array_index(addresses, uint64_t, i));
> +    }

>  
>      /* Report the error */
>      ghes_record_cper_errors(ags, block->data, block->len, source_id, &errp);
> diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
> index df2ecbf6e4..a8cbc520d5 100644
> --- a/include/hw/acpi/ghes.h
> +++ b/include/hw/acpi/ghes.h
> @@ -99,7 +99,7 @@ void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
>  void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
>                            GArray *hardware_errors);
>  int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> -                            uint64_t error_physical_addr);
> +                            GArray *addresses);
>  void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
>                               uint16_t source_id, Error **errp);
>  
> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
> index 4f769d69b3..9a47ac9e3a 100644
> --- a/target/arm/kvm.c
> +++ b/target/arm/kvm.c
> @@ -2434,6 +2434,7 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>      ram_addr_t ram_addr;
>      hwaddr paddr;
>      AcpiGhesState *ags;
> +    GArray *addresses;
>  
>      assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);
>  
> @@ -2442,6 +2443,7 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>          ram_addr = qemu_ram_addr_from_host(addr);
>          if (ram_addr != RAM_ADDR_INVALID &&
>              kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
> +            addresses = g_array_new(false, false, sizeof(paddr));
>              kvm_hwpoison_page_add(ram_addr);
>              /*
>               * If this is a BUS_MCEERR_AR, we know we have been called
> @@ -2454,16 +2456,19 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>               * later from the main thread, so doing the injection of
>               * the error would be more complicated.
>               */
> +            g_array_append_vals(addresses, &paddr, 1);
>              if (code == BUS_MCEERR_AR) {
>                  kvm_cpu_synchronize_state(c);
>                  if (!acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
> -                                             paddr)) {
> +                                             addresses)) {
>                      kvm_inject_arm_sea(c);
>                  } else {
>                      error_report("failed to record the error");
>                      abort();
>                  }
>              }
> +
> +            g_array_free(addresses, true);
>              return;
>          }
>          if (code == BUS_MCEERR_AO) {
Re: [PATCH RESEND v2 1/3] acpi/ghes: Extend acpi_ghes_memory_errors() to support multiple CPERs
Posted by Gavin Shan 1 week, 4 days ago
On 10/31/25 11:17 PM, Igor Mammedov wrote:
> On Tue,  7 Oct 2025 16:08:08 +1000
> Gavin Shan <gshan@redhat.com> wrote:
> 
>> In the situation where host and guest has 64KB and 4KB page sizes, one
>> error on the host's page affects 16 guest's pages. we need to send 16
>> consective errors in this specific case.
>>
>> Extend acpi_ghes_memory_errors() to support multiple CPERs after the
>> hunk of code to generate the GHES error status is pulled out from
>> ghes_gen_err_data_uncorrectable_recoverable().
>>
>> No functional changes intended.
>>
>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>> ---
>>   hw/acpi/ghes-stub.c    |  2 +-
>>   hw/acpi/ghes.c         | 27 ++++++++++++++-------------
>>   include/hw/acpi/ghes.h |  2 +-
>>   target/arm/kvm.c       |  7 ++++++-
>>   4 files changed, 22 insertions(+), 16 deletions(-)
>>
>> diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c
>> index 40f660c246..c1f8c9bec9 100644
>> --- a/hw/acpi/ghes-stub.c
>> +++ b/hw/acpi/ghes-stub.c
>> @@ -12,7 +12,7 @@
>>   #include "hw/acpi/ghes.h"
>>   
>>   int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>> -                            uint64_t physical_address)
>> +                            GArray *addresses)
>>   {
>>       return -1;
>>   }
>> diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
>> index 06555905ce..045b77715f 100644
>> --- a/hw/acpi/ghes.c
>> +++ b/hw/acpi/ghes.c
>> @@ -214,18 +214,13 @@ static void acpi_ghes_build_append_mem_cper(GArray *table,
>>   
>>   static void
>>   ghes_gen_err_data_uncorrectable_recoverable(GArray *block,
>> -                                            const uint8_t *section_type,
>> -                                            int data_length)
>> +                                            const uint8_t *section_type)
>>   {
>>       /* invalid fru id: ACPI 4.0: 17.3.2.6.1 Generic Error Data,
>>        * Table 17-13 Generic Error Data Entry
>>        */
>>       QemuUUID fru_id = {};
>>   
>> -    /* Build the new generic error status block header */
>> -    acpi_ghes_generic_error_status(block, ACPI_GEBS_UNCORRECTABLE,
>> -        0, 0, data_length, ACPI_CPER_SEV_RECOVERABLE);
>> -
>>       /* Build this new generic error data entry header */
>>       acpi_ghes_generic_error_data(block, section_type,
>>           ACPI_CPER_SEV_RECOVERABLE, 0, 0,
>> @@ -557,19 +552,20 @@ void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
>>   }
>>   
>>   int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>> -                            uint64_t physical_address)
>> +                            GArray *addresses)
>>   {
>>       /* Memory Error Section Type */
>>       const uint8_t guid[] =
>>             UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83, \
>>                     0xED, 0x7C, 0x83, 0xB1);
>>       Error *errp = NULL;
>> -    int data_length;
>> +    int data_length, i;
>>       GArray *block;
>>   
>>       block = g_array_new(false, true /* clear */, 1);
>>   
>> -    data_length = ACPI_GHES_DATA_LENGTH + ACPI_GHES_MEM_CPER_LENGTH;
>> +    data_length = addresses->len *
>> +                  (ACPI_GHES_DATA_LENGTH + ACPI_GHES_MEM_CPER_LENGTH);
>>       /*
>>        * It should not run out of the preallocated memory if adding a new generic
>>        * error data entry
>> @@ -577,10 +573,15 @@ int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>>       assert((data_length + ACPI_GHES_GESB_SIZE) <=
>>               ACPI_GHES_MAX_RAW_DATA_LENGTH);
>>   
>> -    ghes_gen_err_data_uncorrectable_recoverable(block, guid, data_length);
>> -
>> -    /* Build the memory section CPER for above new generic error data entry */
>> -    acpi_ghes_build_append_mem_cper(block, physical_address);
>> +    /* Build the new generic error status block header */
>> +    acpi_ghes_generic_error_status(block, ACPI_GEBS_UNCORRECTABLE, 0, 0,
>                                               ^^^^^
> with following loop, it might be no enough. See ACPI6.5 Tables 18.11: Status field
> 
> Don't we need to set multiple_foo bit(s) and also fix 'Error Data Entry Count' bits?
> 

Ack, those two fields need to be fixed accordingly.

>> +                                   data_length, ACPI_CPER_SEV_RECOVERABLE);
>> +    for (i = 0; i < addresses->len; i++) {
>> +        ghes_gen_err_data_uncorrectable_recoverable(block, guid);
>> +        /* Memory section CPER on top of the generic error data entry */
>> +        acpi_ghes_build_append_mem_cper(block,
>> +                                        g_array_index(addresses, uint64_t, i));
>> +    }
> 
>>   
>>       /* Report the error */
>>       ghes_record_cper_errors(ags, block->data, block->len, source_id, &errp);
>> diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
>> index df2ecbf6e4..a8cbc520d5 100644
>> --- a/include/hw/acpi/ghes.h
>> +++ b/include/hw/acpi/ghes.h
>> @@ -99,7 +99,7 @@ void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
>>   void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
>>                             GArray *hardware_errors);
>>   int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>> -                            uint64_t error_physical_addr);
>> +                            GArray *addresses);
>>   void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
>>                                uint16_t source_id, Error **errp);
>>   
>> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
>> index 4f769d69b3..9a47ac9e3a 100644
>> --- a/target/arm/kvm.c
>> +++ b/target/arm/kvm.c
>> @@ -2434,6 +2434,7 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>>       ram_addr_t ram_addr;
>>       hwaddr paddr;
>>       AcpiGhesState *ags;
>> +    GArray *addresses;
>>   
>>       assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);
>>   
>> @@ -2442,6 +2443,7 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>>           ram_addr = qemu_ram_addr_from_host(addr);
>>           if (ram_addr != RAM_ADDR_INVALID &&
>>               kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
>> +            addresses = g_array_new(false, false, sizeof(paddr));
>>               kvm_hwpoison_page_add(ram_addr);
>>               /*
>>                * If this is a BUS_MCEERR_AR, we know we have been called
>> @@ -2454,16 +2456,19 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>>                * later from the main thread, so doing the injection of
>>                * the error would be more complicated.
>>                */
>> +            g_array_append_vals(addresses, &paddr, 1);
>>               if (code == BUS_MCEERR_AR) {
>>                   kvm_cpu_synchronize_state(c);
>>                   if (!acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
>> -                                             paddr)) {
>> +                                             addresses)) {
>>                       kvm_inject_arm_sea(c);
>>                   } else {
>>                       error_report("failed to record the error");
>>                       abort();
>>                   }
>>               }
>> +
>> +            g_array_free(addresses, true);
>>               return;
>>           }
>>           if (code == BUS_MCEERR_AO) {

Thanks,
Gavin
Re: [PATCH RESEND v2 1/3] acpi/ghes: Extend acpi_ghes_memory_errors() to support multiple CPERs
Posted by Jonathan Cameron via 2 weeks ago
On Tue,  7 Oct 2025 16:08:08 +1000
Gavin Shan <gshan@redhat.com> wrote:

> In the situation where host and guest has 64KB and 4KB page sizes, one
> error on the host's page affects 16 guest's pages. we need to send 16
> consective errors in this specific case.

Hi Gavin,

Sorry this one has been on my to review list far too long.

> 
> Extend acpi_ghes_memory_errors() to support multiple CPERs after the
> hunk of code to generate the GHES error status is pulled out from
> ghes_gen_err_data_uncorrectable_recoverable().

I think this description needs to be more detailed with respect to how those
multiple CPERs are handled.  Specifically that they are in a single
error status block (so should only represent related errors.)

This is to make it clear this isn't queuing events, but instead just
presenting them as one block.

> 
> No functional changes intended.
> 
> Signed-off-by: Gavin Shan <gshan@redhat.com>
> ---
>  hw/acpi/ghes-stub.c    |  2 +-
>  hw/acpi/ghes.c         | 27 ++++++++++++++-------------
>  include/hw/acpi/ghes.h |  2 +-
>  target/arm/kvm.c       |  7 ++++++-
>  4 files changed, 22 insertions(+), 16 deletions(-)

> diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
> index 06555905ce..045b77715f 100644
> --- a/hw/acpi/ghes.c
> +++ b/hw/acpi/ghes.c
> @@ -214,18 +214,13 @@ static void acpi_ghes_build_append_mem_cper(GArray *table,
>  
>  static void
>  ghes_gen_err_data_uncorrectable_recoverable(GArray *block,
> -                                            const uint8_t *section_type,
> -                                            int data_length)
> +                                            const uint8_t *section_type)
>  {
>      /* invalid fru id: ACPI 4.0: 17.3.2.6.1 Generic Error Data,
>       * Table 17-13 Generic Error Data Entry
>       */
>      QemuUUID fru_id = {};
>  
> -    /* Build the new generic error status block header */
> -    acpi_ghes_generic_error_status(block, ACPI_GEBS_UNCORRECTABLE,
> -        0, 0, data_length, ACPI_CPER_SEV_RECOVERABLE);
> -

With this bit gone, is it worth having the helper?  Perhaps just move
the remains to where it is called.

>      /* Build this new generic error data entry header */
>      acpi_ghes_generic_error_data(block, section_type,
>          ACPI_CPER_SEV_RECOVERABLE, 0, 0,

> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
> index 4f769d69b3..9a47ac9e3a 100644
> --- a/target/arm/kvm.c
> +++ b/target/arm/kvm.c
> @@ -2434,6 +2434,7 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>      ram_addr_t ram_addr;
>      hwaddr paddr;
>      AcpiGhesState *ags;
> +    GArray *addresses;
>  
>      assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);
>  
> @@ -2442,6 +2443,7 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>          ram_addr = qemu_ram_addr_from_host(addr);
>          if (ram_addr != RAM_ADDR_INVALID &&
>              kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
> +            addresses = g_array_new(false, false, sizeof(paddr));

Given you are going to free in all paths, maybe a g_autofree?

Also, we know this only grows to a fixed max size (16 after patch 3), so maybe just
provide a hwaddr paddrs[16]; and pass forwards the count?

>              kvm_hwpoison_page_add(ram_addr);
>              /*
>               * If this is a BUS_MCEERR_AR, we know we have been called
> @@ -2454,16 +2456,19 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>               * later from the main thread, so doing the injection of
>               * the error would be more complicated.
>               */
> +            g_array_append_vals(addresses, &paddr, 1);
>              if (code == BUS_MCEERR_AR) {
>                  kvm_cpu_synchronize_state(c);
>                  if (!acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
> -                                             paddr)) {
> +                                             addresses)) {
>                      kvm_inject_arm_sea(c);
>                  } else {
>                      error_report("failed to record the error");
>                      abort();
>                  }
>              }
> +
> +            g_array_free(addresses, true);
>              return;
>          }
>          if (code == BUS_MCEERR_AO) {
Re: [PATCH RESEND v2 1/3] acpi/ghes: Extend acpi_ghes_memory_errors() to support multiple CPERs
Posted by Jonathan Cameron via 2 weeks ago
On Fri, 31 Oct 2025 09:58:50 +0000
Jonathan Cameron <jonathan.cameron@huawei.com> wrote:

> On Tue,  7 Oct 2025 16:08:08 +1000
> Gavin Shan <gshan@redhat.com> wrote:
> 
> > In the situation where host and guest has 64KB and 4KB page sizes, one
> > error on the host's page affects 16 guest's pages. we need to send 16
> > consective errors in this specific case.  
> 
> Hi Gavin,
> 
> Sorry this one has been on my to review list far too long.
> 
> > 
> > Extend acpi_ghes_memory_errors() to support multiple CPERs after the
> > hunk of code to generate the GHES error status is pulled out from
> > ghes_gen_err_data_uncorrectable_recoverable().  
> 
> I think this description needs to be more detailed wrt to how those
> multiple CPERs are handled.  Specifically that they are in a single
> error status block (so should only represent related errors.)
> 
> This is to make it clear this isn't queuing events, but instead just
> presenting them as one block.
> 
> > 
> > No functional changes intended.
> > 
> > Signed-off-by: Gavin Shan <gshan@redhat.com>
> > ---
> >  hw/acpi/ghes-stub.c    |  2 +-
> >  hw/acpi/ghes.c         | 27 ++++++++++++++-------------
> >  include/hw/acpi/ghes.h |  2 +-
> >  target/arm/kvm.c       |  7 ++++++-
> >  4 files changed, 22 insertions(+), 16 deletions(-)  
> 
> > diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
> > index 06555905ce..045b77715f 100644
> > --- a/hw/acpi/ghes.c
> > +++ b/hw/acpi/ghes.c
> > @@ -214,18 +214,13 @@ static void acpi_ghes_build_append_mem_cper(GArray *table,
> >  
> >  static void
> >  ghes_gen_err_data_uncorrectable_recoverable(GArray *block,
> > -                                            const uint8_t *section_type,
> > -                                            int data_length)
> > +                                            const uint8_t *section_type)
> >  {
> >      /* invalid fru id: ACPI 4.0: 17.3.2.6.1 Generic Error Data,
> >       * Table 17-13 Generic Error Data Entry
> >       */
> >      QemuUUID fru_id = {};
> >  
> > -    /* Build the new generic error status block header */
> > -    acpi_ghes_generic_error_status(block, ACPI_GEBS_UNCORRECTABLE,
> > -        0, 0, data_length, ACPI_CPER_SEV_RECOVERABLE);
> > -]  
> 
> With this bit gone, is it worth having the helper?  Perhaps just move
> the remains to where it is called.
> 
> >      /* Build this new generic error data entry header */
> >      acpi_ghes_generic_error_data(block, section_type,
> >          ACPI_CPER_SEV_RECOVERABLE, 0, 0,  
> 
> > diff --git a/target/arm/kvm.c b/target/arm/kvm.c
> > index 4f769d69b3..9a47ac9e3a 100644
> > --- a/target/arm/kvm.c
> > +++ b/target/arm/kvm.c
> > @@ -2434,6 +2434,7 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
> >      ram_addr_t ram_addr;
> >      hwaddr paddr;
> >      AcpiGhesState *ags;
> > +    GArray *addresses;
> >  
> >      assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);
> >  
> > @@ -2442,6 +2443,7 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
> >          ram_addr = qemu_ram_addr_from_host(addr);
> >          if (ram_addr != RAM_ADDR_INVALID &&
> >              kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
> > +            addresses = g_array_new(false, false, sizeof(paddr));  
> 
> Given you are going to free in all paths, maybe a g_autofree?

Oddly there is no use of g_auto() with g_array in qemu. Ah well, next suggestion is
my preference anyway.

> 
> Also, we know this only grows to a fixed max size (16 after patch 3), so maybe just
> provide a hwaddr paddrs[16]; and pass forwards the count?
> 
> >              kvm_hwpoison_page_add(ram_addr);
> >              /*
> >               * If this is a BUS_MCEERR_AR, we know we have been called
> > @@ -2454,16 +2456,19 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
> >               * later from the main thread, so doing the injection of
> >               * the error would be more complicated.
> >               */
> > +            g_array_append_vals(addresses, &paddr, 1);
> >              if (code == BUS_MCEERR_AR) {
> >                  kvm_cpu_synchronize_state(c);
> >                  if (!acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
> > -                                             paddr)) {
> > +                                             addresses)) {
> >                      kvm_inject_arm_sea(c);
> >                  } else {
> >                      error_report("failed to record the error");
> >                      abort();
> >                  }
> >              }
> > +
> > +            g_array_free(addresses, true);
> >              return;
> >          }
> >          if (code == BUS_MCEERR_AO) {  
>
Re: [PATCH RESEND v2 1/3] acpi/ghes: Extend acpi_ghes_memory_errors() to support multiple CPERs
Posted by Gavin Shan 1 week, 4 days ago
Hi Jonathan,

On 10/31/25 8:08 PM, Jonathan Cameron wrote:
> On Fri, 31 Oct 2025 09:58:50 +0000
> Jonathan Cameron <jonathan.cameron@huawei.com> wrote:
> 
>> On Tue,  7 Oct 2025 16:08:08 +1000
>> Gavin Shan <gshan@redhat.com> wrote:
>>
>>> In the situation where host and guest has 64KB and 4KB page sizes, one
>>> error on the host's page affects 16 guest's pages. we need to send 16
>>> consective errors in this specific case.
>>
>> Hi Gavin,
>>
>> Sorry this one has been on my to review list far too long.
>>

No worries. Thanks for your review and comments. Igor suggested having a
separate error source for each vCPU. That way, there will be multiple
read_ack_reg registers, and the specific issue addressed by this series can be resolved.

Answering your question below.

>>>
>>> Extend acpi_ghes_memory_errors() to support multiple CPERs after the
>>> hunk of code to generate the GHES error status is pulled out from
>>> ghes_gen_err_data_uncorrectable_recoverable().
>>
>> I think this description needs to be more detailed wrt to how those
>> multiple CPERs are handled.  Specifically that they are in a single
>> error status block (so should only represent related errors.)
>>
>> This is to make it clear this isn't queuing events, but instead just
>> presenting them as one block.
>>
>>>
>>> No functional changes intended.
>>>
>>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>>> ---
>>>   hw/acpi/ghes-stub.c    |  2 +-
>>>   hw/acpi/ghes.c         | 27 ++++++++++++++-------------
>>>   include/hw/acpi/ghes.h |  2 +-
>>>   target/arm/kvm.c       |  7 ++++++-
>>>   4 files changed, 22 insertions(+), 16 deletions(-)
>>
>>> diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
>>> index 06555905ce..045b77715f 100644
>>> --- a/hw/acpi/ghes.c
>>> +++ b/hw/acpi/ghes.c
>>> @@ -214,18 +214,13 @@ static void acpi_ghes_build_append_mem_cper(GArray *table,
>>>   
>>>   static void
>>>   ghes_gen_err_data_uncorrectable_recoverable(GArray *block,
>>> -                                            const uint8_t *section_type,
>>> -                                            int data_length)
>>> +                                            const uint8_t *section_type)
>>>   {
>>>       /* invalid fru id: ACPI 4.0: 17.3.2.6.1 Generic Error Data,
>>>        * Table 17-13 Generic Error Data Entry
>>>        */
>>>       QemuUUID fru_id = {};
>>>   
>>> -    /* Build the new generic error status block header */
>>> -    acpi_ghes_generic_error_status(block, ACPI_GEBS_UNCORRECTABLE,
>>> -        0, 0, data_length, ACPI_CPER_SEV_RECOVERABLE);
>>> -
>>
>> With this bit gone, is it worth having the helper?  Perhaps just move
>> the remains to where it is called.
>>

Ack.

>>>       /* Build this new generic error data entry header */
>>>       acpi_ghes_generic_error_data(block, section_type,
>>>           ACPI_CPER_SEV_RECOVERABLE, 0, 0,
>>
>>> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
>>> index 4f769d69b3..9a47ac9e3a 100644
>>> --- a/target/arm/kvm.c
>>> +++ b/target/arm/kvm.c
>>> @@ -2434,6 +2434,7 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>>>       ram_addr_t ram_addr;
>>>       hwaddr paddr;
>>>       AcpiGhesState *ags;
>>> +    GArray *addresses;
>>>   
>>>       assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);
>>>   
>>> @@ -2442,6 +2443,7 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>>>           ram_addr = qemu_ram_addr_from_host(addr);
>>>           if (ram_addr != RAM_ADDR_INVALID &&
>>>               kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
>>> +            addresses = g_array_new(false, false, sizeof(paddr));
>>
>> Given you are going to free in all paths, maybe a g_autofree?
> 
> Oddly there is no use of g_auto() with g_array in qemu. Ah well, next suggestion is
> my preference anyway.
> 

Something like the below can be used, and a GArray is interchangeable with a
regular array. I guess GArray is preferred in QEMU.

     g_autoptr(GArray) sources = g_array_new(false, true,
                                             sizeof(AcpiNotificationSourceId));

>>
>> Also, we know this only grows to a fixed max size (16 after patch 3), so maybe just
>> provide a hwaddr paddrs[16]; and pass forwards the count?
>>
>>>               kvm_hwpoison_page_add(ram_addr);
>>>               /*
>>>                * If this is a BUS_MCEERR_AR, we know we have been called
>>> @@ -2454,16 +2456,19 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>>>                * later from the main thread, so doing the injection of
>>>                * the error would be more complicated.
>>>                */
>>> +            g_array_append_vals(addresses, &paddr, 1);
>>>               if (code == BUS_MCEERR_AR) {
>>>                   kvm_cpu_synchronize_state(c);
>>>                   if (!acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
>>> -                                             paddr)) {
>>> +                                             addresses)) {
>>>                       kvm_inject_arm_sea(c);
>>>                   } else {
>>>                       error_report("failed to record the error");
>>>                       abort();
>>>                   }
>>>               }
>>> +
>>> +            g_array_free(addresses, true);
>>>               return;
>>>           }
>>>           if (code == BUS_MCEERR_AO) {
>>

Thanks,
Gavin