Use error_abort in acpi_ghes_memory_errors() so that the caller needn't
explicitly call abort() on errors. With this change, its return value
isn't needed any more.
Suggested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
hw/acpi/ghes-stub.c | 6 +++---
hw/acpi/ghes.c | 15 ++++-----------
include/hw/acpi/ghes.h | 5 +++--
target/arm/kvm.c | 10 +++-------
4 files changed, 13 insertions(+), 23 deletions(-)
diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c
index 4faf573aeb..4ef914ffc5 100644
--- a/hw/acpi/ghes-stub.c
+++ b/hw/acpi/ghes-stub.c
@@ -11,10 +11,10 @@
#include "qemu/osdep.h"
#include "hw/acpi/ghes.h"
-int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
- uint64_t *addresses, uint32_t num_of_addresses)
+void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
+ uint64_t *addresses, uint32_t num_of_addresses,
+ Error **errp)
{
- return -1;
}
AcpiGhesState *acpi_ghes_get_state(void)
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
index 055e5d719a..aa469c03f2 100644
--- a/hw/acpi/ghes.c
+++ b/hw/acpi/ghes.c
@@ -543,8 +543,9 @@ void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
notifier_list_notify(&acpi_generic_error_notifiers, &source_id);
}
-int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
- uint64_t *addresses, uint32_t num_of_addresses)
+void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
+ uint64_t *addresses, uint32_t num_of_addresses,
+ Error **errp)
{
/* Memory Error Section Type */
const uint8_t guid[] =
@@ -555,7 +556,6 @@ int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
* Table 17-13 Generic Error Data Entry
*/
QemuUUID fru_id = {};
- Error *errp = NULL;
int data_length;
GArray *block;
uint32_t block_status, i;
@@ -592,16 +592,9 @@ int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
}
/* Report the error */
- ghes_record_cper_errors(ags, block->data, block->len, source_id, &errp);
+ ghes_record_cper_errors(ags, block->data, block->len, source_id, errp);
g_array_free(block, true);
-
- if (errp) {
- error_report_err(errp);
- return -1;
- }
-
- return 0;
}
AcpiGhesState *acpi_ghes_get_state(void)
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
index f73908985d..35c7bbbb01 100644
--- a/include/hw/acpi/ghes.h
+++ b/include/hw/acpi/ghes.h
@@ -98,8 +98,9 @@ void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
const char *oem_id, const char *oem_table_id);
void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
GArray *hardware_errors);
-int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
- uint64_t *addresses, uint32_t num_of_addresses);
+void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
+ uint64_t *addresses, uint32_t num_of_addresses,
+ Error **errp);
void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
uint16_t source_id, Error **errp);
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 459ca4a9b0..a889315606 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -2458,13 +2458,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
addresses[0] = paddr;
if (code == BUS_MCEERR_AR) {
kvm_cpu_synchronize_state(c);
- if (!acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
- addresses, 1)) {
- kvm_inject_arm_sea(c);
- } else {
- error_report("failed to record the error");
- abort();
- }
+ acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
+ addresses, 1, &error_abort);
+ kvm_inject_arm_sea(c);
}
return;
}
--
2.51.0
Gavin Shan <gshan@redhat.com> writes:
> Use error_abort in acpi_ghes_memory_errors() so that the caller needn't
> explicitly call abort() on errors. With this change, its return value
> isn't needed any more.
>
> Suggested-by: Igor Mammedov <imammedo@redhat.com>
> Signed-off-by: Gavin Shan <gshan@redhat.com>
> ---
> hw/acpi/ghes-stub.c | 6 +++---
> hw/acpi/ghes.c | 15 ++++-----------
> include/hw/acpi/ghes.h | 5 +++--
> target/arm/kvm.c | 10 +++-------
> 4 files changed, 13 insertions(+), 23 deletions(-)
>
> diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c
> index 4faf573aeb..4ef914ffc5 100644
> --- a/hw/acpi/ghes-stub.c
> +++ b/hw/acpi/ghes-stub.c
> @@ -11,10 +11,10 @@
> #include "qemu/osdep.h"
> #include "hw/acpi/ghes.h"
>
> -int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> - uint64_t *addresses, uint32_t num_of_addresses)
> +void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> + uint64_t *addresses, uint32_t num_of_addresses,
> + Error **errp)
> {
> - return -1;
> }
Before the patch, this function always fails: it returns -1.
Afterwards, it always succeeds: it doesn't set @errp.
Which one is correct?
>
> AcpiGhesState *acpi_ghes_get_state(void)
> diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
> index 055e5d719a..aa469c03f2 100644
> --- a/hw/acpi/ghes.c
> +++ b/hw/acpi/ghes.c
> @@ -543,8 +543,9 @@ void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
> notifier_list_notify(&acpi_generic_error_notifiers, &source_id);
> }
>
> -int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> - uint64_t *addresses, uint32_t num_of_addresses)
> +void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> + uint64_t *addresses, uint32_t num_of_addresses,
> + Error **errp)
qapi/error.h:
* - Whenever practical, also return a value that indicates success /
* failure. This can make the error checking more concise, and can
* avoid useless error object creation and destruction. Note that
* we still have many functions returning void. We recommend
* • bool-valued functions return true on success / false on failure,
* • pointer-valued functions return non-null / null pointer, and
* • integer-valued functions return non-negative / negative.
> {
> /* Memory Error Section Type */
> const uint8_t guid[] =
> @@ -555,7 +556,6 @@ int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> * Table 17-13 Generic Error Data Entry
> */
> QemuUUID fru_id = {};
> - Error *errp = NULL;
> int data_length;
> GArray *block;
> uint32_t block_status, i;
> @@ -592,16 +592,9 @@ int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> }
>
> /* Report the error */
> - ghes_record_cper_errors(ags, block->data, block->len, source_id, &errp);
> + ghes_record_cper_errors(ags, block->data, block->len, source_id, errp);
>
> g_array_free(block, true);
> -
> - if (errp) {
> - error_report_err(errp);
> - return -1;
> - }
> -
> - return 0;
> }
The error reporting moves into the caller.
>
> AcpiGhesState *acpi_ghes_get_state(void)
> diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
> index f73908985d..35c7bbbb01 100644
> --- a/include/hw/acpi/ghes.h
> +++ b/include/hw/acpi/ghes.h
> @@ -98,8 +98,9 @@ void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
> const char *oem_id, const char *oem_table_id);
> void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
> GArray *hardware_errors);
> -int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> - uint64_t *addresses, uint32_t num_of_addresses);
> +void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> + uint64_t *addresses, uint32_t num_of_addresses,
> + Error **errp);
> void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
> uint16_t source_id, Error **errp);
>
> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
> index 459ca4a9b0..a889315606 100644
> --- a/target/arm/kvm.c
> +++ b/target/arm/kvm.c
> @@ -2458,13 +2458,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
> addresses[0] = paddr;
> if (code == BUS_MCEERR_AR) {
> kvm_cpu_synchronize_state(c);
> - if (!acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
> - addresses, 1)) {
> - kvm_inject_arm_sea(c);
> - } else {
> - error_report("failed to record the error");
> - abort();
> - }
> + acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
> + addresses, 1, &error_abort);
> + kvm_inject_arm_sea(c);
Before the patch, we get two error reports, like this:
qemu-system-FOO: OSPM does not acknowledge previous error, so can not record CPER for current error anymore
qemu-system-FOO: failed to record the error
Aborted (core dumped)
Such error cascades should be avoided.
Afterwards, we get one:
Unexpected error at SOURCE-FILE:LINE-NUMBER:
qemu-system-FOO: OSPM does not acknowledge previous error, so can not record CPER for current error anymore
Aborted (core dumped)
Are all errors reported by acpi_ghes_memory_errors() programming errors,
i.e. when an error is reported, there's a bug for us to fix?
If not, abort() is wrong before the patch, and &error_abort is wrong
afterwards.
You can use &error_fatal for fatal errors that are not programming
errors.
> }
> return;
> }
Hi Markus,
On 11/11/25 3:25 PM, Markus Armbruster wrote:
> Gavin Shan <gshan@redhat.com> writes:
>
>> Use error_abort in acpi_ghes_memory_errors() so that the caller needn't
>> explicitly call abort() on errors. With this change, its return value
>> isn't needed any more.
>>
>> Suggested-by: Igor Mammedov <imammedo@redhat.com>
>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>> ---
>> hw/acpi/ghes-stub.c | 6 +++---
>> hw/acpi/ghes.c | 15 ++++-----------
>> include/hw/acpi/ghes.h | 5 +++--
>> target/arm/kvm.c | 10 +++-------
>> 4 files changed, 13 insertions(+), 23 deletions(-)
>>
>> diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c
>> index 4faf573aeb..4ef914ffc5 100644
>> --- a/hw/acpi/ghes-stub.c
>> +++ b/hw/acpi/ghes-stub.c
>> @@ -11,10 +11,10 @@
>> #include "qemu/osdep.h"
>> #include "hw/acpi/ghes.h"
>>
>> -int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>> - uint64_t *addresses, uint32_t num_of_addresses)
>> +void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>> + uint64_t *addresses, uint32_t num_of_addresses,
>> + Error **errp)
>> {
>> - return -1;
>> }
>
> Before the patch, this function always fails: it returns -1.
>
> Afterwards, it always succeeds: it doesn't set @errp.
>
> Which one is correct?
>
Both are correct. This variant is only used on !CONFIG_ACPI_APEI. In this case,
there is no chance to call this variant in the only caller kvm_arch_on_sigbus_vcpu().
acpi_ghes_get_state() returns NULL on !CONFIG_ACPI_APEI there.
>>
>> AcpiGhesState *acpi_ghes_get_state(void)
>> diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
>> index 055e5d719a..aa469c03f2 100644
>> --- a/hw/acpi/ghes.c
>> +++ b/hw/acpi/ghes.c
>> @@ -543,8 +543,9 @@ void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
>> notifier_list_notify(&acpi_generic_error_notifiers, &source_id);
>> }
>>
>> -int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>> - uint64_t *addresses, uint32_t num_of_addresses)
>> +void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>> + uint64_t *addresses, uint32_t num_of_addresses,
>> + Error **errp)
>
> qapi/error.h:
>
> * - Whenever practical, also return a value that indicates success /
> * failure. This can make the error checking more concise, and can
> * avoid useless error object creation and destruction. Note that
> * we still have many functions returning void. We recommend
> * • bool-valued functions return true on success / false on failure,
> * • pointer-valued functions return non-null / null pointer, and
> * • integer-valued functions return non-negative / negative.
>
Question: If it's deterministic that caller passes @error_abort or @error_fatal
to acpi_ghes_memory_errors(), QEMU crashes with a core dump or exit before its
caller to check the return value. In this case, it's still preferred for
acpi_ghes_memory_errors() returns a value to indicate the success or failure?
>> {
>> /* Memory Error Section Type */
>> const uint8_t guid[] =
>> @@ -555,7 +556,6 @@ int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>> * Table 17-13 Generic Error Data Entry
>> */
>> QemuUUID fru_id = {};
>> - Error *errp = NULL;
>> int data_length;
>> GArray *block;
>> uint32_t block_status, i;
>> @@ -592,16 +592,9 @@ int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>> }
>>
>> /* Report the error */
>> - ghes_record_cper_errors(ags, block->data, block->len, source_id, &errp);
>> + ghes_record_cper_errors(ags, block->data, block->len, source_id, errp);
>>
>> g_array_free(block, true);
>> -
>> - if (errp) {
>> - error_report_err(errp);
>> - return -1;
>> - }
>> -
>> - return 0;
>> }
>
> The error reporting moves into the caller.
>
Similar question as above. If it's deterministic for the caller passes @error_abort
or @error_fatal to acpi_ghes_memory_errors() and then to ghes_record_cper_errors(),
QEMU crashes with a core dump or exit before error_report_err(errp) can be executed.
In this case, it's still preferred to have error_report_err(&error_abort) or
error_report_err(&error_fatal) in its caller?
>>
>> AcpiGhesState *acpi_ghes_get_state(void)
>> diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
>> index f73908985d..35c7bbbb01 100644
>> --- a/include/hw/acpi/ghes.h
>> +++ b/include/hw/acpi/ghes.h
>> @@ -98,8 +98,9 @@ void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
>> const char *oem_id, const char *oem_table_id);
>> void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
>> GArray *hardware_errors);
>> -int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>> - uint64_t *addresses, uint32_t num_of_addresses);
>> +void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>> + uint64_t *addresses, uint32_t num_of_addresses,
>> + Error **errp);
>> void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
>> uint16_t source_id, Error **errp);
>>
>> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
>> index 459ca4a9b0..a889315606 100644
>> --- a/target/arm/kvm.c
>> +++ b/target/arm/kvm.c
>> @@ -2458,13 +2458,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>> addresses[0] = paddr;
>> if (code == BUS_MCEERR_AR) {
>> kvm_cpu_synchronize_state(c);
>> - if (!acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
>> - addresses, 1)) {
>> - kvm_inject_arm_sea(c);
>> - } else {
>> - error_report("failed to record the error");
>> - abort();
>> - }
>> + acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
>> + addresses, 1, &error_abort);
>> + kvm_inject_arm_sea(c);
>
> Before the patch, we get two error reports, like this:
>
> qemu-system-FOO: OSPM does not acknowledge previous error, so can not record CPER for current error anymore
> qemu-system-FOO: failed to record the error
> Aborted (core dumped)
>
> Such error cascades should be avoided.
>
> Afterwards, we get one:
>
> Unexpected error at SOURCE-FILE:LINE-NUMBER:
> qemu-system-FOO: OSPM does not acknowledge previous error, so can not record CPER for current error anymore
> Aborted (core dumped)
>
> Are all errors reported by acpi_ghes_memory_errors() programming errors,
> i.e. when an error is reported, there's a bug for us to fix?
>
> If not, abort() is wrong before the patch, and &error_abort is wrong
> afterwards.
>
> You can use &error_fatal for fatal errors that are not programming
> errors.
>
No, there is no programming errors and the core dump is actually no sense.
It makes more sense for the caller to pass @error_fatal down to acpi_ghes_memory_errors().
>> }
>> return;
>> }
>
Thanks,
Gavin
Gavin Shan <gshan@redhat.com> writes:
> Hi Markus,
>
> On 11/11/25 3:25 PM, Markus Armbruster wrote:
>> Gavin Shan <gshan@redhat.com> writes:
>>
>>> Use error_abort in acpi_ghes_memory_errors() so that the caller needn't
>>> explicitly call abort() on errors. With this change, its return value
>>> isn't needed any more.
>>>
>>> Suggested-by: Igor Mammedov <imammedo@redhat.com>
>>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>>> ---
>>> hw/acpi/ghes-stub.c | 6 +++---
>>> hw/acpi/ghes.c | 15 ++++-----------
>>> include/hw/acpi/ghes.h | 5 +++--
>>> target/arm/kvm.c | 10 +++-------
>>> 4 files changed, 13 insertions(+), 23 deletions(-)
>>>
>>> diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c
>>> index 4faf573aeb..4ef914ffc5 100644
>>> --- a/hw/acpi/ghes-stub.c
>>> +++ b/hw/acpi/ghes-stub.c
>>> @@ -11,10 +11,10 @@
>>> #include "qemu/osdep.h"
>>> #include "hw/acpi/ghes.h"
>>>
>>> -int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>>> - uint64_t *addresses, uint32_t num_of_addresses)
>>> +void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>>> + uint64_t *addresses, uint32_t num_of_addresses,
>>> + Error **errp)
>>> {
>>> - return -1;
>>> }
>>
>> Before the patch, this function always fails: it returns -1.
>>
>> Afterwards, it always succeeds: it doesn't set @errp.
>>
>> Which one is correct?
>>
>
> Both are correct. This variant is only used on !CONFIG_ACPI_APEI. In this case,
> there is no chance to call this variant in the only caller kvm_arch_on_sigbus_vcpu().
> acpi_ghes_get_state() returns NULL on !CONFIG_ACPI_APEI there.
If this is not supposed to be called, have it abort() to make it
obvious.
>>>
>>> AcpiGhesState *acpi_ghes_get_state(void)
>>> diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
>>> index 055e5d719a..aa469c03f2 100644
>>> --- a/hw/acpi/ghes.c
>>> +++ b/hw/acpi/ghes.c
>>> @@ -543,8 +543,9 @@ void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
>>> notifier_list_notify(&acpi_generic_error_notifiers, &source_id);
>>> }
>>>
>>> -int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>>> - uint64_t *addresses, uint32_t num_of_addresses)
>>> +void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>>> + uint64_t *addresses, uint32_t num_of_addresses,
>>> + Error **errp)
>>
>> qapi/error.h:
>>
>> * - Whenever practical, also return a value that indicates success /
>> * failure. This can make the error checking more concise, and can
>> * avoid useless error object creation and destruction. Note that
>> * we still have many functions returning void. We recommend
>> * • bool-valued functions return true on success / false on failure,
>> * • pointer-valued functions return non-null / null pointer, and
>> * • integer-valued functions return non-negative / negative.
>>
>
> Question: If it's deterministic that caller passes @error_abort or @error_fatal
> to acpi_ghes_memory_errors(), QEMU crashes with a core dump or exit before its
> caller to check the return value. In this case, it's still preferred for
> acpi_ghes_memory_errors() returns a value to indicate the success or failure?
Yes, to separate concerns: acpi_ghes_memory_errors() remains oblivious
of how it is called.
>>> {
>>> /* Memory Error Section Type */
>>> const uint8_t guid[] =
>>> @@ -555,7 +556,6 @@ int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>>> * Table 17-13 Generic Error Data Entry
>>> */
>>> QemuUUID fru_id = {};
>>> - Error *errp = NULL;
>>> int data_length;
>>> GArray *block;
>>> uint32_t block_status, i;
>>> @@ -592,16 +592,9 @@ int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>>> }
>>>
>>> /* Report the error */
>>> - ghes_record_cper_errors(ags, block->data, block->len, source_id, &errp);
>>> + ghes_record_cper_errors(ags, block->data, block->len, source_id, errp);
>>>
>>> g_array_free(block, true);
>>> -
>>> - if (errp) {
>>> - error_report_err(errp);
>>> - return -1;
>>> - }
>>> -
>>> - return 0;
>>> }
>>
>> The error reporting moves into the caller.
>>
>
> Similar question as above. If it's deterministic for the caller passes @error_abort
> or @error_fatal to acpi_ghes_memory_errors() and then to ghes_record_cper_errors(),
> QEMU crashes with a core dump or exit before error_report_err(errp) can be executed.
> In this case, it's still preferred to have error_report_err(&error_abort) or
> error_report_err(&error_fatal) in its caller?
Yes.
>>>
>>> AcpiGhesState *acpi_ghes_get_state(void)
>>> diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
>>> index f73908985d..35c7bbbb01 100644
>>> --- a/include/hw/acpi/ghes.h
>>> +++ b/include/hw/acpi/ghes.h
>>> @@ -98,8 +98,9 @@ void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
>>> const char *oem_id, const char *oem_table_id);
>>> void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
>>> GArray *hardware_errors);
>>> -int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>>> - uint64_t *addresses, uint32_t num_of_addresses);
>>> +void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>>> + uint64_t *addresses, uint32_t num_of_addresses,
>>> + Error **errp);
>>> void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
>>> uint16_t source_id, Error **errp);
>>>
>>> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
>>> index 459ca4a9b0..a889315606 100644
>>> --- a/target/arm/kvm.c
>>> +++ b/target/arm/kvm.c
>>> @@ -2458,13 +2458,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>>> addresses[0] = paddr;
>>> if (code == BUS_MCEERR_AR) {
>>> kvm_cpu_synchronize_state(c);
>>> - if (!acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
>>> - addresses, 1)) {
>>> - kvm_inject_arm_sea(c);
>>> - } else {
>>> - error_report("failed to record the error");
>>> - abort();
>>> - }
>>> + acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
>>> + addresses, 1, &error_abort);
>>> + kvm_inject_arm_sea(c);
>>
>> Before the patch, we get two error reports, like this:
>>
>> qemu-system-FOO: OSPM does not acknowledge previous error, so can not record CPER for current error anymore
>> qemu-system-FOO: failed to record the error
>> Aborted (core dumped)
>>
>> Such error cascades should be avoided.
>>
>> Afterwards, we get one:
>>
>> Unexpected error at SOURCE-FILE:LINE-NUMBER:
>> qemu-system-FOO: OSPM does not acknowledge previous error, so can not record CPER for current error anymore
>> Aborted (core dumped)
>>
>> Are all errors reported by acpi_ghes_memory_errors() programming errors,
>> i.e. when an error is reported, there's a bug for us to fix?
>>
>> If not, abort() is wrong before the patch, and &error_abort is wrong
>> afterwards.
>>
>> You can use &error_fatal for fatal errors that are not programming
>> errors.
>
> No, there is no programming errors and the core dump is actually no sense.
Confirms my suspicion :)
> It makes more sense for the caller to pass @error_fatal down to acpi_ghes_memory_errors().
Makes sense.
>>> }
>>> return;
>>> }
>>
>
> Thanks,
> Gavin
On 5/11/25 12:44, Gavin Shan wrote:
> Use error_abort in acpi_ghes_memory_errors() so that the caller needn't
> explicitly call abort() on errors. With this change, its return value
> isn't needed any more.
>
> Suggested-by: Igor Mammedov <imammedo@redhat.com>
> Signed-off-by: Gavin Shan <gshan@redhat.com>
> ---
> hw/acpi/ghes-stub.c | 6 +++---
> hw/acpi/ghes.c | 15 ++++-----------
> include/hw/acpi/ghes.h | 5 +++--
> target/arm/kvm.c | 10 +++-------
> 4 files changed, 13 insertions(+), 23 deletions(-)
> diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
> index f73908985d..35c7bbbb01 100644
> --- a/include/hw/acpi/ghes.h
> +++ b/include/hw/acpi/ghes.h
> @@ -98,8 +98,9 @@ void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
> const char *oem_id, const char *oem_table_id);
> void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
> GArray *hardware_errors);
> -int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> - uint64_t *addresses, uint32_t num_of_addresses);
> +void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> + uint64_t *addresses, uint32_t num_of_addresses,
> + Error **errp);
This is an anti-pattern w.r.t. commit e3fe3988d78 ("error: Document
Error API usage rules"), because it can be called with an errp distinct
of &error_abort.
If you really want to abort(), remove the errp argument, directly call
abort() and rename as acpi_ghes_memory_abort_on_errors().
> void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
> uint16_t source_id, Error **errp);
>
> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
> index 459ca4a9b0..a889315606 100644
> --- a/target/arm/kvm.c
> +++ b/target/arm/kvm.c
> @@ -2458,13 +2458,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
> addresses[0] = paddr;
> if (code == BUS_MCEERR_AR) {
> kvm_cpu_synchronize_state(c);
> - if (!acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
> - addresses, 1)) {
> - kvm_inject_arm_sea(c);
> - } else {
> - error_report("failed to record the error");
> - abort();
> - }
> + acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
> + addresses, 1, &error_abort);
> + kvm_inject_arm_sea(c);
> }
> return;
> }
Philippe Mathieu-Daudé <philmd@linaro.org> writes:
> On 5/11/25 12:44, Gavin Shan wrote:
>> Use error_abort in acpi_ghes_memory_errors() so that the caller needn't
>> explicitly call abort() on errors. With this change, its return value
>> isn't needed any more.
>> Suggested-by: Igor Mammedov <imammedo@redhat.com>
>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>> ---
>> hw/acpi/ghes-stub.c | 6 +++---
>> hw/acpi/ghes.c | 15 ++++-----------
>> include/hw/acpi/ghes.h | 5 +++--
>> target/arm/kvm.c | 10 +++-------
>> 4 files changed, 13 insertions(+), 23 deletions(-)
>
>
>> diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
>> index f73908985d..35c7bbbb01 100644
>> --- a/include/hw/acpi/ghes.h
>> +++ b/include/hw/acpi/ghes.h
>> @@ -98,8 +98,9 @@ void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
>> const char *oem_id, const char *oem_table_id);
>> void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
>> GArray *hardware_errors);
>> -int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>> - uint64_t *addresses, uint32_t num_of_addresses);
>> +void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>> + uint64_t *addresses, uint32_t num_of_addresses,
>> + Error **errp);
>
> This is an anti-pattern w.r.t. commit e3fe3988d78 ("error: Document
> Error API usage rules"), because it can be called with an errp distinct
> of &error_abort.
>
> If you really want to abort(), remove the errp argument, directly call
> abort() and rename as acpi_ghes_memory_abort_on_errors().
Since there are no callers that do not want abort() on error, this is
fine.
I think your patch is also fine in princple, but a few details are
wrong. I'll point them out inline.
[...]
Hi Philippe,
On 11/11/25 12:54 AM, Philippe Mathieu-Daudé wrote:
> On 5/11/25 12:44, Gavin Shan wrote:
>> Use error_abort in acpi_ghes_memory_errors() so that the caller needn't
>> explicitly call abort() on errors. With this change, its return value
>> isn't needed any more.
>>
>> Suggested-by: Igor Mammedov <imammedo@redhat.com>
>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>> ---
>> hw/acpi/ghes-stub.c | 6 +++---
>> hw/acpi/ghes.c | 15 ++++-----------
>> include/hw/acpi/ghes.h | 5 +++--
>> target/arm/kvm.c | 10 +++-------
>> 4 files changed, 13 insertions(+), 23 deletions(-)
>
>
>> diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
>> index f73908985d..35c7bbbb01 100644
>> --- a/include/hw/acpi/ghes.h
>> +++ b/include/hw/acpi/ghes.h
>> @@ -98,8 +98,9 @@ void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
>> const char *oem_id, const char *oem_table_id);
>> void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
>> GArray *hardware_errors);
>> -int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>> - uint64_t *addresses, uint32_t num_of_addresses);
>> +void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
>> + uint64_t *addresses, uint32_t num_of_addresses,
>> + Error **errp);
>
> This is an anti-pattern w.r.t. commit e3fe3988d78 ("error: Document
> Error API usage rules"), because it can be called with an errp distinct
> of &error_abort.
>
> If you really want to abort(), remove the errp argument, directly call
> abort() and rename as acpi_ghes_memory_abort_on_errors().
>
Thanks for pointing it out. I will improve this like below in next revision.
- Drop 'errp' argument from acpi_ghes_memory_errors(), but I prefer to keep
the function name.
- In acpi_ghes_memory_errors(), a local variable 'Error *err' is added and
pass it to ghes_record_cper_errors(), which is also called by QMP handler
qmp_inject_ghes_v2_error().
Please let me know if there are any more improvements needed.
>> void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
>> uint16_t source_id, Error **errp);
>> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
>> index 459ca4a9b0..a889315606 100644
>> --- a/target/arm/kvm.c
>> +++ b/target/arm/kvm.c
>> @@ -2458,13 +2458,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>> addresses[0] = paddr;
>> if (code == BUS_MCEERR_AR) {
>> kvm_cpu_synchronize_state(c);
>> - if (!acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
>> - addresses, 1)) {
>> - kvm_inject_arm_sea(c);
>> - } else {
>> - error_report("failed to record the error");
>> - abort();
>> - }
>> + acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
>> + addresses, 1, &error_abort);
>> + kvm_inject_arm_sea(c);
>> }
>> return;
>> }
>
Thanks,
Gavin
On Tue, 11 Nov 2025 13:58:46 +1000
Gavin Shan <gshan@redhat.com> wrote:
> Hi Philippe,
>
> On 11/11/25 12:54 AM, Philippe Mathieu-Daudé wrote:
> > On 5/11/25 12:44, Gavin Shan wrote:
> >> Use error_abort in acpi_ghes_memory_errors() so that the caller needn't
> >> explicitly call abort() on errors. With this change, its return value
> >> isn't needed any more.
> >>
> >> Suggested-by: Igor Mammedov <imammedo@redhat.com>
> >> Signed-off-by: Gavin Shan <gshan@redhat.com>
> >> ---
> >> hw/acpi/ghes-stub.c | 6 +++---
> >> hw/acpi/ghes.c | 15 ++++-----------
> >> include/hw/acpi/ghes.h | 5 +++--
> >> target/arm/kvm.c | 10 +++-------
> >> 4 files changed, 13 insertions(+), 23 deletions(-)
> >
> >
> >> diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
> >> index f73908985d..35c7bbbb01 100644
> >> --- a/include/hw/acpi/ghes.h
> >> +++ b/include/hw/acpi/ghes.h
> >> @@ -98,8 +98,9 @@ void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
> >> const char *oem_id, const char *oem_table_id);
> >> void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
> >> GArray *hardware_errors);
> >> -int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> >> - uint64_t *addresses, uint32_t num_of_addresses);
> >> +void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> >> + uint64_t *addresses, uint32_t num_of_addresses,
> >> + Error **errp);
> >
> > This is an anti-pattern w.r.t. commit e3fe3988d78 ("error: Document
> > Error API usage rules"), because it can be called with an errp distinct
> > of &error_abort.
> >
> > If you really want to abort(), remove the errp argument, directly call
> > abort() and rename as acpi_ghes_memory_abort_on_errors().
> >
>
> Thanks for pointing it out. I will improve this like below in next revision.
>
> - Drop 'errp' argument from acpi_ghes_memory_errors(), but I prefer to keep
> the function name.
>
> - In acpi_ghes_memory_errors(), a local variable 'Error *err' is added and
> pass it to ghes_record_cper_errors(), which is also called by QMP handler
> qmp_inject_ghes_v2_error().
>
> Please let me know if there are any more improvements needed.
>
> >> void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
> >> uint16_t source_id, Error **errp);
> >> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
> >> index 459ca4a9b0..a889315606 100644
> >> --- a/target/arm/kvm.c
> >> +++ b/target/arm/kvm.c
> >> @@ -2458,13 +2458,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
> >> addresses[0] = paddr;
> >> if (code == BUS_MCEERR_AR) {
> >> kvm_cpu_synchronize_state(c);
> >> - if (!acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
> >> - addresses, 1)) {
> >> - kvm_inject_arm_sea(c);
> >> - } else {
> >> - error_report("failed to record the error");
> >> - abort();
> >> - }
> >> + acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
> >> + addresses, 1, &error_abort);
I guess that was my request,
with calling abort() directly you have to print error separately,
while using 'error_abort' is much more cleaner (as this hunk demonstrates)
> >> + kvm_inject_arm_sea(c);
> >> }
> >> return;
> >> }
> >
>
> Thanks,
> Gavin
>
On Wed, 5 Nov 2025 21:44:51 +1000
Gavin Shan <gshan@redhat.com> wrote:
> Use error_abort in acpi_ghes_memory_errors() so that the caller needn't
> explicitly call abort() on errors. With this change, its return value
> isn't needed any more.
>
> Suggested-by: Igor Mammedov <imammedo@redhat.com>
> Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
> ---
> hw/acpi/ghes-stub.c | 6 +++---
> hw/acpi/ghes.c | 15 ++++-----------
> include/hw/acpi/ghes.h | 5 +++--
> target/arm/kvm.c | 10 +++-------
> 4 files changed, 13 insertions(+), 23 deletions(-)
>
> diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c
> index 4faf573aeb..4ef914ffc5 100644
> --- a/hw/acpi/ghes-stub.c
> +++ b/hw/acpi/ghes-stub.c
> @@ -11,10 +11,10 @@
> #include "qemu/osdep.h"
> #include "hw/acpi/ghes.h"
>
> -int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> - uint64_t *addresses, uint32_t num_of_addresses)
> +void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> + uint64_t *addresses, uint32_t num_of_addresses,
> + Error **errp)
> {
> - return -1;
> }
>
> AcpiGhesState *acpi_ghes_get_state(void)
> diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
> index 055e5d719a..aa469c03f2 100644
> --- a/hw/acpi/ghes.c
> +++ b/hw/acpi/ghes.c
> @@ -543,8 +543,9 @@ void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
> notifier_list_notify(&acpi_generic_error_notifiers, &source_id);
> }
>
> -int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> - uint64_t *addresses, uint32_t num_of_addresses)
> +void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> + uint64_t *addresses, uint32_t num_of_addresses,
> + Error **errp)
> {
> /* Memory Error Section Type */
> const uint8_t guid[] =
> @@ -555,7 +556,6 @@ int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> * Table 17-13 Generic Error Data Entry
> */
> QemuUUID fru_id = {};
> - Error *errp = NULL;
> int data_length;
> GArray *block;
> uint32_t block_status, i;
> @@ -592,16 +592,9 @@ int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> }
>
> /* Report the error */
> - ghes_record_cper_errors(ags, block->data, block->len, source_id, &errp);
> + ghes_record_cper_errors(ags, block->data, block->len, source_id, errp);
>
> g_array_free(block, true);
> -
> - if (errp) {
> - error_report_err(errp);
> - return -1;
> - }
> -
> - return 0;
> }
>
> AcpiGhesState *acpi_ghes_get_state(void)
> diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
> index f73908985d..35c7bbbb01 100644
> --- a/include/hw/acpi/ghes.h
> +++ b/include/hw/acpi/ghes.h
> @@ -98,8 +98,9 @@ void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
> const char *oem_id, const char *oem_table_id);
> void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
> GArray *hardware_errors);
> -int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> - uint64_t *addresses, uint32_t num_of_addresses);
> +void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> + uint64_t *addresses, uint32_t num_of_addresses,
> + Error **errp);
> void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
> uint16_t source_id, Error **errp);
>
> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
> index 459ca4a9b0..a889315606 100644
> --- a/target/arm/kvm.c
> +++ b/target/arm/kvm.c
> @@ -2458,13 +2458,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
> addresses[0] = paddr;
> if (code == BUS_MCEERR_AR) {
> kvm_cpu_synchronize_state(c);
> - if (!acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
> - addresses, 1)) {
> - kvm_inject_arm_sea(c);
> - } else {
> - error_report("failed to record the error");
> - abort();
> - }
> + acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
> + addresses, 1, &error_abort);
> + kvm_inject_arm_sea(c);
> }
> return;
> }
On Wed, 5 Nov 2025 21:44:51 +1000 Gavin Shan <gshan@redhat.com> wrote: > Use error_abort in acpi_ghes_memory_errors() so that the caller needn't > explicitly call abort() on errors. With this change, its return value > isn't needed any more. > > Suggested-by: Igor Mammedov <imammedo@redhat.com> > Signed-off-by: Gavin Shan <gshan@redhat.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
© 2016 - 2025 Red Hat, Inc.