drivers/acpi/apei/ghes.c | 19 +++++++++++--- drivers/firmware/efi/cper-arm.c | 44 ++++++++++++++------------------- include/linux/cper.h | 9 +++---- 3 files changed, 37 insertions(+), 35 deletions(-)
Up to UEFI spec, the type byte of CPER struct was defined simply
as:
Type at byte offset 4:
- Cache error
- TLB Error
- Bus Error
- Micro-architectural Error
All other values are reserved
Yet, there was no information about how this would be encoded.
Spec 2.9A corrected it by defining:
- Bit 1 - Cache Error
- Bit 2 - TLB Error
- Bit 3 - Bus Error
- Bit 4 - Micro-architectural Error
All other values are reserved
Spec 2.10 also preserve the same encoding as 2.9A
See: https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#arm-processor-error-information
Adjust CPER handling code for ARM to properly handle UEFI 2.9A and
2.10 encoding.
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
drivers/acpi/apei/ghes.c | 19 +++++++++++---
drivers/firmware/efi/cper-arm.c | 44 ++++++++++++++-------------------
include/linux/cper.h | 9 +++----
3 files changed, 37 insertions(+), 35 deletions(-)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index ed32bbecb4a3..365de4115508 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -546,9 +546,12 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
p = (char *)(err + 1);
for (i = 0; i < err->err_info_num; i++) {
struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p;
- bool is_cache = (err_info->type == CPER_ARM_CACHE_ERROR);
+ bool is_cache = (err_info->type & CPER_ARM_CACHE_ERROR);
bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
- const char *error_type = "unknown error";
+ char error_type[120] = "";
+ char *s = error_type;
+ int len = 0;
+ int i;
/*
* The field (err_info->error_info & BIT(26)) is fixed to set to
@@ -562,8 +565,16 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
continue;
}
- if (err_info->type < ARRAY_SIZE(cper_proc_error_type_strs))
- error_type = cper_proc_error_type_strs[err_info->type];
+ for (i = 0; i < ARRAY_SIZE(cper_proc_error_type_strs); i++) {
+ if (!(err_info->type & (1U << i)))
+ continue;
+
+ len += snprintf(s, sizeof(err_info->type) - len, "%s ", cper_proc_error_type_strs[i]);
+ s += len;
+ }
+
+ if (!*error_type)
+ strscpy(error_type, "unknown error", sizeof(error_type));
pr_warn_ratelimited(FW_WARN GHES_PFX
"Unhandled processor error type: %s\n",
diff --git a/drivers/firmware/efi/cper-arm.c b/drivers/firmware/efi/cper-arm.c
index fa9c1c3bf168..f57641eb548a 100644
--- a/drivers/firmware/efi/cper-arm.c
+++ b/drivers/firmware/efi/cper-arm.c
@@ -93,15 +93,11 @@ static void cper_print_arm_err_info(const char *pfx, u32 type,
bool proc_context_corrupt, corrected, precise_pc, restartable_pc;
bool time_out, access_mode;
- /* If the type is unknown, bail. */
- if (type > CPER_ARM_MAX_TYPE)
- return;
-
/*
* Vendor type errors have error information values that are vendor
* specific.
*/
- if (type == CPER_ARM_VENDOR_ERROR)
+ if (type & CPER_ARM_VENDOR_ERROR)
return;
if (error_info & CPER_ARM_ERR_VALID_TRANSACTION_TYPE) {
@@ -116,43 +112,38 @@ static void cper_print_arm_err_info(const char *pfx, u32 type,
if (error_info & CPER_ARM_ERR_VALID_OPERATION_TYPE) {
op_type = ((error_info >> CPER_ARM_ERR_OPERATION_SHIFT)
& CPER_ARM_ERR_OPERATION_MASK);
- switch (type) {
- case CPER_ARM_CACHE_ERROR:
+ if (type & CPER_ARM_CACHE_ERROR) {
if (op_type < ARRAY_SIZE(arm_cache_err_op_strs)) {
- printk("%soperation type: %s\n", pfx,
+ printk("%scache error: %s\n", pfx,
arm_cache_err_op_strs[op_type]);
}
- break;
- case CPER_ARM_TLB_ERROR:
+ }
+ if (type & CPER_ARM_TLB_ERROR) {
if (op_type < ARRAY_SIZE(arm_tlb_err_op_strs)) {
- printk("%soperation type: %s\n", pfx,
+ printk("%sTLB error: %s\n", pfx,
arm_tlb_err_op_strs[op_type]);
}
- break;
- case CPER_ARM_BUS_ERROR:
+ }
+ if (type & CPER_ARM_BUS_ERROR) {
if (op_type < ARRAY_SIZE(arm_bus_err_op_strs)) {
- printk("%soperation type: %s\n", pfx,
+ printk("%sbus error: %s\n", pfx,
arm_bus_err_op_strs[op_type]);
}
- break;
}
}
if (error_info & CPER_ARM_ERR_VALID_LEVEL) {
level = ((error_info >> CPER_ARM_ERR_LEVEL_SHIFT)
& CPER_ARM_ERR_LEVEL_MASK);
- switch (type) {
- case CPER_ARM_CACHE_ERROR:
+ if (type & CPER_ARM_CACHE_ERROR)
printk("%scache level: %d\n", pfx, level);
- break;
- case CPER_ARM_TLB_ERROR:
+
+ if (type & CPER_ARM_TLB_ERROR)
printk("%sTLB level: %d\n", pfx, level);
- break;
- case CPER_ARM_BUS_ERROR:
+
+ if (type & CPER_ARM_BUS_ERROR)
printk("%saffinity level at which the bus error occurred: %d\n",
pfx, level);
- break;
- }
}
if (error_info & CPER_ARM_ERR_VALID_PROC_CONTEXT_CORRUPT) {
@@ -289,9 +280,10 @@ void cper_print_proc_arm(const char *pfx,
newpfx);
}
- printk("%serror_type: %d, %s\n", newpfx, err_info->type,
- err_info->type < ARRAY_SIZE(cper_proc_error_type_strs) ?
- cper_proc_error_type_strs[err_info->type] : "unknown");
+ printk("%s""error_type: 0x%02x\n", pfx, err_info->type);
+ cper_print_bits(pfx, err_info->type,
+ cper_proc_error_type_strs,
+ ARRAY_SIZE(cper_proc_error_type_strs));
if (err_info->validation_bits & CPER_ARM_INFO_VALID_ERR_INFO) {
printk("%serror_info: 0x%016llx\n", newpfx,
err_info->error_info);
diff --git a/include/linux/cper.h b/include/linux/cper.h
index 265b0f8fc0b3..afc6d41b4e67 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -293,11 +293,10 @@ enum {
#define CPER_ARM_INFO_FLAGS_PROPAGATED BIT(2)
#define CPER_ARM_INFO_FLAGS_OVERFLOW BIT(3)
-#define CPER_ARM_CACHE_ERROR 0
-#define CPER_ARM_TLB_ERROR 1
-#define CPER_ARM_BUS_ERROR 2
-#define CPER_ARM_VENDOR_ERROR 3
-#define CPER_ARM_MAX_TYPE CPER_ARM_VENDOR_ERROR
+#define CPER_ARM_CACHE_ERROR BIT(1)
+#define CPER_ARM_TLB_ERROR BIT(2)
+#define CPER_ARM_BUS_ERROR BIT(3)
+#define CPER_ARM_VENDOR_ERROR BIT(4)
#define CPER_ARM_ERR_VALID_TRANSACTION_TYPE BIT(0)
#define CPER_ARM_ERR_VALID_OPERATION_TYPE BIT(1)
--
2.45.2
On Wed, 19 Jun 2024 12:52:38 +0200
Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:
> Up to UEFI spec, the type byte of CPER struct was defined simply
> as:
>
> Type at byte offset 4:
>
> - Cache error
> - TLB Error
> - Bus Error
> - Micro-architectural Error
> All other values are reserved
>
> Yet, there was no information about how this would be encoded.
>
> Spec 2.9A corrected it by defining:
>
> - Bit 1 - Cache Error
> - Bit 2 - TLB Error
> - Bit 3 - Bus Error
> - Bit 4 - Micro-architectural Error
> All other values are reserved
>
> Spec 2.10 also preserve the same encoding as 2.9A
>
> See: https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#arm-processor-error-information
>
> Adjust CPER handling code for ARM to properly handle UEFI 2.9A and
> 2.10 encoding.
Hi Mauro,
I'd be tempted to use "ARM Processor" throughout this patch description
as could in theory be something else and currently the link
is the only way to tell!
A few comments inline.
Good catch on the spec change btw.
>
> Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
> ---
> drivers/acpi/apei/ghes.c | 19 +++++++++++---
> drivers/firmware/efi/cper-arm.c | 44 ++++++++++++++-------------------
> include/linux/cper.h | 9 +++----
> 3 files changed, 37 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index ed32bbecb4a3..365de4115508 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -546,9 +546,12 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
> p = (char *)(err + 1);
> for (i = 0; i < err->err_info_num; i++) {
> struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p;
> - bool is_cache = (err_info->type == CPER_ARM_CACHE_ERROR);
> + bool is_cache = (err_info->type & CPER_ARM_CACHE_ERROR);
Matches local style I guess but the () are unnecessary.
> bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
> - const char *error_type = "unknown error";
> + char error_type[120] = "";
> + char *s = error_type;
> + int len = 0;
> + int i;
Shadowing i which is bad for readability.
>
> /*
> * The field (err_info->error_info & BIT(26)) is fixed to set to
> @@ -562,8 +565,16 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
> continue;
> }
>
> - if (err_info->type < ARRAY_SIZE(cper_proc_error_type_strs))
> - error_type = cper_proc_error_type_strs[err_info->type];
> + for (i = 0; i < ARRAY_SIZE(cper_proc_error_type_strs); i++) {
> + if (!(err_info->type & (1U << i)))
> + continue;
> +
> + len += snprintf(s, sizeof(err_info->type) - len, "%s ", cper_proc_error_type_strs[i]);
Size of the index into the type string array? I'm confused.
sizeof(error_type) maybe?
Also, maybe break that long line of code before cper_*
> + s += len;
> + }
> +
> + if (!*error_type)
> + strscpy(error_type, "unknown error", sizeof(error_type));
Perhaps should handle multiple bits where only one is unknown?
So maybe compare with a mask of known bits and print this on the end (perhaps
including which bit)?
>
> pr_warn_ratelimited(FW_WARN GHES_PFX
> "Unhandled processor error type: %s\n",
> diff --git a/drivers/firmware/efi/cper-arm.c b/drivers/firmware/efi/cper-arm.c
> index fa9c1c3bf168..f57641eb548a 100644
> --- a/drivers/firmware/efi/cper-arm.c
> +++ b/drivers/firmware/efi/cper-arm.c
> @@ -93,15 +93,11 @@ static void cper_print_arm_err_info(const char *pfx, u32 type,
> bool proc_context_corrupt, corrected, precise_pc, restartable_pc;
> bool time_out, access_mode;
>
> - /* If the type is unknown, bail. */
> - if (type > CPER_ARM_MAX_TYPE)
> - return;
> -
> /*
> * Vendor type errors have error information values that are vendor
> * specific.
> */
> - if (type == CPER_ARM_VENDOR_ERROR)
> + if (type & CPER_ARM_VENDOR_ERROR)
> return;
>
> if (error_info & CPER_ARM_ERR_VALID_TRANSACTION_TYPE) {
> @@ -116,43 +112,38 @@ static void cper_print_arm_err_info(const char *pfx, u32 type,
> if (error_info & CPER_ARM_ERR_VALID_OPERATION_TYPE) {
> op_type = ((error_info >> CPER_ARM_ERR_OPERATION_SHIFT)
> & CPER_ARM_ERR_OPERATION_MASK);
> - switch (type) {
> - case CPER_ARM_CACHE_ERROR:
> + if (type & CPER_ARM_CACHE_ERROR) {
> if (op_type < ARRAY_SIZE(arm_cache_err_op_strs)) {
> - printk("%soperation type: %s\n", pfx,
> + printk("%scache error: %s\n", pfx,
> arm_cache_err_op_strs[op_type]);
Can we keep that this is an operation type in print?
"%scache error, operation type: %s\n" perhaps?
> }
> - break;
> - case CPER_ARM_TLB_ERROR:
> + }
> + if (type & CPER_ARM_TLB_ERROR) {
> if (op_type < ARRAY_SIZE(arm_tlb_err_op_strs)) {
> - printk("%soperation type: %s\n", pfx,
> + printk("%sTLB error: %s\n", pfx,
> arm_tlb_err_op_strs[op_type]);
> }
> - break;
> - case CPER_ARM_BUS_ERROR:
> + }
> + if (type & CPER_ARM_BUS_ERROR) {
> if (op_type < ARRAY_SIZE(arm_bus_err_op_strs)) {
> - printk("%soperation type: %s\n", pfx,
> + printk("%sbus error: %s\n", pfx,
> arm_bus_err_op_strs[op_type]);
> }
> - break;
> }
> }
>
> if (error_info & CPER_ARM_ERR_VALID_LEVEL) {
> level = ((error_info >> CPER_ARM_ERR_LEVEL_SHIFT)
> & CPER_ARM_ERR_LEVEL_MASK);
Not a today thing, but would be lovely to use FIELD_GET()
for all these with appropriately fixed up mask definitions.
Right now it is inconsistent as the valid entries are handled
as shifted values, and we have GENMASK(X,0) plus a shift for these.
> - switch (type) {
> - case CPER_ARM_CACHE_ERROR:
> + if (type & CPER_ARM_CACHE_ERROR)
> printk("%scache level: %d\n", pfx, level);
> - break;
> - case CPER_ARM_TLB_ERROR:
> +
> + if (type & CPER_ARM_TLB_ERROR)
> printk("%sTLB level: %d\n", pfx, level);
> - break;
> - case CPER_ARM_BUS_ERROR:
> +
> + if (type & CPER_ARM_BUS_ERROR)
> printk("%saffinity level at which the bus error occurred: %d\n",
> pfx, level);
> - break;
> - }
> }
© 2016 - 2025 Red Hat, Inc.