Up to UEFI spec, the type byte of CPER struct for ARM processor was
defined simply as:
Type at byte offset 4:
- Cache error
- TLB Error
- Bus Error
- Micro-architectural Error
All other values are reserved
Yet, there was no information about how this would be encoded.
Spec 2.9A errata corrected it by defining:
- Bit 1 - Cache Error
- Bit 2 - TLB Error
- Bit 3 - Bus Error
- Bit 4 - Micro-architectural Error
All other values are reserved
That actually aligns with the values already defined on older
versions at N.2.4.1. Generic Processor Error Section.
Spec 2.10 also preserve the same encoding as 2.9A
Adjust CPER and GHES handling code for both generic and ARM
processors to properly handle UEFI 2.9A and 2.10 encoding.
Link: https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#arm-processor-error-information
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
drivers/acpi/apei/ghes.c | 15 ++++++----
drivers/firmware/efi/cper-arm.c | 50 ++++++++++++++++-----------------
include/linux/cper.h | 10 +++----
3 files changed, 38 insertions(+), 37 deletions(-)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 623cc0cb4a65..de79cc0a0f1d 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -533,6 +533,7 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
{
struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
int flags = sync ? MF_ACTION_REQUIRED : 0;
+ char error_type[120];
bool queued = false;
int sec_sev, i;
char *p;
@@ -546,9 +547,8 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
p = (char *)(err + 1);
for (i = 0; i < err->err_info_num; i++) {
struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p;
- bool is_cache = (err_info->type == CPER_ARM_CACHE_ERROR);
+ bool is_cache = err_info->type & CPER_ARM_CACHE_ERROR;
bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
- const char *error_type = "unknown error";
/*
* The field (err_info->error_info & BIT(26)) is fixed to set to
@@ -562,12 +562,15 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
continue;
}
- if (err_info->type < ARRAY_SIZE(cper_proc_error_type_strs))
- error_type = cper_proc_error_type_strs[err_info->type];
+ cper_bits_to_str(error_type, sizeof(error_type),
+ FIELD_GET(CPER_ARM_ERR_TYPE_MASK, err_info->type),
+ cper_proc_error_type_strs,
+ ARRAY_SIZE(cper_proc_error_type_strs));
pr_warn_ratelimited(FW_WARN GHES_PFX
- "Unhandled processor error type: %s\n",
- error_type);
+ "Unhandled processor error type 0x%02x: %s%s\n",
+ err_info->type, error_type,
+ (err_info->type & ~CPER_ARM_ERR_TYPE_MASK) ? " with reserved bit(s)" : "");
p += err_info->length;
}
diff --git a/drivers/firmware/efi/cper-arm.c b/drivers/firmware/efi/cper-arm.c
index eb7ee6af55f2..52d18490b59e 100644
--- a/drivers/firmware/efi/cper-arm.c
+++ b/drivers/firmware/efi/cper-arm.c
@@ -93,15 +93,11 @@ static void cper_print_arm_err_info(const char *pfx, u32 type,
bool proc_context_corrupt, corrected, precise_pc, restartable_pc;
bool time_out, access_mode;
- /* If the type is unknown, bail. */
- if (type > CPER_ARM_MAX_TYPE)
- return;
-
/*
* Vendor type errors have error information values that are vendor
* specific.
*/
- if (type == CPER_ARM_VENDOR_ERROR)
+ if (type & CPER_ARM_VENDOR_ERROR)
return;
if (error_info & CPER_ARM_ERR_VALID_TRANSACTION_TYPE) {
@@ -116,43 +112,38 @@ static void cper_print_arm_err_info(const char *pfx, u32 type,
if (error_info & CPER_ARM_ERR_VALID_OPERATION_TYPE) {
op_type = ((error_info >> CPER_ARM_ERR_OPERATION_SHIFT)
& CPER_ARM_ERR_OPERATION_MASK);
- switch (type) {
- case CPER_ARM_CACHE_ERROR:
+ if (type & CPER_ARM_CACHE_ERROR) {
if (op_type < ARRAY_SIZE(arm_cache_err_op_strs)) {
- printk("%soperation type: %s\n", pfx,
+ printk("%scache error, operation type: %s\n", pfx,
arm_cache_err_op_strs[op_type]);
}
- break;
- case CPER_ARM_TLB_ERROR:
+ }
+ if (type & CPER_ARM_TLB_ERROR) {
if (op_type < ARRAY_SIZE(arm_tlb_err_op_strs)) {
- printk("%soperation type: %s\n", pfx,
+ printk("%sTLB error, operation type: %s\n", pfx,
arm_tlb_err_op_strs[op_type]);
}
- break;
- case CPER_ARM_BUS_ERROR:
+ }
+ if (type & CPER_ARM_BUS_ERROR) {
if (op_type < ARRAY_SIZE(arm_bus_err_op_strs)) {
- printk("%soperation type: %s\n", pfx,
+ printk("%sbus error, operation type: %s\n", pfx,
arm_bus_err_op_strs[op_type]);
}
- break;
}
}
if (error_info & CPER_ARM_ERR_VALID_LEVEL) {
level = ((error_info >> CPER_ARM_ERR_LEVEL_SHIFT)
& CPER_ARM_ERR_LEVEL_MASK);
- switch (type) {
- case CPER_ARM_CACHE_ERROR:
+ if (type & CPER_ARM_CACHE_ERROR)
printk("%scache level: %d\n", pfx, level);
- break;
- case CPER_ARM_TLB_ERROR:
+
+ if (type & CPER_ARM_TLB_ERROR)
printk("%sTLB level: %d\n", pfx, level);
- break;
- case CPER_ARM_BUS_ERROR:
+
+ if (type & CPER_ARM_BUS_ERROR)
printk("%saffinity level at which the bus error occurred: %d\n",
pfx, level);
- break;
- }
}
if (error_info & CPER_ARM_ERR_VALID_PROC_CONTEXT_CORRUPT) {
@@ -241,6 +232,7 @@ void cper_print_proc_arm(const char *pfx,
struct cper_arm_err_info *err_info;
struct cper_arm_ctx_info *ctx_info;
char newpfx[64], infopfx[ARRAY_SIZE(newpfx) + 1];
+ char error_type[120];
printk("%sMIDR: 0x%016llx\n", pfx, proc->midr);
@@ -289,9 +281,15 @@ void cper_print_proc_arm(const char *pfx,
newpfx);
}
- printk("%serror_type: %d, %s\n", newpfx, err_info->type,
- err_info->type < ARRAY_SIZE(cper_proc_error_type_strs) ?
- cper_proc_error_type_strs[err_info->type] : "unknown");
+ cper_bits_to_str(error_type, sizeof(error_type),
+ FIELD_GET(CPER_ARM_ERR_TYPE_MASK, err_info->type),
+ cper_proc_error_type_strs,
+ ARRAY_SIZE(cper_proc_error_type_strs));
+
+ printk("%serror_type: 0x%02x: %s%s\n", newpfx, err_info->type,
+ error_type,
+ (err_info->type & ~CPER_ARM_ERR_TYPE_MASK) ? " with reserved bit(s)" : "");
+
if (err_info->validation_bits & CPER_ARM_INFO_VALID_ERR_INFO) {
printk("%serror_info: 0x%016llx\n", newpfx,
err_info->error_info);
diff --git a/include/linux/cper.h b/include/linux/cper.h
index c2f14b916bfb..fc62a80575e8 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -293,11 +293,11 @@ enum {
#define CPER_ARM_INFO_FLAGS_PROPAGATED BIT(2)
#define CPER_ARM_INFO_FLAGS_OVERFLOW BIT(3)
-#define CPER_ARM_CACHE_ERROR 0
-#define CPER_ARM_TLB_ERROR 1
-#define CPER_ARM_BUS_ERROR 2
-#define CPER_ARM_VENDOR_ERROR 3
-#define CPER_ARM_MAX_TYPE CPER_ARM_VENDOR_ERROR
+#define CPER_ARM_ERR_TYPE_MASK GENMASK(4,1)
+#define CPER_ARM_CACHE_ERROR BIT(1)
+#define CPER_ARM_TLB_ERROR BIT(2)
+#define CPER_ARM_BUS_ERROR BIT(3)
+#define CPER_ARM_VENDOR_ERROR BIT(4)
#define CPER_ARM_ERR_VALID_TRANSACTION_TYPE BIT(0)
#define CPER_ARM_ERR_VALID_OPERATION_TYPE BIT(1)
--
2.45.2
Hi Mauro,
kernel test robot noticed the following build errors:
[auto build test ERROR on efi/next]
[also build test ERROR on rafael-pm/linux-next rafael-pm/bleeding-edge linus/master v6.10-rc5 next-20240627]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Mauro-Carvalho-Chehab/efi-cper-Adjust-infopfx-size-to-accept-an-extra-space/20240625-203952
base: https://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git next
patch link: https://lore.kernel.org/r/b9354882f45a0c600e65df4bacee2f1080c4ba89.1719219886.git.mchehab%2Bhuawei%40kernel.org
patch subject: [PATCH v5 3/4] efi/cper: align ARM CPER type with UEFI 2.9A/2.10 specs
config: x86_64-buildonly-randconfig-003-20240628 (https://download.01.org/0day-ci/archive/20240629/202406290457.3HKFUkuV-lkp@intel.com/config)
compiler: gcc-13 (Ubuntu 13.2.0-4ubuntu3) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240629/202406290457.3HKFUkuV-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202406290457.3HKFUkuV-lkp@intel.com/
All errors (new ones prefixed by >>):
drivers/acpi/apei/ghes.c: In function 'ghes_handle_arm_hw_error':
>> drivers/acpi/apei/ghes.c:566:34: error: implicit declaration of function 'FIELD_GET' [-Werror=implicit-function-declaration]
566 | FIELD_GET(CPER_ARM_ERR_TYPE_MASK, err_info->type),
| ^~~~~~~~~
cc1: some warnings being treated as errors
vim +/FIELD_GET +566 drivers/acpi/apei/ghes.c
530
531 static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
532 int sev, bool sync)
533 {
534 struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
535 int flags = sync ? MF_ACTION_REQUIRED : 0;
536 char error_type[120];
537 bool queued = false;
538 int sec_sev, i;
539 char *p;
540
541 log_arm_hw_error(err);
542
543 sec_sev = ghes_severity(gdata->error_severity);
544 if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE)
545 return false;
546
547 p = (char *)(err + 1);
548 for (i = 0; i < err->err_info_num; i++) {
549 struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p;
550 bool is_cache = err_info->type & CPER_ARM_CACHE_ERROR;
551 bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
552
553 /*
554 * The field (err_info->error_info & BIT(26)) is fixed to set to
555 * 1 in some old firmware of HiSilicon Kunpeng920. We assume that
556 * firmware won't mix corrected errors in an uncorrected section,
557 * and don't filter out 'corrected' error here.
558 */
559 if (is_cache && has_pa) {
560 queued = ghes_do_memory_failure(err_info->physical_fault_addr, flags);
561 p += err_info->length;
562 continue;
563 }
564
565 cper_bits_to_str(error_type, sizeof(error_type),
> 566 FIELD_GET(CPER_ARM_ERR_TYPE_MASK, err_info->type),
567 cper_proc_error_type_strs,
568 ARRAY_SIZE(cper_proc_error_type_strs));
569
570 pr_warn_ratelimited(FW_WARN GHES_PFX
571 "Unhandled processor error type 0x%02x: %s%s\n",
572 err_info->type, error_type,
573 (err_info->type & ~CPER_ARM_ERR_TYPE_MASK) ? " with reserved bit(s)" : "");
574 p += err_info->length;
575 }
576
577 return queued;
578 }
579
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Hi Mauro,
kernel test robot noticed the following build errors:
[auto build test ERROR on efi/next]
[also build test ERROR on rafael-pm/linux-next rafael-pm/bleeding-edge linus/master v6.10-rc5 next-20240626]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Mauro-Carvalho-Chehab/efi-cper-Adjust-infopfx-size-to-accept-an-extra-space/20240625-203952
base: https://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git next
patch link: https://lore.kernel.org/r/b9354882f45a0c600e65df4bacee2f1080c4ba89.1719219886.git.mchehab%2Bhuawei%40kernel.org
patch subject: [PATCH v5 3/4] efi/cper: align ARM CPER type with UEFI 2.9A/2.10 specs
config: i386-randconfig-004-20240627 (https://download.01.org/0day-ci/archive/20240627/202406271626.72sHSPJJ-lkp@intel.com/config)
compiler: clang version 18.1.5 (https://github.com/llvm/llvm-project 617a15a9eac96088ae5e9134248d8236e34b91b1)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240627/202406271626.72sHSPJJ-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202406271626.72sHSPJJ-lkp@intel.com/
All errors (new ones prefixed by >>):
>> drivers/acpi/apei/ghes.c:566:6: error: call to undeclared function 'FIELD_GET'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
566 | FIELD_GET(CPER_ARM_ERR_TYPE_MASK, err_info->type),
| ^
1 error generated.
vim +/FIELD_GET +566 drivers/acpi/apei/ghes.c
530
531 static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
532 int sev, bool sync)
533 {
534 struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
535 int flags = sync ? MF_ACTION_REQUIRED : 0;
536 char error_type[120];
537 bool queued = false;
538 int sec_sev, i;
539 char *p;
540
541 log_arm_hw_error(err);
542
543 sec_sev = ghes_severity(gdata->error_severity);
544 if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE)
545 return false;
546
547 p = (char *)(err + 1);
548 for (i = 0; i < err->err_info_num; i++) {
549 struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p;
550 bool is_cache = err_info->type & CPER_ARM_CACHE_ERROR;
551 bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
552
553 /*
554 * The field (err_info->error_info & BIT(26)) is fixed to set to
555 * 1 in some old firmware of HiSilicon Kunpeng920. We assume that
556 * firmware won't mix corrected errors in an uncorrected section,
557 * and don't filter out 'corrected' error here.
558 */
559 if (is_cache && has_pa) {
560 queued = ghes_do_memory_failure(err_info->physical_fault_addr, flags);
561 p += err_info->length;
562 continue;
563 }
564
565 cper_bits_to_str(error_type, sizeof(error_type),
> 566 FIELD_GET(CPER_ARM_ERR_TYPE_MASK, err_info->type),
567 cper_proc_error_type_strs,
568 ARRAY_SIZE(cper_proc_error_type_strs));
569
570 pr_warn_ratelimited(FW_WARN GHES_PFX
571 "Unhandled processor error type 0x%02x: %s%s\n",
572 err_info->type, error_type,
573 (err_info->type & ~CPER_ARM_ERR_TYPE_MASK) ? " with reserved bit(s)" : "");
574 p += err_info->length;
575 }
576
577 return queued;
578 }
579
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
© 2016 - 2025 Red Hat, Inc.