If the BIOS generates a very small ARM Processor Error, or
an incomplete one, the current logic will fail when dereferencing
err->section_length
and
ctx_info->size
Add checks to avoid that. With these changes, such GHESv2
records won't cause OOPSes like this:
[ 1.492129] Internal error: Oops: 0000000096000005 [#1] SMP
[ 1.495449] Modules linked in:
[ 1.495820] CPU: 0 UID: 0 PID: 9 Comm: kworker/0:0 Not tainted 6.18.0-rc1-00017-gabadcc3553dd-dirty #18 PREEMPT
[ 1.496125] Hardware name: QEMU QEMU Virtual Machine, BIOS unknown 02/02/2022
[ 1.496433] Workqueue: kacpi_notify acpi_os_execute_deferred
[ 1.496967] pstate: 814000c5 (Nzcv daIF +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
[ 1.497199] pc : log_arm_hw_error+0x5c/0x200
[ 1.497380] lr : ghes_handle_arm_hw_error+0x94/0x220
0xffff8000811c5324 is in log_arm_hw_error (../drivers/ras/ras.c:75).
70 err_info = (struct cper_arm_err_info *)(err + 1);
71 ctx_info = (struct cper_arm_ctx_info *)(err_info + err->err_info_num);
72 ctx_err = (u8 *)ctx_info;
73
74 for (n = 0; n < err->context_info_num; n++) {
75 sz = sizeof(struct cper_arm_ctx_info) + ctx_info->size;
76 ctx_info = (struct cper_arm_ctx_info *)((long)ctx_info + sz);
77 ctx_len += sz;
78 }
79
and similar ones when trying to access section_length on an
error dump that is too small.
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
drivers/acpi/apei/ghes.c | 33 +++++++++++++++++++++++++++++----
drivers/ras/ras.c | 6 +++++-
2 files changed, 34 insertions(+), 5 deletions(-)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 0dc767392a6c..9bf4ec84f160 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -552,21 +552,46 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
{
struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
int flags = sync ? MF_ACTION_REQUIRED : 0;
+ int length = gdata->error_data_length;
char error_type[120];
bool queued = false;
int sec_sev, i;
char *p;
sec_sev = ghes_severity(gdata->error_severity);
- log_arm_hw_error(err, sec_sev);
+ if (length >= sizeof(*err)) {
+ log_arm_hw_error(err, sec_sev);
+ } else {
+ pr_warn(FW_BUG "arm error length: %d\n", length);
+ pr_warn(FW_BUG "length is too small\n");
+ pr_warn(FW_BUG "firmware-generated error record is incorrect\n");
+ return false;
+ }
+
if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE)
return false;
p = (char *)(err + 1);
+ length -= sizeof(err);
+
for (i = 0; i < err->err_info_num; i++) {
- struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p;
- bool is_cache = err_info->type & CPER_ARM_CACHE_ERROR;
- bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
+ struct cper_arm_err_info *err_info;
+ bool is_cache, has_pa;
+
+ /* Ensure we have enough data for the error info header */
+ length -= sizeof(*err_info);
+ if (length < 0)
+ break;
+
+ err_info = (struct cper_arm_err_info *)p;
+
+ /* Validate the claimed length before using it */
+ length -= err_info->length;
+ if (length < 0)
+ break;
+
+ is_cache = err_info->type & CPER_ARM_CACHE_ERROR;
+ has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
/*
* The field (err_info->error_info & BIT(26)) is fixed to set to
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index 2a5b5a9fdcb3..03df3db62334 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -72,7 +72,11 @@ void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev)
ctx_err = (u8 *)ctx_info;
for (n = 0; n < err->context_info_num; n++) {
- sz = sizeof(struct cper_arm_ctx_info) + ctx_info->size;
+ sz = sizeof(struct cper_arm_ctx_info);
+
+ if (sz + (long)ctx_info - (long)err >= err->section_length)
+ sz += ctx_info->size;
+
ctx_info = (struct cper_arm_ctx_info *)((long)ctx_info + sz);
ctx_len += sz;
}
--
2.52.0
On Fri, 19 Dec 2025 11:49:59 +0100
Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:
> If the BIOS generates a very small ARM Processor Error, or
> an incomplete one, the current logic will fail to deferrence
>
> err->section_length
> and
> ctx_info->size
>
> Add checks to avoid that. With such changes, such GHESv2
> records won't cause OOPSes like this:
>
> [ 1.492129] Internal error: Oops: 0000000096000005 [#1] SMP
> [ 1.495449] Modules linked in:
> [ 1.495820] CPU: 0 UID: 0 PID: 9 Comm: kworker/0:0 Not tainted 6.18.0-rc1-00017-gabadcc3553dd-dirty #18 PREEMPT
> [ 1.496125] Hardware name: QEMU QEMU Virtual Machine, BIOS unknown 02/02/2022
> [ 1.496433] Workqueue: kacpi_notify acpi_os_execute_deferred
> [ 1.496967] pstate: 814000c5 (Nzcv daIF +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
> [ 1.497199] pc : log_arm_hw_error+0x5c/0x200
> [ 1.497380] lr : ghes_handle_arm_hw_error+0x94/0x220
>
> 0xffff8000811c5324 is in log_arm_hw_error (../drivers/ras/ras.c:75).
> 70 err_info = (struct cper_arm_err_info *)(err + 1);
> 71 ctx_info = (struct cper_arm_ctx_info *)(err_info + err->err_info_num);
> 72 ctx_err = (u8 *)ctx_info;
> 73
> 74 for (n = 0; n < err->context_info_num; n++) {
> 75 sz = sizeof(struct cper_arm_ctx_info) + ctx_info->size;
> 76 ctx_info = (struct cper_arm_ctx_info *)((long)ctx_info + sz);
> 77 ctx_len += sz;
> 78 }
> 79
>
> and similar ones while trying to access section_length on an
> error dump with too small size.
>
> Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Hi Mauro,
This is fiddly stuff to read in the spec, but I think you are double
counting the size of the "ARM Processor Error Information Structure":
in this case the length field is the length of the structure itself,
not the length of a body that follows it.
Jonathan
> ---
> drivers/acpi/apei/ghes.c | 33 +++++++++++++++++++++++++++++----
> drivers/ras/ras.c | 6 +++++-
> 2 files changed, 34 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index 0dc767392a6c..9bf4ec84f160 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -552,21 +552,46 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
> {
> struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
> int flags = sync ? MF_ACTION_REQUIRED : 0;
> + int length = gdata->error_data_length;
> char error_type[120];
> bool queued = false;
> int sec_sev, i;
> char *p;
>
> sec_sev = ghes_severity(gdata->error_severity);
> - log_arm_hw_error(err, sec_sev);
> + if (length >= sizeof(*err)) {
> + log_arm_hw_error(err, sec_sev);
> + } else {
> + pr_warn(FW_BUG "arm error length: %d\n", length);
> + pr_warn(FW_BUG "length is too small\n");
> + pr_warn(FW_BUG "firmware-generated error record is incorrect\n");
> + return false;
> + }
> +
> if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE)
> return false;
>
> p = (char *)(err + 1);
> + length -= sizeof(err);
Hacks off the bit of the section that is fixed size.
> +
> for (i = 0; i < err->err_info_num; i++) {
> - struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p;
> - bool is_cache = err_info->type & CPER_ARM_CACHE_ERROR;
> - bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
> + struct cper_arm_err_info *err_info;
> + bool is_cache, has_pa;
> +
> + /* Ensure we have enough data for the error info header */
> + length -= sizeof(*err_info);
Hacks off the length of one processor error information structure (fixed 32 bytes).
> + if (length < 0)
> + break;
> +
> + err_info = (struct cper_arm_err_info *)p;
> +
> + /* Validate the claimed length before using it */
> + length -= err_info->length;
This one confuses me. err_info->length is the same 32 bytes you removed above.
So I think this check is wrong.
> + if (length < 0)
> + break;
> +
> + is_cache = err_info->type & CPER_ARM_CACHE_ERROR;
> + has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
>
> /*
> * The field (err_info->error_info & BIT(26)) is fixed to set to
> diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
> index 2a5b5a9fdcb3..03df3db62334 100644
> --- a/drivers/ras/ras.c
> +++ b/drivers/ras/ras.c
> @@ -72,7 +72,11 @@ void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev)
> ctx_err = (u8 *)ctx_info;
>
> for (n = 0; n < err->context_info_num; n++) {
> - sz = sizeof(struct cper_arm_ctx_info) + ctx_info->size;
> + sz = sizeof(struct cper_arm_ctx_info);
> +
> + if (sz + (long)ctx_info - (long)err >= err->section_length)
> + sz += ctx_info->size;
> +
> ctx_info = (struct cper_arm_ctx_info *)((long)ctx_info + sz);
> ctx_len += sz;
> }
On Mon, Dec 22, 2025 at 11:38:51AM +0000, Jonathan Cameron wrote:
> On Fri, 19 Dec 2025 11:49:59 +0100
> Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:
>
> > If the BIOS generates a very small ARM Processor Error, or
> > an incomplete one, the current logic will fail to deferrence
> >
> > err->section_length
> > and
> > ctx_info->size
> >
> > Add checks to avoid that. With such changes, such GHESv2
> > records won't cause OOPSes like this:
> >
> > [ 1.492129] Internal error: Oops: 0000000096000005 [#1] SMP
> > [ 1.495449] Modules linked in:
> > [ 1.495820] CPU: 0 UID: 0 PID: 9 Comm: kworker/0:0 Not tainted 6.18.0-rc1-00017-gabadcc3553dd-dirty #18 PREEMPT
> > [ 1.496125] Hardware name: QEMU QEMU Virtual Machine, BIOS unknown 02/02/2022
> > [ 1.496433] Workqueue: kacpi_notify acpi_os_execute_deferred
> > [ 1.496967] pstate: 814000c5 (Nzcv daIF +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
> > [ 1.497199] pc : log_arm_hw_error+0x5c/0x200
> > [ 1.497380] lr : ghes_handle_arm_hw_error+0x94/0x220
> >
> > 0xffff8000811c5324 is in log_arm_hw_error (../drivers/ras/ras.c:75).
> > 70 err_info = (struct cper_arm_err_info *)(err + 1);
> > 71 ctx_info = (struct cper_arm_ctx_info *)(err_info + err->err_info_num);
> > 72 ctx_err = (u8 *)ctx_info;
> > 73
> > 74 for (n = 0; n < err->context_info_num; n++) {
> > 75 sz = sizeof(struct cper_arm_ctx_info) + ctx_info->size;
> > 76 ctx_info = (struct cper_arm_ctx_info *)((long)ctx_info + sz);
> > 77 ctx_len += sz;
> > 78 }
> > 79
> >
> > and similar ones while trying to access section_length on an
> > error dump with too small size.
> >
> > Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
> Hi Mauro,
>
> This is fiddly stuff to read in the spec but I think you have a double
> counting of the "ARM Processors Error Information Structure" size as
> the length in that this time is the length of the structure itself,
> not a following body.
True. The change below should fix it:
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index d37540ef8c00..aacb8d66a3e1 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -582,8 +582,7 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
bool is_cache, has_pa;
/* Ensure we have enough data for the error info header */
- length -= sizeof(*err_info);
- if (length < 0)
+ if (length < sizeof(*err_info))
break;
err_info = (struct cper_arm_err_info *)p;
I'll run some tests here after the change before submitting v4.
Thanks!
Mauro
>
> Jonathan
>
>
> > ---
> > drivers/acpi/apei/ghes.c | 33 +++++++++++++++++++++++++++++----
> > drivers/ras/ras.c | 6 +++++-
> > 2 files changed, 34 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> > index 0dc767392a6c..9bf4ec84f160 100644
> > --- a/drivers/acpi/apei/ghes.c
> > +++ b/drivers/acpi/apei/ghes.c
> > @@ -552,21 +552,46 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
> > {
> > struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
> > int flags = sync ? MF_ACTION_REQUIRED : 0;
> > + int length = gdata->error_data_length;
> > char error_type[120];
> > bool queued = false;
> > int sec_sev, i;
> > char *p;
> >
> > sec_sev = ghes_severity(gdata->error_severity);
> > - log_arm_hw_error(err, sec_sev);
> > + if (length >= sizeof(*err)) {
> > + log_arm_hw_error(err, sec_sev);
> > + } else {
> > + pr_warn(FW_BUG "arm error length: %d\n", length);
> > + pr_warn(FW_BUG "length is too small\n");
> > + pr_warn(FW_BUG "firmware-generated error record is incorrect\n");
> > + return false;
> > + }
> > +
> > if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE)
> > return false;
> >
> > p = (char *)(err + 1);
> > + length -= sizeof(err);
> Hacks off the bit of the section that is fixed size.
> > +
> > for (i = 0; i < err->err_info_num; i++) {
> > - struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p;
> > - bool is_cache = err_info->type & CPER_ARM_CACHE_ERROR;
> > - bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
> > + struct cper_arm_err_info *err_info;
> > + bool is_cache, has_pa;
> > +
> > + /* Ensure we have enough data for the error info header */
> > + length -= sizeof(*err_info);
> hacks of length of one processor error information structure (fixed 32 bytes)
>
> > + if (length < 0)
> > + break;
> > +
> > + err_info = (struct cper_arm_err_info *)p;
> > +
> > + /* Validate the claimed length before using it */
> > + length -= err_info->length;
>
> This one confuses me. err_info->length is the same 32 bytes you removed above.
>
> So I think this check is wrong.
>
>
> > + if (length < 0)
> > + break;
> > +
> > + is_cache = err_info->type & CPER_ARM_CACHE_ERROR;
> > + has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
> >
> > /*
> > * The field (err_info->error_info & BIT(26)) is fixed to set to
> > diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
> > index 2a5b5a9fdcb3..03df3db62334 100644
> > --- a/drivers/ras/ras.c
> > +++ b/drivers/ras/ras.c
> > @@ -72,7 +72,11 @@ void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev)
> > ctx_err = (u8 *)ctx_info;
> >
> > for (n = 0; n < err->context_info_num; n++) {
> > - sz = sizeof(struct cper_arm_ctx_info) + ctx_info->size;
> > + sz = sizeof(struct cper_arm_ctx_info);
> > +
> > + if (sz + (long)ctx_info - (long)err >= err->section_length)
> > + sz += ctx_info->size;
> > +
> > ctx_info = (struct cper_arm_ctx_info *)((long)ctx_info + sz);
> > ctx_len += sz;
> > }
>
--
Thanks,
Mauro
© 2016 - 2026 Red Hat, Inc.