Add logic to do PCIe BUS error injection.
On the Linux kernel, although the CPER_SEC_PCI_X_BUS macro is defined for
such events, ghes.c doesn't implement support for it yet:
[16950.077494] {26}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1
[16950.077866] {26}[Hardware Error]: event severity: recoverable
[16950.078118] {26}[Hardware Error]: Error 0, type: recoverable
[16950.078444] {26}[Hardware Error]: section type: unknown, c5753963-3b84-4095-bf78-eddad3f9c9dd
[16950.078800] {26}[Hardware Error]: section length: 0x48
[16950.079069] {26}[Hardware Error]: 00000000: 00000000 00000000 00000000 00000000 ................
[16950.079442] {26}[Hardware Error]: 00000010: 00000001 00000000 00000000 00000000 ................
[16950.079811] {26}[Hardware Error]: 00000020: 00000000 00000000 00000000 00000000 ................
[16950.080181] {26}[Hardware Error]: 00000030: 00000000 00000000 00000000 00000000 ................
[16950.080538] {26}[Hardware Error]: 00000040: 00000000 00000000 ........
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
MAINTAINERS | 1 +
scripts/ghes_inject.py | 2 +
scripts/pcie_bus_error.py | 146 ++++++++++++++++++++++++++++++++++++++
3 files changed, 149 insertions(+)
create mode 100644 scripts/pcie_bus_error.py
diff --git a/MAINTAINERS b/MAINTAINERS
index a970c47dd089..48067a618523 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2228,6 +2228,7 @@ F: qapi/acpi-hest.json
F: scripts/ghes_decode.py
F: scripts/ghes_inject.py
F: scripts/arm_processor_error.py
+F: scripts/pcie_bus_error.py
F: scripts/qmp_helper.py
ppc4xx
diff --git a/scripts/ghes_inject.py b/scripts/ghes_inject.py
index 6ac917d0b5db..29a6a57508cd 100755
--- a/scripts/ghes_inject.py
+++ b/scripts/ghes_inject.py
@@ -12,6 +12,7 @@
import sys
from arm_processor_error import ArmProcessorEinj
+from pcie_bus_error import PcieBusError
EINJ_DESC = """
Handle ACPI GHESv2 error injection logic QEMU QMP interface.
@@ -40,6 +41,7 @@ def main():
subparsers = parser.add_subparsers()
ArmProcessorEinj(subparsers)
+ PcieBusError(subparsers)
args = parser.parse_args()
if "func" in args:
diff --git a/scripts/pcie_bus_error.py b/scripts/pcie_bus_error.py
new file mode 100644
index 000000000000..e8285b5dcc84
--- /dev/null
+++ b/scripts/pcie_bus_error.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+#
+# pylint: disable=C0114,R0903
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2024 Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+
+from qmp_helper import qmp, util, cper_guid
+
+class PcieBusError:
+ """
+ Implements PCI Express bus error injection via GHES
+ """
+
+ def __init__(self, subparsers):
+ """Initialize the error injection class and add subparser"""
+
+ # Valid values
+ self.valid_bits = {
+ "status": util.bit(0),
+ "type": util.bit(1),
+ "bus-id": util.bit(2),
+ "bus-addr": util.bit(3),
+ "bus-data": util.bit(4),
+ "command": util.bit(5),
+ "requestor-id": util.bit(6),
+ "completer-id": util.bit(7),
+ "target-id": util.bit(8),
+ }
+
+ self.bus_command_bits = {
+ "pci": 0, # Bit 56 is zero
+ "pci-x": util.bit(56)
+ }
+
+ self.data = bytearray()
+
+ parser = subparsers.add_parser("pcie-bus",
+ description="Generate PCIe bus error CPER")
+ g_pcie = parser.add_argument_group("PCIe bus error")
+
+ valid_bits = ",".join(self.valid_bits.keys())
+ bus_command_bits = ",".join(self.bus_command_bits.keys())
+
+ g_pcie.add_argument("-v", "--valid",
+ help=f"Valid bits: {valid_bits}")
+ g_pcie.add_argument("-s", "--error-status",
+ type=lambda x: int(x, 0),
+ help="Error Status")
+ g_pcie.add_argument("-t", "--error-type",
+ type=lambda x: int(x, 0),
+ help="Error type")
+ g_pcie.add_argument("-b", "--bus-number",
+ type=lambda x: int(x, 0),
+ help="Bus number")
+ g_pcie.add_argument("-S", "--segment-number",
+ type=lambda x: int(x, 0),
+ help="Segment number")
+ g_pcie.add_argument("-a", "--bus-address",
+ type=lambda x: int(x, 0),
+ help="Bus address")
+ g_pcie.add_argument("-d", "--bus-data",
+ type=lambda x: int(x, 0),
+ help="Bus data")
+ g_pcie.add_argument("-c", "--bus-command",
+ help=f"bus-command: {bus_command_bits}")
+ g_pcie.add_argument("-r", "--bus-requestor",
+ type=lambda x: int(x, 0),
+ help="Bus requestor ID")
+ g_pcie.add_argument("-C", "--bus-completer",
+ type=lambda x: int(x, 0),
+ help="Bus completer ID")
+ g_pcie.add_argument("-i", "--target-id",
+ type=lambda x: int(x, 0),
+ help="Target ID")
+
+ parser.set_defaults(func=self.send_cper)
+
+ def send_cper(self, args):
+ """Parse subcommand arguments and send a CPER via QMP"""
+
+ qmp_cmd = qmp(args.host, args.port, args.debug)
+
+ cper = {}
+ arg = vars(args)
+
+ # Handle global parameters
+ if args.valid:
+ valid_init = False
+ cper["valid"] = util.get_choice(name="valid",
+ value=args.valid,
+ choices=self.valid_bits)
+ else:
+ cper["valid"] = 0
+ valid_init = True
+
+ if args.bus_command:
+ cper["bus-command"] = util.get_choice(name="bus-command",
+ value=args.bus_command,
+ choices=self.bus_command_bits)
+ if valid_init:
+ if args.error_status:
+ cper["valid"] |= self.valid_bits["status"]
+
+ if args.error_type:
+ cper["valid"] |= self.valid_bits["type"]
+
+ if args.bus_number and args.bus_segment:
+ cper["valid"] |= self.valid_bits["bus-id"]
+
+ if args.bus_address:
+ cper["valid"] |= self.valid_bits["bus-address"]
+
+ if args.bus_data:
+ cper["valid"] |= self.valid_bits["bus-data"]
+
+ if args.bus_requestor:
+ cper["valid"] |= self.valid_bits["requestor-id"]
+
+ if args.bus_completer:
+ cper["valid"] |= self.valid_bits["completer-id"]
+
+ if args.target_id:
+ cper["valid"] |= self.valid_bits["target-id"]
+
+ util.data_add(self.data, cper["valid"], 8)
+ util.data_add(self.data, arg.get("error-status", 0), 8)
+ util.data_add(self.data, arg.get("error-type", util.bit(0)), 2)
+
+ # Bus ID
+ util.data_add(self.data, arg.get("bus-number", 0), 1)
+ util.data_add(self.data, arg.get("segment-number", 0), 1)
+
+ # Reserved
+ util.data_add(self.data, 0, 4)
+
+ util.data_add(self.data, arg.get("bus-address", 0), 8)
+ util.data_add(self.data, arg.get("bus-data", 0), 8)
+
+ util.data_add(self.data, cper.get("bus-command", 0), 8)
+
+ util.data_add(self.data, arg.get("bus-requestor", 0), 8)
+ util.data_add(self.data, arg.get("bus-completer", 0), 8)
+ util.data_add(self.data, arg.get("target-id", 0), 8)
+
+ return qmp_cmd.send_cper(cper_guid.CPER_PCI_BUS, self.data)
--
2.52.0
On Wed, 21 Jan 2026 12:25:17 +0100
Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:
> Add a logic to do PCIe BUS error injection.
>
> On Linux Kernel, despite CPER_SEC_PCI_X_BUS macro is defined for such
> event, ghes.c doesn't implement support for it yet:
>
> [16950.077494] {26}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1
> [16950.077866] {26}[Hardware Error]: event severity: recoverable
> [16950.078118] {26}[Hardware Error]: Error 0, type: recoverable
> [16950.078444] {26}[Hardware Error]: section type: unknown, c5753963-3b84-4095-bf78-eddad3f9c9dd
> [16950.078800] {26}[Hardware Error]: section length: 0x48
> [16950.079069] {26}[Hardware Error]: 00000000: 00000000 00000000 00000000 00000000 ................
> [16950.079442] {26}[Hardware Error]: 00000010: 00000001 00000000 00000000 00000000 ................
> [16950.079811] {26}[Hardware Error]: 00000020: 00000000 00000000 00000000 00000000 ................
> [16950.080181] {26}[Hardware Error]: 00000030: 00000000 00000000 00000000 00000000 ................
> [16950.080538] {26}[Hardware Error]: 00000040: 00000000 00000000 ........
>
> Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
LGTM. A bit surprised Linux doesn't decode it, but fair enough.
Seems a bit unlikely it ever will, given this seems not to cover PCIe,
which has its own records.
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
On Wed, 21 Jan 2026 13:32:55 +0000
Jonathan Cameron <jonathan.cameron@huawei.com> wrote:
> On Wed, 21 Jan 2026 12:25:17 +0100
> Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:
>
> > Add a logic to do PCIe BUS error injection.
> >
> > On Linux Kernel, despite CPER_SEC_PCI_X_BUS macro is defined for such
> > event, ghes.c doesn't implement support for it yet:
> >
> > [16950.077494] {26}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1
> > [16950.077866] {26}[Hardware Error]: event severity: recoverable
> > [16950.078118] {26}[Hardware Error]: Error 0, type: recoverable
> > [16950.078444] {26}[Hardware Error]: section type: unknown, c5753963-3b84-4095-bf78-eddad3f9c9dd
> > [16950.078800] {26}[Hardware Error]: section length: 0x48
> > [16950.079069] {26}[Hardware Error]: 00000000: 00000000 00000000 00000000 00000000 ................
> > [16950.079442] {26}[Hardware Error]: 00000010: 00000001 00000000 00000000 00000000 ................
> > [16950.079811] {26}[Hardware Error]: 00000020: 00000000 00000000 00000000 00000000 ................
> > [16950.080181] {26}[Hardware Error]: 00000030: 00000000 00000000 00000000 00000000 ................
> > [16950.080538] {26}[Hardware Error]: 00000040: 00000000 00000000 ........
> >
> > Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
>
> LGTM. Bit surprised Linux doesn't decode it but fair enough.
> Seems a bit unlikely it ever will given this seems not to cover PCIe
> which has it's own records.
>
Just noticed your patch description. This is PCI/PCI-X errors, not PCIe.
> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
On Wed, 21 Jan 2026 13:33:55 +0000
Jonathan Cameron via qemu development <qemu-devel@nongnu.org> wrote:
> On Wed, 21 Jan 2026 13:32:55 +0000
> Jonathan Cameron <jonathan.cameron@huawei.com> wrote:
>
> > On Wed, 21 Jan 2026 12:25:17 +0100
> > Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:
> >
> > > Add a logic to do PCIe BUS error injection.
> > >
> > > On Linux Kernel, despite CPER_SEC_PCI_X_BUS macro is defined for such
> > > event, ghes.c doesn't implement support for it yet:
> > >
> > > [16950.077494] {26}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1
> > > [16950.077866] {26}[Hardware Error]: event severity: recoverable
> > > [16950.078118] {26}[Hardware Error]: Error 0, type: recoverable
> > > [16950.078444] {26}[Hardware Error]: section type: unknown, c5753963-3b84-4095-bf78-eddad3f9c9dd
> > > [16950.078800] {26}[Hardware Error]: section length: 0x48
> > > [16950.079069] {26}[Hardware Error]: 00000000: 00000000 00000000 00000000 00000000 ................
> > > [16950.079442] {26}[Hardware Error]: 00000010: 00000001 00000000 00000000 00000000 ................
> > > [16950.079811] {26}[Hardware Error]: 00000020: 00000000 00000000 00000000 00000000 ................
> > > [16950.080181] {26}[Hardware Error]: 00000030: 00000000 00000000 00000000 00000000 ................
> > > [16950.080538] {26}[Hardware Error]: 00000040: 00000000 00000000 ........
> > >
> > > Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
> >
> > LGTM. Bit surprised Linux doesn't decode it but fair enough.
> > Seems a bit unlikely it ever will given this seems not to cover PCIe
> > which has it's own records.
> >
> Just noticed your patch description. This is PCI/PCI-X errors, not PCIe.
Seems this was stuck in my outbox. Please ignore it, as I think you
fixed this long ago.
J
>
> > Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
>
>
On Wed, 21 Jan 2026 13:32:55 +0000
Jonathan Cameron via qemu development <qemu-devel@nongnu.org> wrote:
> On Wed, 21 Jan 2026 12:25:17 +0100
> Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:
>
> > Add a logic to do PCIe BUS error injection.
> >
> > On Linux Kernel, despite CPER_SEC_PCI_X_BUS macro is defined for such
> > event, ghes.c doesn't implement support for it yet:
> >
> > [16950.077494] {26}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1
> > [16950.077866] {26}[Hardware Error]: event severity: recoverable
> > [16950.078118] {26}[Hardware Error]: Error 0, type: recoverable
> > [16950.078444] {26}[Hardware Error]: section type: unknown, c5753963-3b84-4095-bf78-eddad3f9c9dd
> > [16950.078800] {26}[Hardware Error]: section length: 0x48
> > [16950.079069] {26}[Hardware Error]: 00000000: 00000000 00000000 00000000 00000000 ................
> > [16950.079442] {26}[Hardware Error]: 00000010: 00000001 00000000 00000000 00000000 ................
> > [16950.079811] {26}[Hardware Error]: 00000020: 00000000 00000000 00000000 00000000 ................
> > [16950.080181] {26}[Hardware Error]: 00000030: 00000000 00000000 00000000 00000000 ................
> > [16950.080538] {26}[Hardware Error]: 00000040: 00000000 00000000 ........
> >
> > Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
>
> LGTM. Bit surprised Linux doesn't decode it but fair enough.
> Seems a bit unlikely it ever will given this seems not to cover PCIe
> which has it's own records.
Yeah, I misread the spec when I wrote it: this one is specific to
PCI/PCI-X, and not PCIe. That probably explains why it has not been
implemented in practice yet.
I'll rename it.
Still, it is good to test it, even though it is not implemented, as it
helps to check how Linux reacts to a GUID it doesn't know
about.
>
> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
>
On Wed, Jan 21, 2026 at 01:32:55PM +0000, Jonathan Cameron wrote:
> On Wed, 21 Jan 2026 12:25:17 +0100
> Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:
>
> > Add a logic to do PCIe BUS error injection.
> >
> > On Linux Kernel, despite CPER_SEC_PCI_X_BUS macro is defined for such
> > event, ghes.c doesn't implement support for it yet:
> >
> > [16950.077494] {26}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1
> > [16950.077866] {26}[Hardware Error]: event severity: recoverable
> > [16950.078118] {26}[Hardware Error]: Error 0, type: recoverable
> > [16950.078444] {26}[Hardware Error]: section type: unknown, c5753963-3b84-4095-bf78-eddad3f9c9dd
> > [16950.078800] {26}[Hardware Error]: section length: 0x48
> > [16950.079069] {26}[Hardware Error]: 00000000: 00000000 00000000 00000000 00000000 ................
> > [16950.079442] {26}[Hardware Error]: 00000010: 00000001 00000000 00000000 00000000 ................
> > [16950.079811] {26}[Hardware Error]: 00000020: 00000000 00000000 00000000 00000000 ................
> > [16950.080181] {26}[Hardware Error]: 00000030: 00000000 00000000 00000000 00000000 ................
> > [16950.080538] {26}[Hardware Error]: 00000040: 00000000 00000000 ........
> >
> > Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
>
> LGTM. Bit surprised Linux doesn't decode it but fair enough.
> Seems a bit unlikely it ever will given this seems not to cover PCIe
> which has it's own records.
Yeah, me too. If I understood the specs correctly, this one is related to
the PCIe bus controller, while the other one is for the PCIe device.
Perhaps in practice vendors are using a hardware-first approach for
the PCI controller.
>
> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
--
Thanks,
Mauro
© 2016 - 2026 Red Hat, Inc.