drivers/cxl/acpi.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-)
On some platforms (e.g., RISC-V and ARM64) that use the generic
pci_acpi_scan_root() implementation, cxl_acpi_probe may run before
acpi_pci_root driver has bound to ACPI0016 (CXL host bridge) devices.
In this case, acpi_pci_find_root() returns NULL, causing
to_cxl_host_bridge() to skip the device silently. This results in
incomplete CXL port enumeration on first boot.
Fix this by detecting the case where an ACPI0016 device exists but its
PCI root bridge is not yet ready, and returning -EPROBE_DEFER to trigger
a deferred probe retry.
Signed-off-by: Chen Pei <cp0613@linux.alibaba.com>
---
drivers/cxl/acpi.c | 26 ++++++++++++++++++++++++--
1 file changed, 24 insertions(+), 2 deletions(-)
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index 127537628817..9952d0cff903 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -631,8 +631,21 @@ static int add_host_bridge_dport(struct device *match, void *arg)
struct acpi_pci_root *pci_root;
struct cxl_port *root_port = arg;
struct device *host = root_port->dev.parent;
- struct acpi_device *hb = to_cxl_host_bridge(host, match);
+ struct acpi_device *adev = to_acpi_device(match);
+ struct acpi_device *hb;
+ /*
+ * If this is an ACPI0016 device but acpi_pci_find_root() hasn't
+ * found the PCI root yet (driver not probed), defer the probe
+ * to allow acpi_pci_root to bind first.
+ */
+ if (strcmp(acpi_device_hid(adev), "ACPI0016") == 0 &&
+ !acpi_pci_find_root(adev->handle)) {
+ dev_dbg(host, "deferring probe, ACPI0016 PCI root not ready\n");
+ return -EPROBE_DEFER;
+ }
+
+ hb = to_cxl_host_bridge(host, match);
if (!hb)
return 0;
@@ -688,7 +701,8 @@ static int add_host_bridge_uport(struct device *match, void *arg)
{
struct cxl_port *root_port = arg;
struct device *host = root_port->dev.parent;
- struct acpi_device *hb = to_cxl_host_bridge(host, match);
+ struct acpi_device *adev = to_acpi_device(match);
+ struct acpi_device *hb;
struct acpi_pci_root *pci_root;
struct cxl_dport *dport;
struct cxl_port *port;
@@ -697,6 +711,14 @@ static int add_host_bridge_uport(struct device *match, void *arg)
resource_size_t component_reg_phys;
int rc;
+ /* Same deferral check as in add_host_bridge_dport() */
+ if (strcmp(acpi_device_hid(adev), "ACPI0016") == 0 &&
+ !acpi_pci_find_root(adev->handle)) {
+ dev_dbg(host, "deferring probe, ACPI0016 PCI root not ready\n");
+ return -EPROBE_DEFER;
+ }
+
+ hb = to_cxl_host_bridge(host, match);
if (!hb)
return 0;
--
2.50.1
On 5/13/26 7:32 PM, Chen Pei wrote:
> On some platforms (e.g., RISC-V and ARM64) that use the generic
> pci_acpi_scan_root() implementation, cxl_acpi_probe may run before
> acpi_pci_root driver has bound to ACPI0016 (CXL host bridge) devices.
> In this case, acpi_pci_find_root() returns NULL, causing
> to_cxl_host_bridge() to skip the device silently. This results in
> incomplete CXL port enumeration on first boot.
>
> Fix this by detecting the case where an ACPI0016 device exists but its
> PCI root bridge is not yet ready, and returning -EPROBE_DEFER to trigger
> a deferred probe retry.
>
> Signed-off-by: Chen Pei <cp0613@linux.alibaba.com>
> ---
> drivers/cxl/acpi.c | 26 ++++++++++++++++++++++++--
> 1 file changed, 24 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
> index 127537628817..9952d0cff903 100644
> --- a/drivers/cxl/acpi.c
> +++ b/drivers/cxl/acpi.c
> @@ -631,8 +631,21 @@ static int add_host_bridge_dport(struct device *match, void *arg)
> struct acpi_pci_root *pci_root;
> struct cxl_port *root_port = arg;
> struct device *host = root_port->dev.parent;
> - struct acpi_device *hb = to_cxl_host_bridge(host, match);
> + struct acpi_device *adev = to_acpi_device(match);
> + struct acpi_device *hb;
>
> + /*
> + * If this is an ACPI0016 device but acpi_pci_find_root() hasn't
> + * found the PCI root yet (driver not probed), defer the probe
> + * to allow acpi_pci_root to bind first.
> + */
> + if (strcmp(acpi_device_hid(adev), "ACPI0016") == 0 &&
> + !acpi_pci_find_root(adev->handle)) {
I would leave the assignment of 'hb' as is above and just do what's below, after looking at the to_cxl_host_bridge() function.
if (!hb) {
if (!acpi_pci_find_root(adev->handle)) {
...
return -EPROBE_DEFER;
}
return 0;
}
I am concerned about other failure cases that cause acpi_pci_find_root() to fail beyond the driver not being attached. Will cxl_acpi probe() be stuck permanently in the deferred probe queue when that happens? This is also something that Sashiko raised. Is there no way to adjust the RISCV ACPI setup to work around subsys_initcall()? That would be the preferred solution.
DJ
> + dev_dbg(host, "deferring probe, ACPI0016 PCI root not ready\n");
> + return -EPROBE_DEFER;
> + }
> +
> + hb = to_cxl_host_bridge(host, match);
> if (!hb)
> return 0;
>
> @@ -688,7 +701,8 @@ static int add_host_bridge_uport(struct device *match, void *arg)
> {
> struct cxl_port *root_port = arg;
> struct device *host = root_port->dev.parent;
> - struct acpi_device *hb = to_cxl_host_bridge(host, match);
> + struct acpi_device *adev = to_acpi_device(match);
> + struct acpi_device *hb;
> struct acpi_pci_root *pci_root;
> struct cxl_dport *dport;
> struct cxl_port *port;
> @@ -697,6 +711,14 @@ static int add_host_bridge_uport(struct device *match, void *arg)
> resource_size_t component_reg_phys;
> int rc;
>
> + /* Same deferral check as in add_host_bridge_dport() */
> + if (strcmp(acpi_device_hid(adev), "ACPI0016") == 0 &&
> + !acpi_pci_find_root(adev->handle)) {
> + dev_dbg(host, "deferring probe, ACPI0016 PCI root not ready\n");
> + return -EPROBE_DEFER;
> + }
> +
> + hb = to_cxl_host_bridge(host, match);
> if (!hb)
> return 0;
>
On Fri, 15 May 2026 08:38:01 -0700, Dave Jiang wrote:
> I would leave the assignment of 'hb' as is above and just what's below
> after looking at to_cxl_host_bridge() function.
>
> if (!hb) {
> if (!acpi_pci_find_root(adev->handle)) {
> ...
> return -EPROBE_DEFER;
> }
> return 0;
> }
Hi Dave,
Thanks for the review and the code structure suggestion. I agree that
nesting the check under `if (!hb)` is cleaner.
> I am concerned of other failure cases that causes acpi_pci_find_root()
> to fail beyond the driver not being attached. Will cxl_acpi probe() be
> stuck permanently in the defer probe queue when that happens? This is
> also something that Sashiko raised. Is there no way to adjust the
> RISCV ACPI setup to go around subsys_initcall()? That would be
> preferred solution.
You raise a valid concern. The EPROBE_DEFER approach indeed risks
infinite deferral if acpi_pci_find_root() fails for reasons other
than the driver not being attached yet. This is a fundamental
limitation of this approach.
After considering the feedback from you, Sashiko, and Dan Williams,
I plan to abandon the EPROBE_DEFER approach and instead use the
ACPI _DEP mechanism as Dan suggested. This is the proper ACPI
standard way to declare device dependencies:
1. Firmware side (DSDT): Add a _DEP package to the ACPI0017 device
pointing to all ACPI0016 host bridges it depends on.
2. Kernel side: Add acpi_dev_clear_dependencies() in acpi_pci_root_add()
so that ACPI0017 will naturally wait for all ACPI0016 devices to
be ready before probing.
This eliminates the permanent defer risk since the dependency is
explicitly declared by firmware and resolved by the ACPI core,
rather than relying on runtime probing heuristics.
I'll send a new patch series for this approach.
Thanks,
Pei
On Thu, May 14, 2026 at 10:32:38AM +0800, Chen Pei wrote:
> On some platforms (e.g., RISC-V and ARM64) that use the generic
> pci_acpi_scan_root() implementation, cxl_acpi_probe may run before
> acpi_pci_root driver has bound to ACPI0016 (CXL host bridge) devices.
> In this case, acpi_pci_find_root() returns NULL, causing
> to_cxl_host_bridge() to skip the device silently. This results in
> incomplete CXL port enumeration on first boot.
>
> Fix this by detecting the case where an ACPI0016 device exists but its
> PCI root bridge is not yet ready, and returning -EPROBE_DEFER to trigger
> a deferred probe retry.
>
> Signed-off-by: Chen Pei <cp0613@linux.alibaba.com>
Hi Chen Pei,
As Richard suggested, this fails for the mock platform in cxl-test.
(stack trace appended at end)
With this diff applied on top of your patch, it works for cxl-test
AND I think it works for your case too. With real hardware,
ACPI_COMPANION returns the device, and with the mock platform,
ACPI_COMPANION returns NULL and the defer check is skipped.
Try it out, and note that I didn't consider if any of the comments
need updating.
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index 9952d0cff903..ec037668afba 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -631,7 +631,7 @@ static int add_host_bridge_dport(struct device *match, void *arg)
struct acpi_pci_root *pci_root;
struct cxl_port *root_port = arg;
struct device *host = root_port->dev.parent;
- struct acpi_device *adev = to_acpi_device(match);
+ struct acpi_device *adev = ACPI_COMPANION(match);
struct acpi_device *hb;
/*
@@ -639,7 +639,7 @@ static int add_host_bridge_dport(struct device *match, void *arg)
* found the PCI root yet (driver not probed), defer the probe
* to allow acpi_pci_root to bind first.
*/
- if (strcmp(acpi_device_hid(adev), "ACPI0016") == 0 &&
+ if (adev && strcmp(acpi_device_hid(adev), "ACPI0016") == 0 &&
!acpi_pci_find_root(adev->handle)) {
dev_dbg(host, "deferring probe, ACPI0016 PCI root not ready\n");
return -EPROBE_DEFER;
@@ -701,7 +701,7 @@ static int add_host_bridge_uport(struct device *match, void *arg)
{
struct cxl_port *root_port = arg;
struct device *host = root_port->dev.parent;
- struct acpi_device *adev = to_acpi_device(match);
+ struct acpi_device *adev = ACPI_COMPANION(match);
struct acpi_device *hb;
struct acpi_pci_root *pci_root;
struct cxl_dport *dport;
@@ -711,8 +711,7 @@ static int add_host_bridge_uport(struct device *match, void *arg)
resource_size_t component_reg_phys;
int rc;
- /* Same deferral check as in add_host_bridge_dport() */
- if (strcmp(acpi_device_hid(adev), "ACPI0016") == 0 &&
+ if (adev && strcmp(acpi_device_hid(adev), "ACPI0016") == 0 &&
!acpi_pci_find_root(adev->handle)) {
dev_dbg(host, "deferring probe, ACPI0016 PCI root not ready\n");
return -EPROBE_DEFER;
==========
Failure loading cxl-test module:
[ 6.523556] calling cxl_test_init+0x0/0xff0 [cxl_test] @ 622
[ 6.524952] BUG: kernel NULL pointer dereference, address: 0000000000000091
[ 6.526022] #PF: supervisor read access in kernel mode
[ 6.526988] #PF: error_code(0x0000) - not-present page
[ 6.527855] PGD 0 P4D 0
[ 6.528331] Oops: Oops: 0000 [#1] SMP NOPTI
[ 6.529268] CPU: 3 UID: 0 PID: 622 Comm: systemd-modules Tainted: G O 7.1.0-rc1+ #212 PREEMPT(lazy)
[ 6.530655] Tainted: [O]=OOT_MODULE
[ 6.531238] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
[ 6.532321] RIP: 0010:acpi_device_hid+0x18/0x30
[ 6.533008] Code: cc cc 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 48 8b 87 98 00 00 00 48 81 c7 98 00 00 00 48 39 f8 74 0e 48 85 c0 74 09 <48> 8b 40 10 c3 cc cc cc cc 48 c7 c0 ad c9 a7 82 c3 cc cc cc cc 0f
[ 6.535011] RSP: 0018:ffffc90002077870 EFLAGS: 00010206
[ 6.535729] RAX: 0000000000000081 RBX: ffff8882000eb010 RCX: ffffffffa00504f0
[ 6.536514] RDX: ffff88800199acc8 RSI: ffff888006ca6000 RDI: ffff8882000eae48
[ 6.537284] RBP: ffffc900020778c0 R08: ffffffffa0e67491 R09: 0000000000000040
[ 6.538122] R10: ffff888203667c00 R11: ffffffff835ccf70 R12: ffff888006ba3010
[ 6.538961] R13: ffff888006ca6000 R14: 0000000000000000 R15: ffff888006ca6000
[ 6.539777] FS: 00007f9001205480(0000) GS:ffff8880fa501000(0000) knlGS:0000000000000000
[ 6.540639] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 6.541313] CR2: 0000000000000091 CR3: 0000000007037003 CR4: 0000000000370ef0
[ 6.542125] Call Trace:
[ 6.542540] <TASK>
[ 6.542903] ? add_host_bridge_dport+0x23/0x200 [cxl_acpi]
[ 6.543538] ? klist_next+0xb0/0x170
[ 6.544019] ? __pfx_add_host_bridge_dport+0x10/0x10 [cxl_acpi]
[ 6.544716] bus_for_each_dev+0x65/0xa0
[ 6.545204] cxl_acpi_probe+0xe5/0x2d0 [cxl_acpi]
[ 6.545758] ? acpi_dev_pm_attach+0x20/0xf0
[ 6.546300] platform_probe+0x3a/0x70
[ 6.546834] really_probe+0xda/0x3e0
[ 6.547302] ? __pfx___device_attach_driver+0x10/0x10
[ 6.547913] __driver_probe_device+0x10b/0x1a0
[ 6.548422] driver_probe_device+0x1f/0x90
[ 6.548949] __device_attach_driver+0x8f/0x130
[ 6.549448] bus_for_each_drv+0x73/0xb0
[ 6.549947] __device_attach+0xb1/0x1c0
[ 6.550371] device_initial_probe+0x43/0x50
[ 6.550882] bus_probe_device+0x29/0x90
[ 6.551340] device_add+0x682/0x860
[ 6.551836] ? dev_set_name+0x3e/0x50
[ 6.552282] platform_device_add+0x176/0x260
[ 6.552820] cxl_test_init+0x80c/0xff0 [cxl_test]
[ 6.553348] ? __pfx_cxl_test_init+0x10/0x10 [cxl_test]
[ 6.553954] do_one_initcall+0x46/0x220
[ 6.554411] do_init_module+0x63/0x240
[ 6.554926] load_module+0x2826/0x2b40
[ 6.555410] ? kernel_read+0x3f/0x50
[ 6.555931] ? kernel_read_file+0x27b/0x2f0
[ 6.556414] init_module_from_file+0xbc/0xf0
[ 6.556964] __x64_sys_finit_module+0x267/0x380
[ 6.557473] x64_sys_call+0x1d68/0x2010
[ 6.557958] do_syscall_64+0x5a/0x470
[ 6.558399] entry_SYSCALL_64_after_hwframe+0x71/0x79
[ 6.558981] RIP: 0033:0x7f900110b27d
[ 6.559405] Code: 5d c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 cb 0e 00 f7 d8 64 89 01 48
[ 6.561245] RSP: 002b:00007fff4b386268 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
[ 6.562035] RAX: ffffffffffffffda RBX: 000055b7253f34b0 RCX: 00007f900110b27d
[ 6.562779] RDX: 0000000000000000 RSI: 00007f900178f43c RDI: 000000000000000c
[ 6.563473] RBP: 00007f900178f43c R08: 0000000000000000 R09: 000055b7253f8f80
[ 6.564187] R10: 000000000000000c R11: 0000000000000246 R12: 0000000000020000
[ 6.564918] R13: 000055b7253f4cc0 R14: 0000000000000000 R15: 000055b7253f97c0
[ 6.565601] </TASK>
[ 6.565895] Modules linked in: cxl_test(O+) cxl_acpi(O) cxl_pmem(O) cxl_mem(O) cxl_port(O) cxl_mock(O) cxl_core(O) fwctl libnvdimm
[ 6.566999] CR2: 0000000000000091
[ 6.567417] ---[ end trace 0000000000000000 ]---
[ 6.567990] RIP: 0010:acpi_device_hid+0x18/0x30
[ 6.568661] Code: cc cc 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 48 8b 87 98 00 00 00 48 81 c7 98 00 00 00 48 39 f8 74 0e 48 85 c0 74 09 <48> 8b 40 10 c3 cc cc cc cc 48 c7 c0 ad c9 a7 82 c3 cc cc cc cc 0f
[ 6.573628] RSP: 0018:ffffc90002077870 EFLAGS: 00010206
[ 6.575230] RAX: 0000000000000081 RBX: ffff8882000eb010 RCX: ffffffffa00504f0
[ 6.577279] RDX: ffff88800199acc8 RSI: ffff888006ca6000 RDI: ffff8882000eae48
[ 6.579343] RBP: ffffc900020778c0 R08: ffffffffa0e67491 R09: 0000000000000040
[ 6.581407] R10: ffff888203667c00 R11: ffffffff835ccf70 R12: ffff888006ba3010
[ 6.583237] R13: ffff888006ca6000 R14: 0000000000000000 R15: ffff888006ca6000
[ 6.584595] FS: 00007f9001205480(0000) GS:ffff8880fa501000(0000) knlGS:0000000000000000
[ 6.586087] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 6.587256] CR2: 0000000000000091 CR3: 0000000007037003 CR4: 0000000000370ef0
[ 6.588634] note: systemd-modules[622] exited with irqs disabled
On Thu, 14 May 2026 10:19:43 -0700, Alison Schofield wrote:
> > On some platforms (e.g., RISC-V and ARM64) that use the generic
> > pci_acpi_scan_root() implementation, cxl_acpi_probe may run before
> > acpi_pci_root driver has bound to ACPI0016 (CXL host bridge) devices.
> > In this case, acpi_pci_find_root() returns NULL, causing
> > to_cxl_host_bridge() to skip the device silently. This results in
> > incomplete CXL port enumeration on first boot.
> >
> > Fix this by detecting the case where an ACPI0016 device exists but its
> > PCI root bridge is not yet ready, and returning -EPROBE_DEFER to trigger
> > a deferred probe retry.
> >
> > Signed-off-by: Chen Pei <cp0613@linux.alibaba.com>
>
> Hi Chen Pei,
>
> As Richard suggested, this fails for the mock platform in cxl-test.
> (stack trace appended at end)
>
> With this diff applied on top of your patch, it works for cxl-test
> AND I think it works for your case too. With real hardware,
> ACPI_COMPANION returns the device, and with the mock platform,
> ACPI_COMPANION returns NULL and the defer check is skipped.
>
> Try it out, and note that I didn't consider if any of the comments
> need updating.
Hi Alison,
Thanks for the fix! I'll test it on my RISC-V QEMU setup and fold it
into v2 along with the other review feedback (switching to
acpi_dev_hid_match(), dropping the redundant check in
add_host_bridge_uport(), etc.).
Will also run the CXL kselftests to make sure the mock platform path
works correctly.
Thanks,
Pei
On 5/13/26 7:32 PM, Chen Pei wrote:
> On some platforms (e.g., RISC-V and ARM64) that use the generic
> pci_acpi_scan_root() implementation, cxl_acpi_probe may run before
> acpi_pci_root driver has bound to ACPI0016 (CXL host bridge) devices.
> In this case, acpi_pci_find_root() returns NULL, causing
> to_cxl_host_bridge() to skip the device silently. This results in
> incomplete CXL port enumeration on first boot.
>
> Fix this by detecting the case where an ACPI0016 device exists but its
> PCI root bridge is not yet ready, and returning -EPROBE_DEFER to trigger
> a deferred probe retry.
>
> Signed-off-by: Chen Pei <cp0613@linux.alibaba.com>
> ---
> drivers/cxl/acpi.c | 26 ++++++++++++++++++++++++--
> 1 file changed, 24 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
> index 127537628817..9952d0cff903 100644
> --- a/drivers/cxl/acpi.c
> +++ b/drivers/cxl/acpi.c
> @@ -631,8 +631,21 @@ static int add_host_bridge_dport(struct device *match, void *arg)
> struct acpi_pci_root *pci_root;
> struct cxl_port *root_port = arg;
> struct device *host = root_port->dev.parent;
> - struct acpi_device *hb = to_cxl_host_bridge(host, match);
> + struct acpi_device *adev = to_acpi_device(match);
> + struct acpi_device *hb;
>
> + /*
> + * If this is an ACPI0016 device but acpi_pci_find_root() hasn't
> + * found the PCI root yet (driver not probed), defer the probe
> + * to allow acpi_pci_root to bind first.
> + */
> + if (strcmp(acpi_device_hid(adev), "ACPI0016") == 0 &&
> + !acpi_pci_find_root(adev->handle)) {
> + dev_dbg(host, "deferring probe, ACPI0016 PCI root not ready\n");
> + return -EPROBE_DEFER;
> + }
> +
> + hb = to_cxl_host_bridge(host, match);
> if (!hb)
> return 0;
>
> @@ -688,7 +701,8 @@ static int add_host_bridge_uport(struct device *match, void *arg)
> {
> struct cxl_port *root_port = arg;
> struct device *host = root_port->dev.parent;
> - struct acpi_device *hb = to_cxl_host_bridge(host, match);
> + struct acpi_device *adev = to_acpi_device(match);
> + struct acpi_device *hb;
> struct acpi_pci_root *pci_root;
> struct cxl_dport *dport;
> struct cxl_port *port;
> @@ -697,6 +711,14 @@ static int add_host_bridge_uport(struct device *match, void *arg)
> resource_size_t component_reg_phys;
> int rc;
>
> + /* Same deferral check as in add_host_bridge_dport() */
> + if (strcmp(acpi_device_hid(adev), "ACPI0016") == 0 &&
> + !acpi_pci_find_root(adev->handle)) {
> + dev_dbg(host, "deferring probe, ACPI0016 PCI root not ready\n");
> + return -EPROBE_DEFER;
> + }
I don't believe this check in add_host_bridge_uport() is necessary. add_host_bridge_dport() happens first and the check should've been done there. There is no reason to do it again and you'll never reach here if it's not ready.
DJ
> +
> + hb = to_cxl_host_bridge(host, match);
> if (!hb)
> return 0;
>
On Thu, 14 May 2026 10:10:50 -0700, Dave Jiang wrote:
> > On some platforms (e.g., RISC-V and ARM64) that use the generic
> > pci_acpi_scan_root() implementation, cxl_acpi_probe may run before
> > acpi_pci_root driver has bound to ACPI0016 (CXL host bridge) devices.
> > In this case, acpi_pci_find_root() returns NULL, causing
> > to_cxl_host_bridge() to skip the device silently. This results in
> > incomplete CXL port enumeration on first boot.
> >
> > Fix this by detecting the case where an ACPI0016 device exists but its
> > PCI root bridge is not yet ready, and returning -EPROBE_DEFER to trigger
> > a deferred probe retry.
> >
> > Signed-off-by: Chen Pei <cp0613@linux.alibaba.com>
> > ---
> > drivers/cxl/acpi.c | 26 ++++++++++++++++++++++++--
> > 1 file changed, 24 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
> > index 127537628817..9952d0cff903 100644
> > --- a/drivers/cxl/acpi.c
> > +++ b/drivers/cxl/acpi.c
> > @@ -631,8 +631,21 @@ static int add_host_bridge_dport(struct device *match, void *arg)
> > struct acpi_pci_root *pci_root;
> > struct cxl_port *root_port = arg;
> > struct device *host = root_port->dev.parent;
> > - struct acpi_device *hb = to_cxl_host_bridge(host, match);
> > + struct acpi_device *adev = to_acpi_device(match);
> > + struct acpi_device *hb;
> >
> > + /*
> > + * If this is an ACPI0016 device but acpi_pci_find_root() hasn't
> > + * found the PCI root yet (driver not probed), defer the probe
> > + * to allow acpi_pci_root to bind first.
> > + */
> > + if (strcmp(acpi_device_hid(adev), "ACPI0016") == 0 &&
> > + !acpi_pci_find_root(adev->handle)) {
> > + dev_dbg(host, "deferring probe, ACPI0016 PCI root not ready\n");
> > + return -EPROBE_DEFER;
> > + }
> > +
> > + hb = to_cxl_host_bridge(host, match);
> > if (!hb)
> > return 0;
> >
> > @@ -688,7 +701,8 @@ static int add_host_bridge_uport(struct device *match, void *arg)
> > {
> > struct cxl_port *root_port = arg;
> > struct device *host = root_port->dev.parent;
> > - struct acpi_device *hb = to_cxl_host_bridge(host, match);
> > + struct acpi_device *adev = to_acpi_device(match);
> > + struct acpi_device *hb;
> > struct acpi_pci_root *pci_root;
> > struct cxl_dport *dport;
> > struct cxl_port *port;
> > @@ -697,6 +711,14 @@ static int add_host_bridge_uport(struct device *match, void *arg)
> > resource_size_t component_reg_phys;
> > int rc;
> >
> > + /* Same deferral check as in add_host_bridge_dport() */
> > + if (strcmp(acpi_device_hid(adev), "ACPI0016") == 0 &&
> > + !acpi_pci_find_root(adev->handle)) {
> > + dev_dbg(host, "deferring probe, ACPI0016 PCI root not ready\n");
> > + return -EPROBE_DEFER;
> > + }
>
> I don't believe this check in add_host_bridge_uport() is necessary. add_host_bridge_dport() happens first and the check should've been done there. There is no reason to do it again and you'll never reach here if it's not ready.
>
> DJ
Hi Dave,
You're right. Since add_host_bridge_dport() is called first and
cxl_acpi_probe() returns early on error, add_host_bridge_uport()
will never be reached if the PCI root is not ready. I'll drop the
duplicate check in v2.
Thanks,
Pei
> > +
> > + hb = to_cxl_host_bridge(host, match);
> > if (!hb)
> > return 0;
On Thu, May 14, 2026 at 10:32:38AM +0800, Chen Pei wrote:
> On some platforms (e.g., RISC-V and ARM64) that use the generic
> pci_acpi_scan_root() implementation, cxl_acpi_probe may run before
> acpi_pci_root driver has bound to ACPI0016 (CXL host bridge) devices.
> In this case, acpi_pci_find_root() returns NULL, causing
> to_cxl_host_bridge() to skip the device silently. This results in
> incomplete CXL port enumeration on first boot.
>
> Fix this by detecting the case where an ACPI0016 device exists but its
> PCI root bridge is not yet ready, and returning -EPROBE_DEFER to trigger
> a deferred probe retry.
>
> Signed-off-by: Chen Pei <cp0613@linux.alibaba.com>
> ---
> drivers/cxl/acpi.c | 26 ++++++++++++++++++++++++--
> 1 file changed, 24 insertions(+), 2 deletions(-)
>
Hi Chen Pei,
Thanks for the patch.
I have a few questions and suggestions regarding your changes.
First of all, I would like to know in which scenario you encountered the bug.
Were any specific CONFIG options or devices involved? What does the error log look like?
It would be nice if you could attach it for us.
> diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
> index 127537628817..9952d0cff903 100644
> --- a/drivers/cxl/acpi.c
> +++ b/drivers/cxl/acpi.c
> @@ -631,8 +631,21 @@ static int add_host_bridge_dport(struct device *match, void *arg)
> struct acpi_pci_root *pci_root;
> struct cxl_port *root_port = arg;
> struct device *host = root_port->dev.parent;
> - struct acpi_device *hb = to_cxl_host_bridge(host, match);
> + struct acpi_device *adev = to_acpi_device(match);
> + struct acpi_device *hb;
>
> + /*
> + * If this is an ACPI0016 device but acpi_pci_find_root() hasn't
> + * found the PCI root yet (driver not probed), defer the probe
> + * to allow acpi_pci_root to bind first.
> + */
> + if (strcmp(acpi_device_hid(adev), "ACPI0016") == 0 &&
> + !acpi_pci_find_root(adev->handle)) {
> + dev_dbg(host, "deferring probe, ACPI0016 PCI root not ready\n");
> + return -EPROBE_DEFER;
> + }
What about strncmp() here, since we already know we're comparing against "ACPI0016"?
At the same time, why not just use acpi_dev_hid_match()? It's widely used across
numerous files.
> +
> + hb = to_cxl_host_bridge(host, match);
> if (!hb)
> return 0;
>
> @@ -688,7 +701,8 @@ static int add_host_bridge_uport(struct device *match, void *arg)
> {
> struct cxl_port *root_port = arg;
> struct device *host = root_port->dev.parent;
> - struct acpi_device *hb = to_cxl_host_bridge(host, match);
> + struct acpi_device *adev = to_acpi_device(match);
> + struct acpi_device *hb;
> struct acpi_pci_root *pci_root;
> struct cxl_dport *dport;
> struct cxl_port *port;
> @@ -697,6 +711,14 @@ static int add_host_bridge_uport(struct device *match, void *arg)
> resource_size_t component_reg_phys;
> int rc;
>
> + /* Same deferral check as in add_host_bridge_dport() */
> + if (strcmp(acpi_device_hid(adev), "ACPI0016") == 0 &&
> + !acpi_pci_find_root(adev->handle)) {
> + dev_dbg(host, "deferring probe, ACPI0016 PCI root not ready\n");
> + return -EPROBE_DEFER;
> + }
> +
> + hb = to_cxl_host_bridge(host, match);
> if (!hb)
> return 0;
>
> --
> 2.50.1
>
>
These two checks are basically the same. Can we put them in a static inline helper or
a macro if possible? Something like the following might be better:
```
static int cxl_acpi_defer_host_bridge(struct device *host,
struct acpi_device *adev)
{
if (acpi_dev_hid_match(adev, "ACPI0016") &&
!acpi_pci_find_root(adev->handle)) {
dev_dbg(host, "deferring probe, ACPI0016 PCI root not ready\n");
return -EPROBE_DEFER;
}
return 0;
}
```
and use it in your code like
```
int rc = cxl_acpi_defer_host_bridge(host, adev);
if (rc)
return rc;
```
Last but not least, have you run the CXL kselftests? Some mock bridges
are platform devices, not ACPI devices. You are using "to_acpi_device(match)", which
is not a safe runtime check: when "match" is a platform_device, the code will
misinterpret the memory layout.
Best regards,
Richard Cheng.
© 2016 - 2026 Red Hat, Inc.