drivers/thunderbolt/tb.c | 95 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+)
PCIe devices behind Thunderbolt tunnels may fail to enumerate when
spurious hotplug events prevent pciehp from detecting link-up.
Root cause:
Spurious unplug events occur immediately after tunnel activation:
[ 932.438] thunderbolt: acking hot unplug event on 702:2
[ 932.852] thunderbolt: PCIe Up path activation complete
[ 932.855] thunderbolt: hotplug event for upstream port 702:2
(unplug: 0)
[ 932.855] thunderbolt: hotplug event for upstream port 702:2
(unplug: 1)
These events disrupt pciehp timing, causing device enumeration to fail
~70% of the time on affected hardware. Manual PCI rescan succeeds,
proving devices are present and functional on the bus.
Solution:
Schedule delayed work (300ms) after tunnel activation to:
1. Check if pciehp successfully enumerated devices (device count increased)
2. If not, trigger pci_rescan_bus() to discover devices manually
3. Log results for observability
The delayed work approach is non-blocking and only rescans when actually
needed, avoiding overhead on systems where pciehp works correctly.
Signed-off-by: Chia-Lin Kao (AceLan) <acelan.kao@canonical.com>
---
Logs: https://people.canonical.com/~acelan/bugs/tbt_storage/
merged.out.bad: Plugged-in TBT storage, but eventually fails to enumerate
merged.out.good: Plugged-in TBT storage, and successfully enumerates
merged.out.patched: Plugged-in TBT storage, it should fail without this
patch, but it works now
---
drivers/thunderbolt/tb.c | 95 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 95 insertions(+)
diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c
index 293fc9f258a5c..a3761be6eeea4 100644
--- a/drivers/thunderbolt/tb.c
+++ b/drivers/thunderbolt/tb.c
@@ -11,6 +11,7 @@
#include <linux/delay.h>
#include <linux/pm_runtime.h>
#include <linux/platform_data/x86/apple.h>
+#include <linux/pci.h>
#include "tb.h"
#include "tb_regs.h"
@@ -18,6 +19,7 @@
#define TB_TIMEOUT 100 /* ms */
#define TB_RELEASE_BW_TIMEOUT 10000 /* ms */
+#define TB_PCIEHP_ENUMERATION_DELAY 300 /* ms */
/*
* How many time bandwidth allocation request from graphics driver is
@@ -83,6 +85,16 @@ struct tb_hotplug_event {
int retry;
};
+/**
+ * struct tb_pci_rescan_work - Delayed work to verify PCIe enumeration after
+ *			       tunnel activation
+ * @work: Delayed work item; its handler is tb_pci_rescan_work_fn()
+ * @tb: Domain whose lock the handler takes while it runs
+ * @bus: PCI bus the handler passes to pci_rescan_bus()
+ * @devices_before: Number of PCI devices counted when the work was queued
+ * @route: Route of the switch, looked up again by the handler
+ * @port: Index into the switch's ports array, used for log messages
+ *
+ * Allocated in tb_tunnel_pci() and freed by the handler with kfree().
+ */
+struct tb_pci_rescan_work {
+ struct delayed_work work;
+ struct tb *tb;
+ struct pci_bus *bus;
+ int devices_before;
+ u64 route;
+ u8 port;
+};
+
static void tb_scan_port(struct tb_port *port);
static void tb_handle_hotplug(struct work_struct *work);
static void tb_dp_resource_unavailable(struct tb *tb, struct tb_port *port,
@@ -90,6 +102,60 @@ static void tb_dp_resource_unavailable(struct tb *tb, struct tb_port *port,
static void tb_queue_dp_bandwidth_request(struct tb *tb, u64 route, u8 port,
int retry, unsigned long delay);
+/*
+ * tb_pci_rescan_work_fn() - Check PCIe enumeration after tunnel activation
+ * @work: Work item embedded in a struct tb_pci_rescan_work
+ *
+ * Counts the PCI devices in the system and compares the result with the
+ * count taken when the work was queued. If the count did not grow, the
+ * stored bus is rescanned with pci_rescan_bus() and the devices are counted
+ * again. The outcome is logged against the port. The work structure is
+ * freed before returning, on every path.
+ *
+ * NOTE(review): the comparison uses the system-wide PCI device count, so
+ * unrelated hotplug activity during the delay can skew it - confirm this
+ * heuristic is acceptable.
+ * NOTE(review): rescan_work->bus is stored as a bare pointer and
+ * rescan_work->port is used as an array index without a range check here;
+ * verify the bus lifetime and the port bound against the callers.
+ */
+static void tb_pci_rescan_work_fn(struct work_struct *work)
+{
+	struct tb_pci_rescan_work *rescan_work =
+		container_of(work, typeof(*rescan_work), work.work);
+	struct tb *tb = rescan_work->tb;
+	struct pci_bus *bus = rescan_work->bus;
+	int devices_after = 0;
+	/*
+	 * for_each_pci_dev() passes the cursor to pci_get_device() as the
+	 * starting point, so it must be NULL to begin at the first device.
+	 */
+	struct pci_dev *dev = NULL;
+	struct tb_switch *sw;
+	struct tb_port *port;
+
+	mutex_lock(&tb->lock);
+
+	/* The switch may have been unplugged while the work was pending */
+	sw = tb_switch_find_by_route(tb, rescan_work->route);
+	if (!sw) {
+		tb_dbg(tb, "Switch at route %llx disappeared, skipping rescan\n",
+		       rescan_work->route);
+		goto out_unlock;
+	}
+
+	port = &sw->ports[rescan_work->port];
+
+	pci_lock_rescan_remove();
+	for_each_pci_dev(dev)
+		devices_after++;
+	pci_unlock_rescan_remove();
+
+	if (devices_after > rescan_work->devices_before) {
+		tb_port_dbg(port, "pciehp enumerated %d new device(s)\n",
+			    devices_after - rescan_work->devices_before);
+	} else {
+		tb_port_info(port, "pciehp failed to enumerate devices, triggering rescan\n");
+
+		pci_lock_rescan_remove();
+		pci_rescan_bus(bus);
+
+		/* The previous loop ran to completion, so dev is NULL again */
+		devices_after = 0;
+		for_each_pci_dev(dev)
+			devices_after++;
+		pci_unlock_rescan_remove();
+
+		if (devices_after > rescan_work->devices_before)
+			tb_port_info(port, "rescan found %d new device(s)\n",
+				     devices_after - rescan_work->devices_before);
+		else
+			tb_port_warn(port, "no devices found even after rescan\n");
+	}
+
+	/* Drop the reference obtained from tb_switch_find_by_route() */
+	tb_switch_put(sw);
+out_unlock:
+	mutex_unlock(&tb->lock);
+	kfree(rescan_work);
+}
+
static void tb_queue_hotplug(struct tb *tb, u64 route, u8 port, bool unplug)
{
struct tb_hotplug_event *ev;
@@ -2400,6 +2466,35 @@ static int tb_tunnel_pci(struct tb *tb, struct tb_switch *sw)
tb_sw_warn(sw, "failed to connect xHCI\n");
list_add_tail(&tunnel->list, &tcm->tunnel_list);
+
+	/*
+	 * Verify pciehp enumeration; trigger rescan if needed. The current
+	 * PCI device count is recorded now and compared again by
+	 * tb_pci_rescan_work_fn() after TB_PCIEHP_ENUMERATION_DELAY ms.
+	 */
+	if (tb->nhi && tb->nhi->pdev && tb->nhi->pdev->bus) {
+		struct pci_bus *bus = tb->nhi->pdev->bus;
+		/* Rescan from the parent of the NHI's bus when there is one */
+		struct pci_bus *scan_bus = bus->parent ? bus->parent : bus;
+		struct tb_pci_rescan_work *rescan_work;
+		/*
+		 * Must start as NULL: for_each_pci_dev() hands the cursor to
+		 * pci_get_device() as the device to continue from.
+		 */
+		struct pci_dev *dev = NULL;
+		int devices_before = 0;
+
+		pci_lock_rescan_remove();
+		for_each_pci_dev(dev)
+			devices_before++;
+		pci_unlock_rescan_remove();
+
+		/*
+		 * On allocation failure only the verification is skipped;
+		 * the function still returns 0 as on the normal path.
+		 */
+		rescan_work = kmalloc(sizeof(*rescan_work), GFP_KERNEL);
+		if (!rescan_work)
+			return 0;
+
+		rescan_work->tb = tb;
+		rescan_work->bus = scan_bus;
+		rescan_work->devices_before = devices_before;
+		rescan_work->route = tb_route(sw);
+		rescan_work->port = up->port;
+
+		/* The handler frees rescan_work when it has run */
+		INIT_DELAYED_WORK(&rescan_work->work, tb_pci_rescan_work_fn);
+		queue_delayed_work(tb->wq, &rescan_work->work,
+				   msecs_to_jiffies(TB_PCIEHP_ENUMERATION_DELAY));
+	}
+
return 0;
}
--
2.51.0
Hi Chia-Lin,
kernel test robot noticed the following build warnings:
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Chia-Lin-Kao-AceLan/thunderbolt-Fix-PCIe-device-enumeration-with-delayed-rescan/20260121-141206
base: https://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git next
patch link: https://lore.kernel.org/r/20260121060857.237777-1-acelan.kao%40canonical.com
patch subject: [PATCH v2] thunderbolt: Fix PCIe device enumeration with delayed rescan
config: arm64-randconfig-r073-20260121 (https://download.01.org/0day-ci/archive/20260122/202601220733.dS3GMZRD-lkp@intel.com/config)
compiler: aarch64-linux-gcc (GCC) 8.5.0
smatch version: v0.5.0-8985-g2614ff1a
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
| Closes: https://lore.kernel.org/r/202601220733.dS3GMZRD-lkp@intel.com/
smatch warnings:
drivers/thunderbolt/tb.c:128 tb_pci_rescan_work_fn() error: uninitialized symbol 'dev'.
drivers/thunderbolt/tb.c:2392 tb_tunnel_pci() error: uninitialized symbol 'dev'.
vim +/dev +128 drivers/thunderbolt/tb.c
d6d458d42e1e15 Mika Westerberg 2024-08-20 102 static void tb_queue_dp_bandwidth_request(struct tb *tb, u64 route, u8 port,
d6d458d42e1e15 Mika Westerberg 2024-08-20 103 int retry, unsigned long delay);
4f807e47ee9a75 Mika Westerberg 2018-09-17 104
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 105) static void tb_pci_rescan_work_fn(struct work_struct *work)
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 106) {
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 107) struct tb_pci_rescan_work *rescan_work =
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 108) container_of(work, typeof(*rescan_work), work.work);
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 109) struct tb *tb = rescan_work->tb;
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 110) struct pci_bus *bus = rescan_work->bus;
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 111) int devices_after = 0;
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 112) struct pci_dev *dev;
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 113) struct tb_switch *sw;
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 114) struct tb_port *port;
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 115)
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 116) mutex_lock(&tb->lock);
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 117)
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 118) sw = tb_switch_find_by_route(tb, rescan_work->route);
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 119) if (!sw) {
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 120) tb_dbg(tb, "Switch at route %llx disappeared, skipping rescan\n",
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 121) rescan_work->route);
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 122) goto out_unlock;
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 123) }
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 124)
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 125) port = &sw->ports[rescan_work->port];
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 126)
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 127) pci_lock_rescan_remove();
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 @128) for_each_pci_dev(dev)
^^^
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 129) devices_after++;
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 130) pci_unlock_rescan_remove();
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 131)
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 132) if (devices_after > rescan_work->devices_before) {
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 133) tb_port_dbg(port, "pciehp enumerated %d new device(s)\n",
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 134) devices_after - rescan_work->devices_before);
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 135) } else {
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 136) tb_port_info(port, "pciehp failed to enumerate devices, triggering rescan\n");
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 137)
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 138) pci_lock_rescan_remove();
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 139) pci_rescan_bus(bus);
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 140)
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 141) devices_after = 0;
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 142) for_each_pci_dev(dev)
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 143) devices_after++;
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 144) pci_unlock_rescan_remove();
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 145)
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 146) if (devices_after > rescan_work->devices_before)
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 147) tb_port_info(port, "rescan found %d new device(s)\n",
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 148) devices_after - rescan_work->devices_before);
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 149) else
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 150) tb_port_warn(port, "no devices found even after rescan\n");
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 151) }
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 152)
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 153) tb_switch_put(sw);
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 154) out_unlock:
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 155) mutex_unlock(&tb->lock);
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 156) kfree(rescan_work);
eb51ddfd22dbc5 Chia-Lin Kao (AceLan 2026-01-21 157) }
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Hi Chia-Lin,
kernel test robot noticed the following build warnings:
[auto build test WARNING on westeri-thunderbolt/next]
[also build test WARNING on linus/master v6.19-rc6 next-20260120]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Chia-Lin-Kao-AceLan/thunderbolt-Fix-PCIe-device-enumeration-with-delayed-rescan/20260121-141206
base: https://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git next
patch link: https://lore.kernel.org/r/20260121060857.237777-1-acelan.kao%40canonical.com
patch subject: [PATCH v2] thunderbolt: Fix PCIe device enumeration with delayed rescan
config: i386-buildonly-randconfig-001-20260121 (https://download.01.org/0day-ci/archive/20260122/202601220225.L3boOv5a-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260122/202601220225.L3boOv5a-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202601220225.L3boOv5a-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> drivers/thunderbolt/tb.c:119:6: warning: variable 'dev' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
119 | if (!sw) {
| ^~~
drivers/thunderbolt/tb.c:128:19: note: uninitialized use occurs here
128 | for_each_pci_dev(dev)
| ^~~
include/linux/pci.h:594:80: note: expanded from macro 'for_each_pci_dev'
594 | #define for_each_pci_dev(d) while ((d = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, d)) != NULL)
| ^
drivers/thunderbolt/tb.c:119:2: note: remove the 'if' if its condition is always true
119 | if (!sw) {
| ^~~~~~~~
drivers/thunderbolt/tb.c:112:21: note: initialize the variable 'dev' to silence this warning
112 | struct pci_dev *dev;
| ^
| = NULL
>> drivers/thunderbolt/tb.c:2384:6: warning: variable 'dev' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]
2384 | if (tb->nhi && tb->nhi->pdev && tb->nhi->pdev->bus) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/thunderbolt/tb.c:2392:20: note: uninitialized use occurs here
2392 | for_each_pci_dev(dev)
| ^~~
include/linux/pci.h:594:80: note: expanded from macro 'for_each_pci_dev'
594 | #define for_each_pci_dev(d) while ((d = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, d)) != NULL)
| ^
drivers/thunderbolt/tb.c:2384:2: note: remove the 'if' if its condition is always false
2384 | if (tb->nhi && tb->nhi->pdev && tb->nhi->pdev->bus) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2385 | struct pci_bus *bus = tb->nhi->pdev->bus;
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2386 | struct pci_bus *scan_bus = bus->parent ? bus->parent : bus;
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2387 | struct tb_pci_rescan_work *rescan_work;
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2388 | struct pci_dev *dev;
| ~~~~~~~~~~~~~~~~~~~~
2389 | int devices_before = 0;
| ~~~~~~~~~~~~~~~~~~~~~~~
2390 |
2391 | pci_lock_rescan_remove();
| ~~~~~~~~~~~~~~~~~~~~~~~~~
2392 | for_each_pci_dev(dev)
| ~~~~~~~~~~~~~~~~~~~~~
2393 | devices_before++;
| ~~~~~~~~~~~~~~~~~
2394 | pci_unlock_rescan_remove();
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2395 |
2396 | rescan_work = kmalloc(sizeof(*rescan_work), GFP_KERNEL);
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2397 | if (!rescan_work)
| ~~~~~~~~~~~~~~~~~
2398 | return 0;
| ~~~~~~~~~
2399 |
drivers/thunderbolt/tb.c:2388:22: note: initialize the variable 'dev' to silence this warning
2388 | struct pci_dev *dev;
| ^
| = NULL
2 warnings generated.
vim +119 drivers/thunderbolt/tb.c
97
98 static void tb_scan_port(struct tb_port *port);
99 static void tb_handle_hotplug(struct work_struct *work);
100 static void tb_dp_resource_unavailable(struct tb *tb, struct tb_port *port,
101 const char *reason);
102 static void tb_queue_dp_bandwidth_request(struct tb *tb, u64 route, u8 port,
103 int retry, unsigned long delay);
104
105 static void tb_pci_rescan_work_fn(struct work_struct *work)
106 {
107 struct tb_pci_rescan_work *rescan_work =
108 container_of(work, typeof(*rescan_work), work.work);
109 struct tb *tb = rescan_work->tb;
110 struct pci_bus *bus = rescan_work->bus;
111 int devices_after = 0;
112 struct pci_dev *dev;
113 struct tb_switch *sw;
114 struct tb_port *port;
115
116 mutex_lock(&tb->lock);
117
118 sw = tb_switch_find_by_route(tb, rescan_work->route);
> 119 if (!sw) {
120 tb_dbg(tb, "Switch at route %llx disappeared, skipping rescan\n",
121 rescan_work->route);
122 goto out_unlock;
123 }
124
125 port = &sw->ports[rescan_work->port];
126
127 pci_lock_rescan_remove();
128 for_each_pci_dev(dev)
129 devices_after++;
130 pci_unlock_rescan_remove();
131
132 if (devices_after > rescan_work->devices_before) {
133 tb_port_dbg(port, "pciehp enumerated %d new device(s)\n",
134 devices_after - rescan_work->devices_before);
135 } else {
136 tb_port_info(port, "pciehp failed to enumerate devices, triggering rescan\n");
137
138 pci_lock_rescan_remove();
139 pci_rescan_bus(bus);
140
141 devices_after = 0;
142 for_each_pci_dev(dev)
143 devices_after++;
144 pci_unlock_rescan_remove();
145
146 if (devices_after > rescan_work->devices_before)
147 tb_port_info(port, "rescan found %d new device(s)\n",
148 devices_after - rescan_work->devices_before);
149 else
150 tb_port_warn(port, "no devices found even after rescan\n");
151 }
152
153 tb_switch_put(sw);
154 out_unlock:
155 mutex_unlock(&tb->lock);
156 kfree(rescan_work);
157 }
158
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
On Wed, Jan 21, 2026 at 02:08:57PM +0800, Chia-Lin Kao (AceLan) wrote:
> PCIe devices behind Thunderbolt tunnels may fail to enumerate when
> spurious hotplug events prevent pciehp from detecting link-up.

See my reply to v1. Let's discuss there.
© 2016 - 2026 Red Hat, Inc.