drivers/usb/typec/ucsi/ucsi.c | 50 ++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 19 deletions(-)
During UCSI initialization and operation, there is a race condition in
which delayed work items that were already scheduled try to queue work
after their workqueue has been destroyed. This occurs in multiple code
paths.
The race occurs when:
1. ucsi_partner_task() or ucsi_poll_worker() schedules delayed work
2. Connector cleanup paths call destroy_workqueue()
3. Previously scheduled delayed work timers fire after destruction
4. This triggers warnings and crashes in __queue_work()
The issue is timing-sensitive and typically manifests when:
- Port registration fails due to PPM timing issues
- System shutdown/cleanup occurs with pending delayed work
- Module removal races with active delayed work
Fix this by:
1. Creating a ucsi_destroy_connector_wq() helper function that safely
cancels all pending delayed work before destroying the workqueue
2. Applying the safe cleanup to all three workqueue destruction paths:
- ucsi_register_port() error path
- ucsi_init() error path
- ucsi_unregister() cleanup path
This prevents both the initial queueing on a destroyed workqueue and
retry attempts from workers that are still running, eliminating the
timer races.
Fixes: b9aa02ca39a4 ("usb: typec: ucsi: Add polling mechanism for partner tasks like alt mode checking")
Cc: stable@vger.kernel.org
Signed-off-by: Chia-Lin Kao (AceLan) <acelan.kao@canonical.com>
---
drivers/usb/typec/ucsi/ucsi.c | 50 ++++++++++++++++++++++-------------
1 file changed, 31 insertions(+), 19 deletions(-)
diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index 5ba3a6c81964..1f71c9983163 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -283,6 +283,33 @@ static void ucsi_poll_worker(struct work_struct *work)
mutex_unlock(&con->lock);
}
+/**
+ * ucsi_destroy_connector_wq - Safely destroy connector workqueue
+ * @con: UCSI connector
+ *
+ * Cancel all pending delayed work and destroy the workqueue to prevent
+ * timer races where delayed work tries to queue on destroyed workqueue.
+ */
+static void ucsi_destroy_connector_wq(struct ucsi_connector *con)
+{
+ struct ucsi_work *uwork, *tmp;
+
+ if (!con->wq)
+ return;
+
+ /* Cancel any pending delayed work before destroying workqueue */
+ mutex_lock(&con->lock);
+ list_for_each_entry_safe(uwork, tmp, &con->partner_tasks, node) {
+ cancel_delayed_work_sync(&uwork->work);
+ list_del(&uwork->node);
+ kfree(uwork);
+ }
+ mutex_unlock(&con->lock);
+
+ destroy_workqueue(con->wq);
+ con->wq = NULL;
+}
+
static int ucsi_partner_task(struct ucsi_connector *con,
int (*cb)(struct ucsi_connector *),
int retries, unsigned long delay)
@@ -1798,10 +1825,8 @@ static int ucsi_register_port(struct ucsi *ucsi, struct ucsi_connector *con)
out_unlock:
mutex_unlock(&con->lock);
- if (ret && con->wq) {
- destroy_workqueue(con->wq);
- con->wq = NULL;
- }
+ if (ret)
+ ucsi_destroy_connector_wq(con);
return ret;
}
@@ -1921,8 +1946,7 @@ static int ucsi_init(struct ucsi *ucsi)
err_unregister:
for (con = connector; con->port; con++) {
- if (con->wq)
- destroy_workqueue(con->wq);
+ ucsi_destroy_connector_wq(con);
ucsi_unregister_partner(con);
ucsi_unregister_altmodes(con, UCSI_RECIPIENT_CON);
ucsi_unregister_port_psy(con);
@@ -2144,19 +2168,7 @@ void ucsi_unregister(struct ucsi *ucsi)
for (i = 0; i < ucsi->cap.num_connectors; i++) {
cancel_work_sync(&ucsi->connector[i].work);
- if (ucsi->connector[i].wq) {
- struct ucsi_work *uwork;
-
- mutex_lock(&ucsi->connector[i].lock);
- /*
- * queue delayed items immediately so they can execute
- * and free themselves before the wq is destroyed
- */
- list_for_each_entry(uwork, &ucsi->connector[i].partner_tasks, node)
- mod_delayed_work(ucsi->connector[i].wq, &uwork->work, 0);
- mutex_unlock(&ucsi->connector[i].lock);
- destroy_workqueue(ucsi->connector[i].wq);
- }
+ ucsi_destroy_connector_wq(&ucsi->connector[i]);
ucsi_unregister_partner(&ucsi->connector[i]);
ucsi_unregister_altmodes(&ucsi->connector[i],
--
2.43.0
On Thu, Oct 02, 2025 at 09:30:26AM +0800, Chia-Lin Kao (AceLan) wrote: > During UCSI initialization and operation, there is a race condition where > delayed work items can be scheduled but attempt to queue work after the > workqueue has been destroyed. This occurs in multiple code paths. > > The race occurs when: > 1. ucsi_partner_task() or ucsi_poll_worker() schedule delayed work > 2. Connector cleanup paths call destroy_workqueue() > 3. Previously scheduled delayed work timers fire after destruction > 4. This triggers warnings and crashes in __queue_work() What warnings? -- heikki
On Wed, Oct 08, 2025 at 01:34:35PM +0300, Heikki Krogerus wrote: > On Thu, Oct 02, 2025 at 09:30:26AM +0800, Chia-Lin Kao (AceLan) wrote: > > During UCSI initialization and operation, there is a race condition where > > delayed work items can be scheduled but attempt to queue work after the > > workqueue has been destroyed. This occurs in multiple code paths. > > > > The race occurs when: > > 1. ucsi_partner_task() or ucsi_poll_worker() schedule delayed work > > 2. Connector cleanup paths call destroy_workqueue() > > 3. Previously scheduled delayed work timers fire after destruction > > 4. This triggers warnings and crashes in __queue_work() > > What warnings? Here is what I got. Sep 24 13:24:22 ubuntu kernel: sysfs: cannot create duplicate filename '/devices/platform/USBC000:00/typec/port0/port0.0/partner' Sep 24 13:24:22 ubuntu kernel: CPU: 1 UID: 0 PID: 132 Comm: kworker/u64:1 Tainted: G O 6.14.0-1012-oem #12-Ubuntu Sep 24 13:24:22 ubuntu kernel: Tainted: [O]=OOT_MODULE Sep 24 13:24:22 ubuntu kernel: Hardware name: Dell Inc. Dell /, BIOS XXXX XX/XX/2025 Sep 24 13:24:22 ubuntu kernel: Workqueue: USBC000:00-con1 ucsi_poll_worker [typec_ucsi] Sep 24 13:24:22 ubuntu kernel: Call Trace: Sep 24 13:24:22 ubuntu kernel: <TASK> Sep 24 13:24:22 ubuntu kernel: dump_stack_lvl+0x76/0xa0 Sep 24 13:24:22 ubuntu kernel: dump_stack+0x10/0x20 Sep 24 13:24:22 ubuntu kernel: sysfs_warn_dup+0x8a/0xb0 Sep 24 13:24:22 ubuntu kernel: sysfs_do_create_link_sd+0xf1/0x100 Sep 24 13:24:22 ubuntu kernel: sysfs_create_link+0x21/0x50 Sep 24 13:24:22 ubuntu kernel: typec_probe+0x7e/0x100 [typec] Sep 24 13:24:22 ubuntu kernel: ? driver_sysfs_add+0x66/0xd0 Sep 24 13:24:22 ubuntu kernel: really_probe+0xee/0x3c0 Sep 24 13:24:22 ubuntu kernel: __driver_probe_device+0x8c/0x180 Sep 24 13:24:22 ubuntu kernel: driver_probe_device+0x24/0xd0 Sep 24 13:24:22 ubuntu kernel: __device_attach_driver+0xcd/0x170 Sep 24 13:24:22 ubuntu kernel: ? 
_pfx__device_attach_driver+0x10/0x10 Sep 24 13:24:22 ubuntu kernel: bus_for_each_drv+0x94/0xf0 Sep 24 13:24:22 ubuntu kernel: __device_attach+0xb6/0x1d0 Sep 24 13:24:22 ubuntu kernel: device_initial_probe+0x13/0x20 Sep 24 13:24:22 ubuntu kernel: bus_probe_device+0x9f/0xb0 Sep 24 13:24:22 ubuntu kernel: device_add+0x513/0x710 Sep 24 13:24:22 ubuntu kernel: device_register+0x1a/0x30 Sep 24 13:24:22 ubuntu kernel: typec_register_altmode+0x253/0x3a0 [typec] Sep 24 13:24:22 ubuntu kernel: typec_partner_register_altmode+0xe/0x20 [typec] Sep 24 13:24:22 ubuntu kernel: ucsi_register_altmode.constprop.0+0x30e/0x390 [typec_ucsi] Sep 24 13:24:22 ubuntu kernel: ucsi_register_altmodes+0x162/0x250 [typec_ucsi] Sep 24 13:24:22 ubuntu kernel: ucsi_check_altmodes+0x19/0xb0 [typec_ucsi] Sep 24 13:24:22 ubuntu kernel: ucsi_poll_worker+0x3d/0xf0 [typec_ucsi] Sep 24 13:24:22 ubuntu kernel: process_one_work+0x178/0x3d0 Sep 24 13:24:22 ubuntu kernel: worker_thread+0x2de/0x410 Sep 24 13:24:22 ubuntu kernel: ? __pfx_worker_thread+0x10/0x10 Sep 24 13:24:22 ubuntu kernel: kthread+0xfb/0x230 Sep 24 13:24:22 ubuntu kernel: ? __pfx_kthread+0x10/0x10 Sep 24 13:24:22 ubuntu kernel: ret_from_fork+0x44/0x70 Sep 24 13:24:22 ubuntu kernel: ? __pfx_kthread+0x10/0x10 Sep 24 13:24:22 ubuntu kernel: ret_from_fork_asm+0x1a/0x30 Sep 24 13:24:22 ubuntu kernel: </TASK> Sep 24 13:24:22 ubuntu kernel: typec-thunderbolt port0-partner.1: failed to create symlinks Sep 24 13:24:22 ubuntu kernel: typec-thunderbolt port0-partner.1: probe with driver typec-thunderbolt failed with error -17 > > -- > heikki
On Thu, Oct 09, 2025 at 09:58:22AM +0800, Chia-Lin Kao (AceLan) wrote: > On Wed, Oct 08, 2025 at 01:34:35PM +0300, Heikki Krogerus wrote: > > On Thu, Oct 02, 2025 at 09:30:26AM +0800, Chia-Lin Kao (AceLan) wrote: > > > During UCSI initialization and operation, there is a race condition where > > > delayed work items can be scheduled but attempt to queue work after the > > > workqueue has been destroyed. This occurs in multiple code paths. > > > > > > The race occurs when: > > > 1. ucsi_partner_task() or ucsi_poll_worker() schedule delayed work > > > 2. Connector cleanup paths call destroy_workqueue() > > > 3. Previously scheduled delayed work timers fire after destruction > > > 4. This triggers warnings and crashes in __queue_work() > > > > What warnings? > Here is what I got. > > Sep 24 13:24:22 ubuntu kernel: sysfs: cannot create duplicate filename '/devices/platform/USBC000:00/typec/port0/port0.0/partner' > Sep 24 13:24:22 ubuntu kernel: CPU: 1 UID: 0 PID: 132 Comm: kworker/u64:1 Tainted: G O 6.14.0-1012-oem #12-Ubuntu > Sep 24 13:24:22 ubuntu kernel: Tainted: [O]=OOT_MODULE > Sep 24 13:24:22 ubuntu kernel: Hardware name: Dell Inc. Dell /, BIOS XXXX XX/XX/2025 > Sep 24 13:24:22 ubuntu kernel: Workqueue: USBC000:00-con1 ucsi_poll_worker [typec_ucsi] > Sep 24 13:24:22 ubuntu kernel: Call Trace: > Sep 24 13:24:22 ubuntu kernel: <TASK> > Sep 24 13:24:22 ubuntu kernel: dump_stack_lvl+0x76/0xa0 > Sep 24 13:24:22 ubuntu kernel: dump_stack+0x10/0x20 > Sep 24 13:24:22 ubuntu kernel: sysfs_warn_dup+0x8a/0xb0 > Sep 24 13:24:22 ubuntu kernel: sysfs_do_create_link_sd+0xf1/0x100 > Sep 24 13:24:22 ubuntu kernel: sysfs_create_link+0x21/0x50 > Sep 24 13:24:22 ubuntu kernel: typec_probe+0x7e/0x100 [typec] > Sep 24 13:24:22 ubuntu kernel: ? 
driver_sysfs_add+0x66/0xd0 > Sep 24 13:24:22 ubuntu kernel: really_probe+0xee/0x3c0 > Sep 24 13:24:22 ubuntu kernel: __driver_probe_device+0x8c/0x180 > Sep 24 13:24:22 ubuntu kernel: driver_probe_device+0x24/0xd0 > Sep 24 13:24:22 ubuntu kernel: __device_attach_driver+0xcd/0x170 > Sep 24 13:24:22 ubuntu kernel: ? _pfx__device_attach_driver+0x10/0x10 > Sep 24 13:24:22 ubuntu kernel: bus_for_each_drv+0x94/0xf0 > Sep 24 13:24:22 ubuntu kernel: __device_attach+0xb6/0x1d0 > Sep 24 13:24:22 ubuntu kernel: device_initial_probe+0x13/0x20 > Sep 24 13:24:22 ubuntu kernel: bus_probe_device+0x9f/0xb0 > Sep 24 13:24:22 ubuntu kernel: device_add+0x513/0x710 > Sep 24 13:24:22 ubuntu kernel: device_register+0x1a/0x30 > Sep 24 13:24:22 ubuntu kernel: typec_register_altmode+0x253/0x3a0 [typec] > Sep 24 13:24:22 ubuntu kernel: typec_partner_register_altmode+0xe/0x20 [typec] > Sep 24 13:24:22 ubuntu kernel: ucsi_register_altmode.constprop.0+0x30e/0x390 [typec_ucsi] > Sep 24 13:24:22 ubuntu kernel: ucsi_register_altmodes+0x162/0x250 [typec_ucsi] > Sep 24 13:24:22 ubuntu kernel: ucsi_check_altmodes+0x19/0xb0 [typec_ucsi] > Sep 24 13:24:22 ubuntu kernel: ucsi_poll_worker+0x3d/0xf0 [typec_ucsi] > Sep 24 13:24:22 ubuntu kernel: process_one_work+0x178/0x3d0 > Sep 24 13:24:22 ubuntu kernel: worker_thread+0x2de/0x410 > Sep 24 13:24:22 ubuntu kernel: ? __pfx_worker_thread+0x10/0x10 > Sep 24 13:24:22 ubuntu kernel: kthread+0xfb/0x230 > Sep 24 13:24:22 ubuntu kernel: ? __pfx_kthread+0x10/0x10 > Sep 24 13:24:22 ubuntu kernel: ret_from_fork+0x44/0x70 > Sep 24 13:24:22 ubuntu kernel: ? __pfx_kthread+0x10/0x10 > Sep 24 13:24:22 ubuntu kernel: ret_from_fork_asm+0x1a/0x30 > Sep 24 13:24:22 ubuntu kernel: </TASK> > Sep 24 13:24:22 ubuntu kernel: typec-thunderbolt port0-partner.1: failed to create symlinks > Sep 24 13:24:22 ubuntu kernel: typec-thunderbolt port0-partner.1: probe with driver typec-thunderbolt failed with error -17 That does not look like anything you described in the commit message? 
You have there an attempt to register the same alternate mode twice, but the workqueue seems to be very much alive when that happens. Based on the above this looks like either a race where the driver really ends up registering the alternate modes multiple times or, and more likely, the firmware is reporting the same alternate mode multiple times. Or am I missing something? thanks, -- heikki
On Thu, Oct 09, 2025 at 05:08:53PM +0300, Heikki Krogerus wrote: > On Thu, Oct 09, 2025 at 09:58:22AM +0800, Chia-Lin Kao (AceLan) wrote: > > On Wed, Oct 08, 2025 at 01:34:35PM +0300, Heikki Krogerus wrote: > > > On Thu, Oct 02, 2025 at 09:30:26AM +0800, Chia-Lin Kao (AceLan) wrote: > > > > During UCSI initialization and operation, there is a race condition where > > > > delayed work items can be scheduled but attempt to queue work after the > > > > workqueue has been destroyed. This occurs in multiple code paths. > > > > > > > > The race occurs when: > > > > 1. ucsi_partner_task() or ucsi_poll_worker() schedule delayed work > > > > 2. Connector cleanup paths call destroy_workqueue() > > > > 3. Previously scheduled delayed work timers fire after destruction > > > > 4. This triggers warnings and crashes in __queue_work() > > > > > > What warnings? > > Here is what I got. > > > > Sep 24 13:24:22 ubuntu kernel: sysfs: cannot create duplicate filename '/devices/platform/USBC000:00/typec/port0/port0.0/partner' > > Sep 24 13:24:22 ubuntu kernel: CPU: 1 UID: 0 PID: 132 Comm: kworker/u64:1 Tainted: G O 6.14.0-1012-oem #12-Ubuntu > > Sep 24 13:24:22 ubuntu kernel: Tainted: [O]=OOT_MODULE > > Sep 24 13:24:22 ubuntu kernel: Hardware name: Dell Inc. Dell /, BIOS XXXX XX/XX/2025 > > Sep 24 13:24:22 ubuntu kernel: Workqueue: USBC000:00-con1 ucsi_poll_worker [typec_ucsi] > > Sep 24 13:24:22 ubuntu kernel: Call Trace: > > Sep 24 13:24:22 ubuntu kernel: <TASK> > > Sep 24 13:24:22 ubuntu kernel: dump_stack_lvl+0x76/0xa0 > > Sep 24 13:24:22 ubuntu kernel: dump_stack+0x10/0x20 > > Sep 24 13:24:22 ubuntu kernel: sysfs_warn_dup+0x8a/0xb0 > > Sep 24 13:24:22 ubuntu kernel: sysfs_do_create_link_sd+0xf1/0x100 > > Sep 24 13:24:22 ubuntu kernel: sysfs_create_link+0x21/0x50 > > Sep 24 13:24:22 ubuntu kernel: typec_probe+0x7e/0x100 [typec] > > Sep 24 13:24:22 ubuntu kernel: ? 
driver_sysfs_add+0x66/0xd0 > > Sep 24 13:24:22 ubuntu kernel: really_probe+0xee/0x3c0 > > Sep 24 13:24:22 ubuntu kernel: __driver_probe_device+0x8c/0x180 > > Sep 24 13:24:22 ubuntu kernel: driver_probe_device+0x24/0xd0 > > Sep 24 13:24:22 ubuntu kernel: __device_attach_driver+0xcd/0x170 > > Sep 24 13:24:22 ubuntu kernel: ? _pfx__device_attach_driver+0x10/0x10 > > Sep 24 13:24:22 ubuntu kernel: bus_for_each_drv+0x94/0xf0 > > Sep 24 13:24:22 ubuntu kernel: __device_attach+0xb6/0x1d0 > > Sep 24 13:24:22 ubuntu kernel: device_initial_probe+0x13/0x20 > > Sep 24 13:24:22 ubuntu kernel: bus_probe_device+0x9f/0xb0 > > Sep 24 13:24:22 ubuntu kernel: device_add+0x513/0x710 > > Sep 24 13:24:22 ubuntu kernel: device_register+0x1a/0x30 > > Sep 24 13:24:22 ubuntu kernel: typec_register_altmode+0x253/0x3a0 [typec] > > Sep 24 13:24:22 ubuntu kernel: typec_partner_register_altmode+0xe/0x20 [typec] > > Sep 24 13:24:22 ubuntu kernel: ucsi_register_altmode.constprop.0+0x30e/0x390 [typec_ucsi] > > Sep 24 13:24:22 ubuntu kernel: ucsi_register_altmodes+0x162/0x250 [typec_ucsi] > > Sep 24 13:24:22 ubuntu kernel: ucsi_check_altmodes+0x19/0xb0 [typec_ucsi] > > Sep 24 13:24:22 ubuntu kernel: ucsi_poll_worker+0x3d/0xf0 [typec_ucsi] > > Sep 24 13:24:22 ubuntu kernel: process_one_work+0x178/0x3d0 > > Sep 24 13:24:22 ubuntu kernel: worker_thread+0x2de/0x410 > > Sep 24 13:24:22 ubuntu kernel: ? __pfx_worker_thread+0x10/0x10 > > Sep 24 13:24:22 ubuntu kernel: kthread+0xfb/0x230 > > Sep 24 13:24:22 ubuntu kernel: ? __pfx_kthread+0x10/0x10 > > Sep 24 13:24:22 ubuntu kernel: ret_from_fork+0x44/0x70 > > Sep 24 13:24:22 ubuntu kernel: ? 
__pfx_kthread+0x10/0x10 > > Sep 24 13:24:22 ubuntu kernel: ret_from_fork_asm+0x1a/0x30 > > Sep 24 13:24:22 ubuntu kernel: </TASK> > > Sep 24 13:24:22 ubuntu kernel: typec-thunderbolt port0-partner.1: failed to create symlinks > > Sep 24 13:24:22 ubuntu kernel: typec-thunderbolt port0-partner.1: probe with driver typec-thunderbolt failed with error -17 > > That does not look like anything you described in the commit message? > > You have there an attempt to register the same alternate mode twice, > but the workqueue seems to be very much alive when that happens. > > Based on the above this looks like either a race where the driver > really ends up registering the alternate modes multiple times or, and > more likely, the firmware is reporting the same alternate mode > multiple times. > > Or am I missing something? Here is another one. It's not immediately obvious how this one relates to typec_ucsi. [ 170.605181] ucsi_acpi USBC000:00: con2: failed to register alt modes [ 181.868900] ------------[ cut here ]------------ [ 181.868905] workqueue: cannot queue ucsi_poll_worker [typec_ucsi] on wq USBC000:00-con1 [ 181.868918] WARNING: CPU: 1 PID: 0 at kernel/workqueue.c:2255 __queue_work+0x420/0x5a0 ... [ 181.869062] CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Not tainted 6.17.0-rc7+ #1 PREEMPT(voluntary) [ 181.869065] Hardware name: Dell Inc. 
, BIOS xx.xx.xx xx/xx/2025 [ 181.869067] RIP: 0010:__queue_work+0x420/0x5a0 [ 181.869070] Code: 00 00 41 83 e4 01 0f 85 57 fd ff ff 49 8b 77 18 48 8d 93 c0 00 00 00 48 c7 c7 00 8c bc 92 c6 05 27 47 68 02 01 e8 50 24 fd f f <0f> 0b e9 32 fd ff ff 0f 0b e9 1d fd ff ff 0f 0b e9 0f fd ff ff 0f [ 181.869072] RSP: 0018:ffffd53c000acdf8 EFLAGS: 00010046 [ 181.869075] RAX: 0000000000000000 RBX: ffff8ecd0727f200 RCX: 0000000000000000 [ 181.869076] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 181.869077] RBP: ffffd53c000ace38 R08: 0000000000000000 R09: 0000000000000000 [ 181.869078] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ 181.869079] R13: ffffffff913995e0 R14: ffff8ecc824387a0 R15: ffff8ecc82438780 [ 181.869081] FS: 0000000000000000(0000) GS:ffff8eec0b92f000(0000) knlGS:0000000000000000 [ 181.869083] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 181.869084] CR2: 000005593e67a008 CR3: 0000001f41840002 CR4: 0000000000f72ef0 [ 181.869086] PKRU: 55555554 [ 181.869087] Call Trace: [ 181.869089] <IRQ> [ 181.869093] ? sched_clock+0x10/0x30 [ 181.869098] ? __pfx_delayed_work_timer_fn+0x10/0x10 [ 181.869100] delayed_work_timer_fn+0x19/0x30 [ 181.869102] call_timer_fn+0x2c/0x150 [ 181.869106] ? 
__pfx_delayed_work_timer_fn+0x10/0x10 [ 181.869108] __run_timers+0x1c6/0x2d0 [ 181.869111] run_timer_softirq+0x8a/0x100 [ 181.869114] handle_softirqs+0xe4/0x340 [ 181.869118] __irq_exit_rcu+0x10e/0x130 [ 181.869121] irq_exit_rcu+0xe/0x20 [ 181.869124] sysvec_apic_timer_interrupt+0xa0/0xc0 [ 181.869130] </IRQ> [ 181.869131] <TASK> [ 181.869132] asm_sysvec_apic_timer_interrupt+0x1b/0x20 [ 181.869135] RIP: 0010:cpuidle_enter_state+0xda/0x710 [ 181.869137] Code: 8f f7 fe e8 78 f0 ff ff 8b 53 04 49 89 c7 0f 1f 44 00 00 31 ff e8 86 bf f5 fe 80 7d d0 00 0f 85 22 02 00 00 fb 0f 1f 44 00 0 0 <45> 85 f6 0f 88 f2 01 00 00 4d 63 ee 49 83 fd 0a 0f 83 d8 04 00 00 [ 181.869139] RSP: 0018:ffffd53c0022be18 EFLAGS: 00000246 [ 181.869140] RAX: 0000000000000000 RBX: ffff8eeb9f8bf880 RCX: 0000000000000000 [ 181.869142] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000000 [ 181.869143] RBP: ffffd53c0022be68 R08: 0000000000000000 R09: 0000000000000000 [ 181.869144] R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff93914780 [ 181.869145] R13: 0000000000000002 R14: 0000000000000002 R15: 0000002a583b0b41 [ 181.869148] ? cpuidle_enter_state+0xca/0x710 [ 181.869151] cpuidle_enter+0x2e/0x50 [ 181.869156] call_cpuidle+0x22/0x60 [ 181.869160] do_idle+0x1dc/0x240 [ 181.869163] cpu_startup_entry+0x29/0x30 [ 181.869164] start_secondary+0x128/0x160 [ 181.869167] common_startup_64+0x13e/0x141 [ 181.869171] </TASK> [ 181.869172] ---[ end trace 0000000000000000 ]--- [ 226.924460] workqueue USBC000:00-con1: drain_workqueue() isn't complete after 10 tries [ 329.470977] ucsi_acpi USBC000:00: error -ETIMEDOUT: PPM init failed > > thanks, > > -- > heikki
On Sun, Oct 12, 2025 at 10:00:03PM +0800, Chia-Lin Kao (AceLan) wrote: > On Thu, Oct 09, 2025 at 05:08:53PM +0300, Heikki Krogerus wrote: > > On Thu, Oct 09, 2025 at 09:58:22AM +0800, Chia-Lin Kao (AceLan) wrote: > > > On Wed, Oct 08, 2025 at 01:34:35PM +0300, Heikki Krogerus wrote: > > > > On Thu, Oct 02, 2025 at 09:30:26AM +0800, Chia-Lin Kao (AceLan) wrote: > > > > > During UCSI initialization and operation, there is a race condition where > > > > > delayed work items can be scheduled but attempt to queue work after the > > > > > workqueue has been destroyed. This occurs in multiple code paths. > > > > > > > > > > The race occurs when: > > > > > 1. ucsi_partner_task() or ucsi_poll_worker() schedule delayed work > > > > > 2. Connector cleanup paths call destroy_workqueue() > > > > > 3. Previously scheduled delayed work timers fire after destruction > > > > > 4. This triggers warnings and crashes in __queue_work() > > > > > > > > What warnings? > > > Here is what I got. > > > > > > Sep 24 13:24:22 ubuntu kernel: sysfs: cannot create duplicate filename '/devices/platform/USBC000:00/typec/port0/port0.0/partner' > > > Sep 24 13:24:22 ubuntu kernel: CPU: 1 UID: 0 PID: 132 Comm: kworker/u64:1 Tainted: G O 6.14.0-1012-oem #12-Ubuntu > > > Sep 24 13:24:22 ubuntu kernel: Tainted: [O]=OOT_MODULE > > > Sep 24 13:24:22 ubuntu kernel: Hardware name: Dell Inc. 
Dell /, BIOS XXXX XX/XX/2025 > > > Sep 24 13:24:22 ubuntu kernel: Workqueue: USBC000:00-con1 ucsi_poll_worker [typec_ucsi] > > > Sep 24 13:24:22 ubuntu kernel: Call Trace: > > > Sep 24 13:24:22 ubuntu kernel: <TASK> > > > Sep 24 13:24:22 ubuntu kernel: dump_stack_lvl+0x76/0xa0 > > > Sep 24 13:24:22 ubuntu kernel: dump_stack+0x10/0x20 > > > Sep 24 13:24:22 ubuntu kernel: sysfs_warn_dup+0x8a/0xb0 > > > Sep 24 13:24:22 ubuntu kernel: sysfs_do_create_link_sd+0xf1/0x100 > > > Sep 24 13:24:22 ubuntu kernel: sysfs_create_link+0x21/0x50 > > > Sep 24 13:24:22 ubuntu kernel: typec_probe+0x7e/0x100 [typec] > > > Sep 24 13:24:22 ubuntu kernel: ? driver_sysfs_add+0x66/0xd0 > > > Sep 24 13:24:22 ubuntu kernel: really_probe+0xee/0x3c0 > > > Sep 24 13:24:22 ubuntu kernel: __driver_probe_device+0x8c/0x180 > > > Sep 24 13:24:22 ubuntu kernel: driver_probe_device+0x24/0xd0 > > > Sep 24 13:24:22 ubuntu kernel: __device_attach_driver+0xcd/0x170 > > > Sep 24 13:24:22 ubuntu kernel: ? _pfx__device_attach_driver+0x10/0x10 > > > Sep 24 13:24:22 ubuntu kernel: bus_for_each_drv+0x94/0xf0 > > > Sep 24 13:24:22 ubuntu kernel: __device_attach+0xb6/0x1d0 > > > Sep 24 13:24:22 ubuntu kernel: device_initial_probe+0x13/0x20 > > > Sep 24 13:24:22 ubuntu kernel: bus_probe_device+0x9f/0xb0 > > > Sep 24 13:24:22 ubuntu kernel: device_add+0x513/0x710 > > > Sep 24 13:24:22 ubuntu kernel: device_register+0x1a/0x30 > > > Sep 24 13:24:22 ubuntu kernel: typec_register_altmode+0x253/0x3a0 [typec] > > > Sep 24 13:24:22 ubuntu kernel: typec_partner_register_altmode+0xe/0x20 [typec] > > > Sep 24 13:24:22 ubuntu kernel: ucsi_register_altmode.constprop.0+0x30e/0x390 [typec_ucsi] > > > Sep 24 13:24:22 ubuntu kernel: ucsi_register_altmodes+0x162/0x250 [typec_ucsi] > > > Sep 24 13:24:22 ubuntu kernel: ucsi_check_altmodes+0x19/0xb0 [typec_ucsi] > > > Sep 24 13:24:22 ubuntu kernel: ucsi_poll_worker+0x3d/0xf0 [typec_ucsi] > > > Sep 24 13:24:22 ubuntu kernel: process_one_work+0x178/0x3d0 > > > Sep 24 13:24:22 ubuntu 
kernel: worker_thread+0x2de/0x410 > > > Sep 24 13:24:22 ubuntu kernel: ? __pfx_worker_thread+0x10/0x10 > > > Sep 24 13:24:22 ubuntu kernel: kthread+0xfb/0x230 > > > Sep 24 13:24:22 ubuntu kernel: ? __pfx_kthread+0x10/0x10 > > > Sep 24 13:24:22 ubuntu kernel: ret_from_fork+0x44/0x70 > > > Sep 24 13:24:22 ubuntu kernel: ? __pfx_kthread+0x10/0x10 > > > Sep 24 13:24:22 ubuntu kernel: ret_from_fork_asm+0x1a/0x30 > > > Sep 24 13:24:22 ubuntu kernel: </TASK> > > > Sep 24 13:24:22 ubuntu kernel: typec-thunderbolt port0-partner.1: failed to create symlinks > > > Sep 24 13:24:22 ubuntu kernel: typec-thunderbolt port0-partner.1: probe with driver typec-thunderbolt failed with error -17 > > > > That does not look like anything you described in the commit message? > > > > You have there an attempt to register the same alternate mode twice, > > but the workqueue seems to be very much alive when that happens. > > > > Based on the above this looks like either a race where the driver > > really ends up registering the alternate modes multiple times or, and > > more likely, the firmware is reporting the same alternate mode > > multiple times. > > > > Or am I missing something? > Here is another one. It's not immediately obvious how this one relates to typec_ucsi. > > [ 170.605181] ucsi_acpi USBC000:00: con2: failed to register alt modes > [ 181.868900] ------------[ cut here ]------------ > [ 181.868905] workqueue: cannot queue ucsi_poll_worker [typec_ucsi] on wq USBC000:00-con1 > [ 181.868918] WARNING: CPU: 1 PID: 0 at kernel/workqueue.c:2255 __queue_work+0x420/0x5a0 > ... > [ 181.869062] CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Not tainted 6.17.0-rc7+ #1 PREEMPT(voluntary) > [ 181.869065] Hardware name: Dell Inc. 
, BIOS xx.xx.xx xx/xx/2025 > [ 181.869067] RIP: 0010:__queue_work+0x420/0x5a0 > [ 181.869070] Code: 00 00 41 83 e4 01 0f 85 57 fd ff ff 49 8b 77 18 48 8d 93 c0 00 00 00 48 c7 c7 00 8c bc 92 c6 05 27 47 68 02 01 e8 50 24 fd f > f <0f> 0b e9 32 fd ff ff 0f 0b e9 1d fd ff ff 0f 0b e9 0f fd ff ff 0f > [ 181.869072] RSP: 0018:ffffd53c000acdf8 EFLAGS: 00010046 > [ 181.869075] RAX: 0000000000000000 RBX: ffff8ecd0727f200 RCX: 0000000000000000 > [ 181.869076] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 > [ 181.869077] RBP: ffffd53c000ace38 R08: 0000000000000000 R09: 0000000000000000 > [ 181.869078] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 > [ 181.869079] R13: ffffffff913995e0 R14: ffff8ecc824387a0 R15: ffff8ecc82438780 > [ 181.869081] FS: 0000000000000000(0000) GS:ffff8eec0b92f000(0000) knlGS:0000000000000000 > [ 181.869083] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [ 181.869084] CR2: 000005593e67a008 CR3: 0000001f41840002 CR4: 0000000000f72ef0 > [ 181.869086] PKRU: 55555554 > [ 181.869087] Call Trace: > [ 181.869089] <IRQ> > [ 181.869093] ? sched_clock+0x10/0x30 > [ 181.869098] ? __pfx_delayed_work_timer_fn+0x10/0x10 > [ 181.869100] delayed_work_timer_fn+0x19/0x30 > [ 181.869102] call_timer_fn+0x2c/0x150 > [ 181.869106] ? 
__pfx_delayed_work_timer_fn+0x10/0x10 > [ 181.869108] __run_timers+0x1c6/0x2d0 > [ 181.869111] run_timer_softirq+0x8a/0x100 > [ 181.869114] handle_softirqs+0xe4/0x340 > [ 181.869118] __irq_exit_rcu+0x10e/0x130 > [ 181.869121] irq_exit_rcu+0xe/0x20 > [ 181.869124] sysvec_apic_timer_interrupt+0xa0/0xc0 > [ 181.869130] </IRQ> > [ 181.869131] <TASK> > [ 181.869132] asm_sysvec_apic_timer_interrupt+0x1b/0x20 [ 181.869135] RIP: 0010:cpuidle_enter_state+0xda/0x710 > [ 181.869137] Code: 8f f7 fe e8 78 f0 ff ff 8b 53 04 49 89 c7 0f 1f 44 00 00 31 ff e8 86 bf f5 fe 80 7d d0 00 0f 85 22 02 00 00 fb 0f 1f 44 00 0 > 0 <45> 85 f6 0f 88 f2 01 00 00 4d 63 ee 49 83 fd 0a 0f 83 d8 04 00 00 > [ 181.869139] RSP: 0018:ffffd53c0022be18 EFLAGS: 00000246 > [ 181.869140] RAX: 0000000000000000 RBX: ffff8eeb9f8bf880 RCX: 0000000000000000 > [ 181.869142] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000000 > [ 181.869143] RBP: ffffd53c0022be68 R08: 0000000000000000 R09: 0000000000000000 > [ 181.869144] R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff93914780 > [ 181.869145] R13: 0000000000000002 R14: 0000000000000002 R15: 0000002a583b0b41 > [ 181.869148] ? cpuidle_enter_state+0xca/0x710 > [ 181.869151] cpuidle_enter+0x2e/0x50 > [ 181.869156] call_cpuidle+0x22/0x60 > [ 181.869160] do_idle+0x1dc/0x240 > [ 181.869163] cpu_startup_entry+0x29/0x30 > [ 181.869164] start_secondary+0x128/0x160 > [ 181.869167] common_startup_64+0x13e/0x141 > [ 181.869171] </TASK> > [ 181.869172] ---[ end trace 0000000000000000 ]--- > [ 226.924460] workqueue USBC000:00-con1: drain_workqueue() isn't complete after 10 tries > [ 329.470977] ucsi_acpi USBC000:00: error -ETIMEDOUT: PPM init failed Okay, so to me it looks like there are two separate issues here. 
The first one looks like an EC (or PD controller) firmware-related issue, where the response to the GET_ALTERNATE_MODES command is getting corrupted for some reason, and the second is this race that you see as a consequence of the first FW-related issue. So this patch is for the second issue - the race. I'll wait for the v2 from you guys. But I want to solve both issues. thanks, -- heikki
On Mon, Oct 13, 2025 at 04:00:17PM +0300, Heikki Krogerus wrote: > On Sun, Oct 12, 2025 at 10:00:03PM +0800, Chia-Lin Kao (AceLan) wrote: > > On Thu, Oct 09, 2025 at 05:08:53PM +0300, Heikki Krogerus wrote: > > > On Thu, Oct 09, 2025 at 09:58:22AM +0800, Chia-Lin Kao (AceLan) wrote: > > > > On Wed, Oct 08, 2025 at 01:34:35PM +0300, Heikki Krogerus wrote: > > > > > On Thu, Oct 02, 2025 at 09:30:26AM +0800, Chia-Lin Kao (AceLan) wrote: > > > > > > During UCSI initialization and operation, there is a race condition where > > > > > > delayed work items can be scheduled but attempt to queue work after the > > > > > > workqueue has been destroyed. This occurs in multiple code paths. > > > > > > > > > > > > The race occurs when: > > > > > > 1. ucsi_partner_task() or ucsi_poll_worker() schedule delayed work > > > > > > 2. Connector cleanup paths call destroy_workqueue() > > > > > > 3. Previously scheduled delayed work timers fire after destruction > > > > > > 4. This triggers warnings and crashes in __queue_work() > > > > > > > > > > What warnings? > > > > Here is what I got. > > > > > > > > Sep 24 13:24:22 ubuntu kernel: sysfs: cannot create duplicate filename '/devices/platform/USBC000:00/typec/port0/port0.0/partner' > > > > Sep 24 13:24:22 ubuntu kernel: CPU: 1 UID: 0 PID: 132 Comm: kworker/u64:1 Tainted: G O 6.14.0-1012-oem #12-Ubuntu > > > > Sep 24 13:24:22 ubuntu kernel: Tainted: [O]=OOT_MODULE > > > > Sep 24 13:24:22 ubuntu kernel: Hardware name: Dell Inc. 
Dell /, BIOS XXXX XX/XX/2025 > > > > Sep 24 13:24:22 ubuntu kernel: Workqueue: USBC000:00-con1 ucsi_poll_worker [typec_ucsi] > > > > Sep 24 13:24:22 ubuntu kernel: Call Trace: > > > > Sep 24 13:24:22 ubuntu kernel: <TASK> > > > > Sep 24 13:24:22 ubuntu kernel: dump_stack_lvl+0x76/0xa0 > > > > Sep 24 13:24:22 ubuntu kernel: dump_stack+0x10/0x20 > > > > Sep 24 13:24:22 ubuntu kernel: sysfs_warn_dup+0x8a/0xb0 > > > > Sep 24 13:24:22 ubuntu kernel: sysfs_do_create_link_sd+0xf1/0x100 > > > > Sep 24 13:24:22 ubuntu kernel: sysfs_create_link+0x21/0x50 > > > > Sep 24 13:24:22 ubuntu kernel: typec_probe+0x7e/0x100 [typec] > > > > Sep 24 13:24:22 ubuntu kernel: ? driver_sysfs_add+0x66/0xd0 > > > > Sep 24 13:24:22 ubuntu kernel: really_probe+0xee/0x3c0 > > > > Sep 24 13:24:22 ubuntu kernel: __driver_probe_device+0x8c/0x180 > > > > Sep 24 13:24:22 ubuntu kernel: driver_probe_device+0x24/0xd0 > > > > Sep 24 13:24:22 ubuntu kernel: __device_attach_driver+0xcd/0x170 > > > > Sep 24 13:24:22 ubuntu kernel: ? 
_pfx__device_attach_driver+0x10/0x10 > > > > Sep 24 13:24:22 ubuntu kernel: bus_for_each_drv+0x94/0xf0 > > > > Sep 24 13:24:22 ubuntu kernel: __device_attach+0xb6/0x1d0 > > > > Sep 24 13:24:22 ubuntu kernel: device_initial_probe+0x13/0x20 > > > > Sep 24 13:24:22 ubuntu kernel: bus_probe_device+0x9f/0xb0 > > > > Sep 24 13:24:22 ubuntu kernel: device_add+0x513/0x710 > > > > Sep 24 13:24:22 ubuntu kernel: device_register+0x1a/0x30 > > > > Sep 24 13:24:22 ubuntu kernel: typec_register_altmode+0x253/0x3a0 [typec] > > > > Sep 24 13:24:22 ubuntu kernel: typec_partner_register_altmode+0xe/0x20 [typec] > > > > Sep 24 13:24:22 ubuntu kernel: ucsi_register_altmode.constprop.0+0x30e/0x390 [typec_ucsi] > > > > Sep 24 13:24:22 ubuntu kernel: ucsi_register_altmodes+0x162/0x250 [typec_ucsi] > > > > Sep 24 13:24:22 ubuntu kernel: ucsi_check_altmodes+0x19/0xb0 [typec_ucsi] > > > > Sep 24 13:24:22 ubuntu kernel: ucsi_poll_worker+0x3d/0xf0 [typec_ucsi] > > > > Sep 24 13:24:22 ubuntu kernel: process_one_work+0x178/0x3d0 > > > > Sep 24 13:24:22 ubuntu kernel: worker_thread+0x2de/0x410 > > > > Sep 24 13:24:22 ubuntu kernel: ? __pfx_worker_thread+0x10/0x10 > > > > Sep 24 13:24:22 ubuntu kernel: kthread+0xfb/0x230 > > > > Sep 24 13:24:22 ubuntu kernel: ? __pfx_kthread+0x10/0x10 > > > > Sep 24 13:24:22 ubuntu kernel: ret_from_fork+0x44/0x70 > > > > Sep 24 13:24:22 ubuntu kernel: ? __pfx_kthread+0x10/0x10 > > > > Sep 24 13:24:22 ubuntu kernel: ret_from_fork_asm+0x1a/0x30 > > > > Sep 24 13:24:22 ubuntu kernel: </TASK> > > > > Sep 24 13:24:22 ubuntu kernel: typec-thunderbolt port0-partner.1: failed to create symlinks > > > > Sep 24 13:24:22 ubuntu kernel: typec-thunderbolt port0-partner.1: probe with driver typec-thunderbolt failed with error -17 > > > > > > That does not look like anything you described in the commit message? > > > > > > You have there an attempt to register the same alternate mode twice, > > > but the workqueue seems to be very much alive when that happens. 
> > > > > > Based on the above this looks like either a race where the driver > > > really ends up registering the alternate modes multiple times or, and > > > more likely, the firmware is reporting the same alternate mode > > > multiple times. > > > > > > Or am I missing something? > > Here is another one. It's not immediately obvious how this one relates to typec_ucsi. > > > > [ 170.605181] ucsi_acpi USBC000:00: con2: failed to register alt modes > > [ 181.868900] ------------[ cut here ]------------ > > [ 181.868905] workqueue: cannot queue ucsi_poll_worker [typec_ucsi] on wq USBC000:00-con1 > > [ 181.868918] WARNING: CPU: 1 PID: 0 at kernel/workqueue.c:2255 __queue_work+0x420/0x5a0 > > ... > > [ 181.869062] CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Not tainted 6.17.0-rc7+ #1 PREEMPT(voluntary) > > [ 181.869065] Hardware name: Dell Inc. , BIOS xx.xx.xx xx/xx/2025 > > [ 181.869067] RIP: 0010:__queue_work+0x420/0x5a0 > > [ 181.869070] Code: 00 00 41 83 e4 01 0f 85 57 fd ff ff 49 8b 77 18 48 8d 93 c0 00 00 00 48 c7 c7 00 8c bc 92 c6 05 27 47 68 02 01 e8 50 24 fd f > > f <0f> 0b e9 32 fd ff ff 0f 0b e9 1d fd ff ff 0f 0b e9 0f fd ff ff 0f > > [ 181.869072] RSP: 0018:ffffd53c000acdf8 EFLAGS: 00010046 > > [ 181.869075] RAX: 0000000000000000 RBX: ffff8ecd0727f200 RCX: 0000000000000000 > > [ 181.869076] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 > > [ 181.869077] RBP: ffffd53c000ace38 R08: 0000000000000000 R09: 0000000000000000 > > [ 181.869078] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 > > [ 181.869079] R13: ffffffff913995e0 R14: ffff8ecc824387a0 R15: ffff8ecc82438780 > > [ 181.869081] FS: 0000000000000000(0000) GS:ffff8eec0b92f000(0000) knlGS:0000000000000000 > > [ 181.869083] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > > [ 181.869084] CR2: 000005593e67a008 CR3: 0000001f41840002 CR4: 0000000000f72ef0 > > [ 181.869086] PKRU: 55555554 > > [ 181.869087] Call Trace: > > [ 181.869089] <IRQ> > > [ 181.869093] ? 
sched_clock+0x10/0x30 > > [ 181.869098] ? __pfx_delayed_work_timer_fn+0x10/0x10 > > [ 181.869100] delayed_work_timer_fn+0x19/0x30 > > [ 181.869102] call_timer_fn+0x2c/0x150 > > [ 181.869106] ? __pfx_delayed_work_timer_fn+0x10/0x10 > > [ 181.869108] __run_timers+0x1c6/0x2d0 > > [ 181.869111] run_timer_softirq+0x8a/0x100 > > [ 181.869114] handle_softirqs+0xe4/0x340 > > [ 181.869118] __irq_exit_rcu+0x10e/0x130 > > [ 181.869121] irq_exit_rcu+0xe/0x20 > > [ 181.869124] sysvec_apic_timer_interrupt+0xa0/0xc0 > > [ 181.869130] </IRQ> > > [ 181.869131] <TASK> > > [ 181.869132] asm_sysvec_apic_timer_interrupt+0x1b/0x20 [ 181.869135] RIP: 0010:cpuidle_enter_state+0xda/0x710 > > [ 181.869137] Code: 8f f7 fe e8 78 f0 ff ff 8b 53 04 49 89 c7 0f 1f 44 00 00 31 ff e8 86 bf f5 fe 80 7d d0 00 0f 85 22 02 00 00 fb 0f 1f 44 00 0 > > 0 <45> 85 f6 0f 88 f2 01 00 00 4d 63 ee 49 83 fd 0a 0f 83 d8 04 00 00 > > [ 181.869139] RSP: 0018:ffffd53c0022be18 EFLAGS: 00000246 > > [ 181.869140] RAX: 0000000000000000 RBX: ffff8eeb9f8bf880 RCX: 0000000000000000 > > [ 181.869142] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000000 > > [ 181.869143] RBP: ffffd53c0022be68 R08: 0000000000000000 R09: 0000000000000000 > > [ 181.869144] R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff93914780 > > [ 181.869145] R13: 0000000000000002 R14: 0000000000000002 R15: 0000002a583b0b41 > > [ 181.869148] ? 
cpuidle_enter_state+0xca/0x710
> > [ 181.869151] cpuidle_enter+0x2e/0x50
> > [ 181.869156] call_cpuidle+0x22/0x60
> > [ 181.869160] do_idle+0x1dc/0x240
> > [ 181.869163] cpu_startup_entry+0x29/0x30
> > [ 181.869164] start_secondary+0x128/0x160
> > [ 181.869167] common_startup_64+0x13e/0x141
> > [ 181.869171] </TASK>
> > [ 181.869172] ---[ end trace 0000000000000000 ]---
> > [ 226.924460] workqueue USBC000:00-con1: drain_workqueue() isn't complete after 10 tries
> > [ 329.470977] ucsi_acpi USBC000:00: error -ETIMEDOUT: PPM init failed
>
> Okay, so to me it looks like there are two separate issues here.
Yes, there are 2 issues.
>
> The first one looks like an EC (or PD controller) firmware related
> issue, where the response to the GET_ALTERNATE_MODES command is
> getting corrupted for some reason, and second is this race that you
> see as a consequence from the first FW related issue.
>
> So this patch is for the second issue - the race. I'll wait for the v2
> from you guys.
But I want to solve both issues.
Here is v2
https://lkml.org/lkml/2025/10/13/312
And I have a patch to fix the second issue, will submit it later.
>
> thanks,
>
> --
> heikki
On Thu, Oct 02, 2025 at 09:30:26AM +0800, Chia-Lin Kao (AceLan) wrote:
> During UCSI initialization and operation, there is a race condition where
> delayed work items can be scheduled but attempt to queue work after the
> workqueue has been destroyed. This occurs in multiple code paths.
>
> The race occurs when:
> 1. ucsi_partner_task() or ucsi_poll_worker() schedule delayed work
> 2. Connector cleanup paths call destroy_workqueue()
> 3. Previously scheduled delayed work timers fire after destruction
> 4. This triggers warnings and crashes in __queue_work()
>
> The issue is timing-sensitive and typically manifests when:
> - Port registration fails due to PPM timing issues
> - System shutdown/cleanup occurs with pending delayed work
> - Module removal races with active delayed work
>
> Fix this by:
> 1. Creating ucsi_destroy_connector_wq() helper function that safely
> cancels all pending delayed work before destroying workqueues
> 2. Applying the safe cleanup to all three workqueue destruction paths:
> - ucsi_register_port() error path
> - ucsi_init() error path
> - ucsi_unregister() cleanup path
>
> This prevents both the initial queueing on destroyed workqueues and
> retry attempts from running workers, eliminating the timer races.
>
> Fixes: b9aa02ca39a4 ("usb: typec: ucsi: Add polling mechanism for partner tasks like alt mode checking")
> Cc: stable@vger.kernel.org
> Signed-off-by: Chia-Lin Kao (AceLan) <acelan.kao@canonical.com>
> ---
> drivers/usb/typec/ucsi/ucsi.c | 50 ++++++++++++++++++++++-------------
> 1 file changed, 31 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
> index 5ba3a6c81964..1f71c9983163 100644
> --- a/drivers/usb/typec/ucsi/ucsi.c
> +++ b/drivers/usb/typec/ucsi/ucsi.c
> @@ -283,6 +283,33 @@ static void ucsi_poll_worker(struct work_struct *work)
> mutex_unlock(&con->lock);
> }
>
> +/**
> + * ucsi_destroy_connector_wq - Safely destroy connector workqueue
> + * @con: UCSI connector
> + *
> + * Cancel all pending delayed work and destroy the workqueue to prevent
> + * timer races where delayed work tries to queue on destroyed workqueue.
> + */
> +static void ucsi_destroy_connector_wq(struct ucsi_connector *con)
> +{
> + struct ucsi_work *uwork, *tmp;
> +
> + if (!con->wq)
> + return;
> +
> + /* Cancel any pending delayed work before destroying workqueue */
> + mutex_lock(&con->lock);
> + list_for_each_entry_safe(uwork, tmp, &con->partner_tasks, node) {
> + cancel_delayed_work_sync(&uwork->work);
> + list_del(&uwork->node);
> + kfree(uwork);
> + }
> + mutex_unlock(&con->lock);
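This patch introduces a deadlock here:
- ucsi_destroy_connector_wq() holds con->lock while it calls cancel_delayed_work_sync()
- ucsi_poll_worker() (the work being cancelled) also tries to acquire con->lock,
  so a running worker can never finish and cancel_delayed_work_sync() waits forever
I'll submit a v2 to fix this issue. The resulting hung-task report: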
[ 246.874552] INFO: task kworker/17:0:125 blocked for more than 122 seconds.
[ 246.874565] Not tainted 6.14.0-2014-oem #14
[ 246.874569] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 246.874571] task:kworker/17:0 state:D stack:0 pid:125 tgid:125 ppid:2 task_flags:0x4208060 flags:0x00004000
[ 246.874577] Workqueue: events_long ucsi_init_work [typec_ucsi]
[ 246.874592] Call Trace:
[ 246.874594] <TASK>
[ 246.874598] __schedule+0x2cf/0x640
[ 246.874605] schedule+0x29/0xd0
[ 246.874608] schedule_timeout+0xfb/0x110
[ 246.874611] __wait_for_common+0x91/0x190
[ 246.874614] ? __pfx_schedule_timeout+0x10/0x10
[ 246.874617] wait_for_completion+0x24/0x40
[ 246.874620] __flush_work+0x86/0xe0
[ 246.874624] ? __pfx_wq_barrier_func+0x10/0x10
[ 246.874629] cancel_delayed_work_sync+0x76/0x80
[ 246.874633] ucsi_destroy_connector_wq.part.0+0x61/0xd0 [typec_ucsi]
[ 246.874638] ucsi_init+0x27a/0x330 [typec_ucsi]
[ 246.874643] ucsi_init_work+0x18/0x90 [typec_ucsi]
[ 246.874647] process_one_work+0x178/0x3d0
[ 246.874650] worker_thread+0x2de/0x410
[ 246.874653] ? __pfx_worker_thread+0x10/0x10
[ 246.874657] kthread+0xfb/0x230
[ 246.874659] ? __pfx_kthread+0x10/0x10
[ 246.874662] ret_from_fork+0x44/0x70
[ 246.874665] ? __pfx_kthread+0x10/0x10
[ 246.874667] ret_from_fork_asm+0x1a/0x30
[ 246.874672] </TASK>
> +
> + destroy_workqueue(con->wq);
> + con->wq = NULL;
> +}
> +
> static int ucsi_partner_task(struct ucsi_connector *con,
> int (*cb)(struct ucsi_connector *),
> int retries, unsigned long delay)
> @@ -1798,10 +1825,8 @@ static int ucsi_register_port(struct ucsi *ucsi, struct ucsi_connector *con)
> out_unlock:
> mutex_unlock(&con->lock);
>
> - if (ret && con->wq) {
> - destroy_workqueue(con->wq);
> - con->wq = NULL;
> - }
> + if (ret)
> + ucsi_destroy_connector_wq(con);
>
> return ret;
> }
> @@ -1921,8 +1946,7 @@ static int ucsi_init(struct ucsi *ucsi)
>
> err_unregister:
> for (con = connector; con->port; con++) {
> - if (con->wq)
> - destroy_workqueue(con->wq);
> + ucsi_destroy_connector_wq(con);
> ucsi_unregister_partner(con);
> ucsi_unregister_altmodes(con, UCSI_RECIPIENT_CON);
> ucsi_unregister_port_psy(con);
> @@ -2144,19 +2168,7 @@ void ucsi_unregister(struct ucsi *ucsi)
> for (i = 0; i < ucsi->cap.num_connectors; i++) {
> cancel_work_sync(&ucsi->connector[i].work);
>
> - if (ucsi->connector[i].wq) {
> - struct ucsi_work *uwork;
> -
> - mutex_lock(&ucsi->connector[i].lock);
> - /*
> - * queue delayed items immediately so they can execute
> - * and free themselves before the wq is destroyed
> - */
> - list_for_each_entry(uwork, &ucsi->connector[i].partner_tasks, node)
> - mod_delayed_work(ucsi->connector[i].wq, &uwork->work, 0);
> - mutex_unlock(&ucsi->connector[i].lock);
> - destroy_workqueue(ucsi->connector[i].wq);
> - }
> + ucsi_destroy_connector_wq(&ucsi->connector[i]);
>
> ucsi_unregister_partner(&ucsi->connector[i]);
> ucsi_unregister_altmodes(&ucsi->connector[i],
> --
> 2.43.0
>
© 2016 - 2026 Red Hat, Inc.