[PATCH] usb: typec: ucsi: Fix workqueue destruction race during connector cleanup

Chia-Lin Kao (AceLan) posted 1 patch 4 months, 1 week ago
There is a newer version of this series
drivers/usb/typec/ucsi/ucsi.c | 50 ++++++++++++++++++++++-------------
1 file changed, 31 insertions(+), 19 deletions(-)
[PATCH] usb: typec: ucsi: Fix workqueue destruction race during connector cleanup
Posted by Chia-Lin Kao (AceLan) 4 months, 1 week ago
During UCSI initialization and operation, there is a race condition where
delayed work items can be scheduled but attempt to queue work after the
workqueue has been destroyed. This occurs in multiple code paths.

The race occurs when:
1. ucsi_partner_task() or ucsi_poll_worker() schedule delayed work
2. Connector cleanup paths call destroy_workqueue()
3. Previously scheduled delayed work timers fire after destruction
4. This triggers warnings and crashes in __queue_work()

The issue is timing-sensitive and typically manifests when:
- Port registration fails due to PPM timing issues
- System shutdown/cleanup occurs with pending delayed work
- Module removal races with active delayed work

Fix this by:
1. Creating ucsi_destroy_connector_wq() helper function that safely
   cancels all pending delayed work before destroying workqueues
2. Applying the safe cleanup to all three workqueue destruction paths:
   - ucsi_register_port() error path
   - ucsi_init() error path
   - ucsi_unregister() cleanup path

This prevents both the initial queueing on destroyed workqueues and
retry attempts from running workers, eliminating the timer races.

Fixes: b9aa02ca39a4 ("usb: typec: ucsi: Add polling mechanism for partner tasks like alt mode checking")
Cc: stable@vger.kernel.org
Signed-off-by: Chia-Lin Kao (AceLan) <acelan.kao@canonical.com>
---
 drivers/usb/typec/ucsi/ucsi.c | 50 ++++++++++++++++++++++-------------
 1 file changed, 31 insertions(+), 19 deletions(-)

diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index 5ba3a6c81964..1f71c9983163 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -283,6 +283,33 @@ static void ucsi_poll_worker(struct work_struct *work)
 	mutex_unlock(&con->lock);
 }
 
+/**
+ * ucsi_destroy_connector_wq - Safely destroy connector workqueue
+ * @con: UCSI connector
+ *
+ * Cancel all pending delayed work and destroy the workqueue to prevent
+ * timer races where delayed work tries to queue on destroyed workqueue.
+ */
+static void ucsi_destroy_connector_wq(struct ucsi_connector *con)
+{
+	struct ucsi_work *uwork, *tmp;
+
+	if (!con->wq)
+		return;
+
+	/* Cancel any pending delayed work before destroying workqueue */
+	mutex_lock(&con->lock);
+	list_for_each_entry_safe(uwork, tmp, &con->partner_tasks, node) {
+		cancel_delayed_work_sync(&uwork->work);
+		list_del(&uwork->node);
+		kfree(uwork);
+	}
+	mutex_unlock(&con->lock);
+
+	destroy_workqueue(con->wq);
+	con->wq = NULL;
+}
+
 static int ucsi_partner_task(struct ucsi_connector *con,
 			     int (*cb)(struct ucsi_connector *),
 			     int retries, unsigned long delay)
@@ -1798,10 +1825,8 @@ static int ucsi_register_port(struct ucsi *ucsi, struct ucsi_connector *con)
 out_unlock:
 	mutex_unlock(&con->lock);
 
-	if (ret && con->wq) {
-		destroy_workqueue(con->wq);
-		con->wq = NULL;
-	}
+	if (ret)
+		ucsi_destroy_connector_wq(con);
 
 	return ret;
 }
@@ -1921,8 +1946,7 @@ static int ucsi_init(struct ucsi *ucsi)
 
 err_unregister:
 	for (con = connector; con->port; con++) {
-		if (con->wq)
-			destroy_workqueue(con->wq);
+		ucsi_destroy_connector_wq(con);
 		ucsi_unregister_partner(con);
 		ucsi_unregister_altmodes(con, UCSI_RECIPIENT_CON);
 		ucsi_unregister_port_psy(con);
@@ -2144,19 +2168,7 @@ void ucsi_unregister(struct ucsi *ucsi)
 	for (i = 0; i < ucsi->cap.num_connectors; i++) {
 		cancel_work_sync(&ucsi->connector[i].work);
 
-		if (ucsi->connector[i].wq) {
-			struct ucsi_work *uwork;
-
-			mutex_lock(&ucsi->connector[i].lock);
-			/*
-			 * queue delayed items immediately so they can execute
-			 * and free themselves before the wq is destroyed
-			 */
-			list_for_each_entry(uwork, &ucsi->connector[i].partner_tasks, node)
-				mod_delayed_work(ucsi->connector[i].wq, &uwork->work, 0);
-			mutex_unlock(&ucsi->connector[i].lock);
-			destroy_workqueue(ucsi->connector[i].wq);
-		}
+		ucsi_destroy_connector_wq(&ucsi->connector[i]);
 
 		ucsi_unregister_partner(&ucsi->connector[i]);
 		ucsi_unregister_altmodes(&ucsi->connector[i],
-- 
2.43.0
Re: [PATCH] usb: typec: ucsi: Fix workqueue destruction race during connector cleanup
Posted by Heikki Krogerus 4 months ago
On Thu, Oct 02, 2025 at 09:30:26AM +0800, Chia-Lin Kao (AceLan) wrote:
> During UCSI initialization and operation, there is a race condition where
> delayed work items can be scheduled but attempt to queue work after the
> workqueue has been destroyed. This occurs in multiple code paths.
> 
> The race occurs when:
> 1. ucsi_partner_task() or ucsi_poll_worker() schedule delayed work
> 2. Connector cleanup paths call destroy_workqueue()
> 3. Previously scheduled delayed work timers fire after destruction
> 4. This triggers warnings and crashes in __queue_work()

What warnings?

-- 
heikki
Re: [PATCH] usb: typec: ucsi: Fix workqueue destruction race during connector cleanup
Posted by Chia-Lin Kao (AceLan) 4 months ago
On Wed, Oct 08, 2025 at 01:34:35PM +0300, Heikki Krogerus wrote:
> On Thu, Oct 02, 2025 at 09:30:26AM +0800, Chia-Lin Kao (AceLan) wrote:
> > During UCSI initialization and operation, there is a race condition where
> > delayed work items can be scheduled but attempt to queue work after the
> > workqueue has been destroyed. This occurs in multiple code paths.
> > 
> > The race occurs when:
> > 1. ucsi_partner_task() or ucsi_poll_worker() schedule delayed work
> > 2. Connector cleanup paths call destroy_workqueue()
> > 3. Previously scheduled delayed work timers fire after destruction
> > 4. This triggers warnings and crashes in __queue_work()
> 
> What warnings?
Here is what I got.

Sep 24 13:24:22 ubuntu kernel: sysfs: cannot create duplicate filename '/devices/platform/USBC000:00/typec/port0/port0.0/partner'
Sep 24 13:24:22 ubuntu kernel: CPU: 1 UID: 0 PID: 132 Comm: kworker/u64:1 Tainted: G           O       6.14.0-1012-oem #12-Ubuntu
Sep 24 13:24:22 ubuntu kernel: Tainted: [O]=OOT_MODULE
Sep 24 13:24:22 ubuntu kernel: Hardware name: Dell Inc. Dell /, BIOS XXXX XX/XX/2025
Sep 24 13:24:22 ubuntu kernel: Workqueue: USBC000:00-con1 ucsi_poll_worker [typec_ucsi]
Sep 24 13:24:22 ubuntu kernel: Call Trace:
Sep 24 13:24:22 ubuntu kernel:  <TASK>
Sep 24 13:24:22 ubuntu kernel:  dump_stack_lvl+0x76/0xa0
Sep 24 13:24:22 ubuntu kernel:  dump_stack+0x10/0x20
Sep 24 13:24:22 ubuntu kernel:  sysfs_warn_dup+0x8a/0xb0
Sep 24 13:24:22 ubuntu kernel:  sysfs_do_create_link_sd+0xf1/0x100
Sep 24 13:24:22 ubuntu kernel:  sysfs_create_link+0x21/0x50
Sep 24 13:24:22 ubuntu kernel:  typec_probe+0x7e/0x100 [typec]
Sep 24 13:24:22 ubuntu kernel:  ? driver_sysfs_add+0x66/0xd0
Sep 24 13:24:22 ubuntu kernel:  really_probe+0xee/0x3c0
Sep 24 13:24:22 ubuntu kernel:  __driver_probe_device+0x8c/0x180
Sep 24 13:24:22 ubuntu kernel:  driver_probe_device+0x24/0xd0
Sep 24 13:24:22 ubuntu kernel:  __device_attach_driver+0xcd/0x170
Sep 24 13:24:22 ubuntu kernel:  ? _pfx__device_attach_driver+0x10/0x10
Sep 24 13:24:22 ubuntu kernel:  bus_for_each_drv+0x94/0xf0
Sep 24 13:24:22 ubuntu kernel:  __device_attach+0xb6/0x1d0
Sep 24 13:24:22 ubuntu kernel:  device_initial_probe+0x13/0x20
Sep 24 13:24:22 ubuntu kernel:  bus_probe_device+0x9f/0xb0
Sep 24 13:24:22 ubuntu kernel:  device_add+0x513/0x710
Sep 24 13:24:22 ubuntu kernel:  device_register+0x1a/0x30
Sep 24 13:24:22 ubuntu kernel:  typec_register_altmode+0x253/0x3a0 [typec]
Sep 24 13:24:22 ubuntu kernel:  typec_partner_register_altmode+0xe/0x20 [typec]
Sep 24 13:24:22 ubuntu kernel:  ucsi_register_altmode.constprop.0+0x30e/0x390 [typec_ucsi]
Sep 24 13:24:22 ubuntu kernel:  ucsi_register_altmodes+0x162/0x250 [typec_ucsi]
Sep 24 13:24:22 ubuntu kernel:  ucsi_check_altmodes+0x19/0xb0 [typec_ucsi]
Sep 24 13:24:22 ubuntu kernel:  ucsi_poll_worker+0x3d/0xf0 [typec_ucsi]
Sep 24 13:24:22 ubuntu kernel:  process_one_work+0x178/0x3d0
Sep 24 13:24:22 ubuntu kernel:  worker_thread+0x2de/0x410
Sep 24 13:24:22 ubuntu kernel:  ? __pfx_worker_thread+0x10/0x10
Sep 24 13:24:22 ubuntu kernel:  kthread+0xfb/0x230
Sep 24 13:24:22 ubuntu kernel:  ? __pfx_kthread+0x10/0x10
Sep 24 13:24:22 ubuntu kernel:  ret_from_fork+0x44/0x70
Sep 24 13:24:22 ubuntu kernel:  ? __pfx_kthread+0x10/0x10
Sep 24 13:24:22 ubuntu kernel:  ret_from_fork_asm+0x1a/0x30
Sep 24 13:24:22 ubuntu kernel:  </TASK>
Sep 24 13:24:22 ubuntu kernel: typec-thunderbolt port0-partner.1: failed to create symlinks
Sep 24 13:24:22 ubuntu kernel: typec-thunderbolt port0-partner.1: probe with driver typec-thunderbolt failed with error -17
> 
> -- 
> heikki
Re: [PATCH] usb: typec: ucsi: Fix workqueue destruction race during connector cleanup
Posted by Heikki Krogerus 4 months ago
On Thu, Oct 09, 2025 at 09:58:22AM +0800, Chia-Lin Kao (AceLan) wrote:
> On Wed, Oct 08, 2025 at 01:34:35PM +0300, Heikki Krogerus wrote:
> > On Thu, Oct 02, 2025 at 09:30:26AM +0800, Chia-Lin Kao (AceLan) wrote:
> > > During UCSI initialization and operation, there is a race condition where
> > > delayed work items can be scheduled but attempt to queue work after the
> > > workqueue has been destroyed. This occurs in multiple code paths.
> > > 
> > > The race occurs when:
> > > 1. ucsi_partner_task() or ucsi_poll_worker() schedule delayed work
> > > 2. Connector cleanup paths call destroy_workqueue()
> > > 3. Previously scheduled delayed work timers fire after destruction
> > > 4. This triggers warnings and crashes in __queue_work()
> > 
> > What warnings?
> Here is what I got.
> 
> Sep 24 13:24:22 ubuntu kernel: sysfs: cannot create duplicate filename '/devices/platform/USBC000:00/typec/port0/port0.0/partner'
> Sep 24 13:24:22 ubuntu kernel: CPU: 1 UID: 0 PID: 132 Comm: kworker/u64:1 Tainted: G           O       6.14.0-1012-oem #12-Ubuntu
> Sep 24 13:24:22 ubuntu kernel: Tainted: [O]=OOT_MODULE
> Sep 24 13:24:22 ubuntu kernel: Hardware name: Dell Inc. Dell /, BIOS XXXX XX/XX/2025
> Sep 24 13:24:22 ubuntu kernel: Workqueue: USBC000:00-con1 ucsi_poll_worker [typec_ucsi]
> Sep 24 13:24:22 ubuntu kernel: Call Trace:
> Sep 24 13:24:22 ubuntu kernel:  <TASK>
> Sep 24 13:24:22 ubuntu kernel:  dump_stack_lvl+0x76/0xa0
> Sep 24 13:24:22 ubuntu kernel:  dump_stack+0x10/0x20
> Sep 24 13:24:22 ubuntu kernel:  sysfs_warn_dup+0x8a/0xb0
> Sep 24 13:24:22 ubuntu kernel:  sysfs_do_create_link_sd+0xf1/0x100
> Sep 24 13:24:22 ubuntu kernel:  sysfs_create_link+0x21/0x50
> Sep 24 13:24:22 ubuntu kernel:  typec_probe+0x7e/0x100 [typec]
> Sep 24 13:24:22 ubuntu kernel:  ? driver_sysfs_add+0x66/0xd0
> Sep 24 13:24:22 ubuntu kernel:  really_probe+0xee/0x3c0
> Sep 24 13:24:22 ubuntu kernel:  __driver_probe_device+0x8c/0x180
> Sep 24 13:24:22 ubuntu kernel:  driver_probe_device+0x24/0xd0
> Sep 24 13:24:22 ubuntu kernel:  __device_attach_driver+0xcd/0x170
> Sep 24 13:24:22 ubuntu kernel:  ? _pfx__device_attach_driver+0x10/0x10
> Sep 24 13:24:22 ubuntu kernel:  bus_for_each_drv+0x94/0xf0
> Sep 24 13:24:22 ubuntu kernel:  __device_attach+0xb6/0x1d0
> Sep 24 13:24:22 ubuntu kernel:  device_initial_probe+0x13/0x20
> Sep 24 13:24:22 ubuntu kernel:  bus_probe_device+0x9f/0xb0
> Sep 24 13:24:22 ubuntu kernel:  device_add+0x513/0x710
> Sep 24 13:24:22 ubuntu kernel:  device_register+0x1a/0x30
> Sep 24 13:24:22 ubuntu kernel:  typec_register_altmode+0x253/0x3a0 [typec]
> Sep 24 13:24:22 ubuntu kernel:  typec_partner_register_altmode+0xe/0x20 [typec]
> Sep 24 13:24:22 ubuntu kernel:  ucsi_register_altmode.constprop.0+0x30e/0x390 [typec_ucsi]
> Sep 24 13:24:22 ubuntu kernel:  ucsi_register_altmodes+0x162/0x250 [typec_ucsi]
> Sep 24 13:24:22 ubuntu kernel:  ucsi_check_altmodes+0x19/0xb0 [typec_ucsi]
> Sep 24 13:24:22 ubuntu kernel:  ucsi_poll_worker+0x3d/0xf0 [typec_ucsi]
> Sep 24 13:24:22 ubuntu kernel:  process_one_work+0x178/0x3d0
> Sep 24 13:24:22 ubuntu kernel:  worker_thread+0x2de/0x410
> Sep 24 13:24:22 ubuntu kernel:  ? __pfx_worker_thread+0x10/0x10
> Sep 24 13:24:22 ubuntu kernel:  kthread+0xfb/0x230
> Sep 24 13:24:22 ubuntu kernel:  ? __pfx_kthread+0x10/0x10
> Sep 24 13:24:22 ubuntu kernel:  ret_from_fork+0x44/0x70
> Sep 24 13:24:22 ubuntu kernel:  ? __pfx_kthread+0x10/0x10
> Sep 24 13:24:22 ubuntu kernel:  ret_from_fork_asm+0x1a/0x30
> Sep 24 13:24:22 ubuntu kernel:  </TASK>
> Sep 24 13:24:22 ubuntu kernel: typec-thunderbolt port0-partner.1: failed to create symlinks
> Sep 24 13:24:22 ubuntu kernel: typec-thunderbolt port0-partner.1: probe with driver typec-thunderbolt failed with error -17

That does not look like anything you described in the commit message?

You have there an attempt to register the same alternate mode twice,
but the workqueue seems to be very much alive when that happens.

Based on the above this looks like either a race where the driver
really ends up registering the alternate modes multiple times or, and
more likely, the firmware is reporting the same alternate mode
multiple times.

Or am I missing something?

thanks,

-- 
heikki
Re: [PATCH] usb: typec: ucsi: Fix workqueue destruction race during connector cleanup
Posted by Chia-Lin Kao (AceLan) 3 months, 4 weeks ago
On Thu, Oct 09, 2025 at 05:08:53PM +0300, Heikki Krogerus wrote:
> On Thu, Oct 09, 2025 at 09:58:22AM +0800, Chia-Lin Kao (AceLan) wrote:
> > On Wed, Oct 08, 2025 at 01:34:35PM +0300, Heikki Krogerus wrote:
> > > On Thu, Oct 02, 2025 at 09:30:26AM +0800, Chia-Lin Kao (AceLan) wrote:
> > > > During UCSI initialization and operation, there is a race condition where
> > > > delayed work items can be scheduled but attempt to queue work after the
> > > > workqueue has been destroyed. This occurs in multiple code paths.
> > > > 
> > > > The race occurs when:
> > > > 1. ucsi_partner_task() or ucsi_poll_worker() schedule delayed work
> > > > 2. Connector cleanup paths call destroy_workqueue()
> > > > 3. Previously scheduled delayed work timers fire after destruction
> > > > 4. This triggers warnings and crashes in __queue_work()
> > > 
> > > What warnings?
> > Here is what I got.
> > 
> > Sep 24 13:24:22 ubuntu kernel: sysfs: cannot create duplicate filename '/devices/platform/USBC000:00/typec/port0/port0.0/partner'
> > Sep 24 13:24:22 ubuntu kernel: CPU: 1 UID: 0 PID: 132 Comm: kworker/u64:1 Tainted: G           O       6.14.0-1012-oem #12-Ubuntu
> > Sep 24 13:24:22 ubuntu kernel: Tainted: [O]=OOT_MODULE
> > Sep 24 13:24:22 ubuntu kernel: Hardware name: Dell Inc. Dell /, BIOS XXXX XX/XX/2025
> > Sep 24 13:24:22 ubuntu kernel: Workqueue: USBC000:00-con1 ucsi_poll_worker [typec_ucsi]
> > Sep 24 13:24:22 ubuntu kernel: Call Trace:
> > Sep 24 13:24:22 ubuntu kernel:  <TASK>
> > Sep 24 13:24:22 ubuntu kernel:  dump_stack_lvl+0x76/0xa0
> > Sep 24 13:24:22 ubuntu kernel:  dump_stack+0x10/0x20
> > Sep 24 13:24:22 ubuntu kernel:  sysfs_warn_dup+0x8a/0xb0
> > Sep 24 13:24:22 ubuntu kernel:  sysfs_do_create_link_sd+0xf1/0x100
> > Sep 24 13:24:22 ubuntu kernel:  sysfs_create_link+0x21/0x50
> > Sep 24 13:24:22 ubuntu kernel:  typec_probe+0x7e/0x100 [typec]
> > Sep 24 13:24:22 ubuntu kernel:  ? driver_sysfs_add+0x66/0xd0
> > Sep 24 13:24:22 ubuntu kernel:  really_probe+0xee/0x3c0
> > Sep 24 13:24:22 ubuntu kernel:  __driver_probe_device+0x8c/0x180
> > Sep 24 13:24:22 ubuntu kernel:  driver_probe_device+0x24/0xd0
> > Sep 24 13:24:22 ubuntu kernel:  __device_attach_driver+0xcd/0x170
> > Sep 24 13:24:22 ubuntu kernel:  ? _pfx__device_attach_driver+0x10/0x10
> > Sep 24 13:24:22 ubuntu kernel:  bus_for_each_drv+0x94/0xf0
> > Sep 24 13:24:22 ubuntu kernel:  __device_attach+0xb6/0x1d0
> > Sep 24 13:24:22 ubuntu kernel:  device_initial_probe+0x13/0x20
> > Sep 24 13:24:22 ubuntu kernel:  bus_probe_device+0x9f/0xb0
> > Sep 24 13:24:22 ubuntu kernel:  device_add+0x513/0x710
> > Sep 24 13:24:22 ubuntu kernel:  device_register+0x1a/0x30
> > Sep 24 13:24:22 ubuntu kernel:  typec_register_altmode+0x253/0x3a0 [typec]
> > Sep 24 13:24:22 ubuntu kernel:  typec_partner_register_altmode+0xe/0x20 [typec]
> > Sep 24 13:24:22 ubuntu kernel:  ucsi_register_altmode.constprop.0+0x30e/0x390 [typec_ucsi]
> > Sep 24 13:24:22 ubuntu kernel:  ucsi_register_altmodes+0x162/0x250 [typec_ucsi]
> > Sep 24 13:24:22 ubuntu kernel:  ucsi_check_altmodes+0x19/0xb0 [typec_ucsi]
> > Sep 24 13:24:22 ubuntu kernel:  ucsi_poll_worker+0x3d/0xf0 [typec_ucsi]
> > Sep 24 13:24:22 ubuntu kernel:  process_one_work+0x178/0x3d0
> > Sep 24 13:24:22 ubuntu kernel:  worker_thread+0x2de/0x410
> > Sep 24 13:24:22 ubuntu kernel:  ? __pfx_worker_thread+0x10/0x10
> > Sep 24 13:24:22 ubuntu kernel:  kthread+0xfb/0x230
> > Sep 24 13:24:22 ubuntu kernel:  ? __pfx_kthread+0x10/0x10
> > Sep 24 13:24:22 ubuntu kernel:  ret_from_fork+0x44/0x70
> > Sep 24 13:24:22 ubuntu kernel:  ? __pfx_kthread+0x10/0x10
> > Sep 24 13:24:22 ubuntu kernel:  ret_from_fork_asm+0x1a/0x30
> > Sep 24 13:24:22 ubuntu kernel:  </TASK>
> > Sep 24 13:24:22 ubuntu kernel: typec-thunderbolt port0-partner.1: failed to create symlinks
> > Sep 24 13:24:22 ubuntu kernel: typec-thunderbolt port0-partner.1: probe with driver typec-thunderbolt failed with error -17
> 
> That does not look like anything you described in the commit message?
> 
> You have there an attempt to register the same alternate mode twice,
> but the workqueue seems to be very much alive when that happens.
> 
> Based on the above this looks like either a race where the driver
> really ends up registering the alternate modes multiple times or, and
> more likely, the firmware is reporting the same alternate mode
> multiple times.
> 
> Or am I missing something?
Here is another one. It's not immediately obvious how this one relates to typec_ucsi.

[  170.605181] ucsi_acpi USBC000:00: con2: failed to register alt modes
[  181.868900] ------------[ cut here ]------------
[  181.868905] workqueue: cannot queue ucsi_poll_worker [typec_ucsi] on wq USBC000:00-con1
[  181.868918] WARNING: CPU: 1 PID: 0 at kernel/workqueue.c:2255 __queue_work+0x420/0x5a0
...
[  181.869062] CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Not tainted 6.17.0-rc7+ #1 PREEMPT(voluntary)
[  181.869065] Hardware name: Dell Inc. , BIOS xx.xx.xx xx/xx/2025
[  181.869067] RIP: 0010:__queue_work+0x420/0x5a0
[  181.869070] Code: 00 00 41 83 e4 01 0f 85 57 fd ff ff 49 8b 77 18 48 8d 93 c0 00 00 00 48 c7 c7 00 8c bc 92 c6 05 27 47 68 02 01 e8 50 24 fd f
f <0f> 0b e9 32 fd ff ff 0f 0b e9 1d fd ff ff 0f 0b e9 0f fd ff ff 0f
[  181.869072] RSP: 0018:ffffd53c000acdf8 EFLAGS: 00010046
[  181.869075] RAX: 0000000000000000 RBX: ffff8ecd0727f200 RCX: 0000000000000000
[  181.869076] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
[  181.869077] RBP: ffffd53c000ace38 R08: 0000000000000000 R09: 0000000000000000
[  181.869078] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
[  181.869079] R13: ffffffff913995e0 R14: ffff8ecc824387a0 R15: ffff8ecc82438780
[  181.869081] FS:  0000000000000000(0000) GS:ffff8eec0b92f000(0000) knlGS:0000000000000000
[  181.869083] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  181.869084] CR2: 000005593e67a008 CR3: 0000001f41840002 CR4: 0000000000f72ef0
[  181.869086] PKRU: 55555554
[  181.869087] Call Trace:
[  181.869089]  <IRQ>
[  181.869093]  ? sched_clock+0x10/0x30
[  181.869098]  ? __pfx_delayed_work_timer_fn+0x10/0x10
[  181.869100]  delayed_work_timer_fn+0x19/0x30
[  181.869102]  call_timer_fn+0x2c/0x150
[  181.869106]  ? __pfx_delayed_work_timer_fn+0x10/0x10
[  181.869108]  __run_timers+0x1c6/0x2d0
[  181.869111]  run_timer_softirq+0x8a/0x100
[  181.869114]  handle_softirqs+0xe4/0x340
[  181.869118]  __irq_exit_rcu+0x10e/0x130
[  181.869121]  irq_exit_rcu+0xe/0x20
[  181.869124]  sysvec_apic_timer_interrupt+0xa0/0xc0
[  181.869130]  </IRQ>
[  181.869131]  <TASK>
[  181.869132]  asm_sysvec_apic_timer_interrupt+0x1b/0x20                                                                                        [  181.869135] RIP: 0010:cpuidle_enter_state+0xda/0x710
[  181.869137] Code: 8f f7 fe e8 78 f0 ff ff 8b 53 04 49 89 c7 0f 1f 44 00 00 31 ff e8 86 bf f5 fe 80 7d d0 00 0f 85 22 02 00 00 fb 0f 1f 44 00 0
0 <45> 85 f6 0f 88 f2 01 00 00 4d 63 ee 49 83 fd 0a 0f 83 d8 04 00 00
[  181.869139] RSP: 0018:ffffd53c0022be18 EFLAGS: 00000246
[  181.869140] RAX: 0000000000000000 RBX: ffff8eeb9f8bf880 RCX: 0000000000000000
[  181.869142] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000000
[  181.869143] RBP: ffffd53c0022be68 R08: 0000000000000000 R09: 0000000000000000
[  181.869144] R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff93914780
[  181.869145] R13: 0000000000000002 R14: 0000000000000002 R15: 0000002a583b0b41
[  181.869148]  ? cpuidle_enter_state+0xca/0x710
[  181.869151]  cpuidle_enter+0x2e/0x50
[  181.869156]  call_cpuidle+0x22/0x60
[  181.869160]  do_idle+0x1dc/0x240
[  181.869163]  cpu_startup_entry+0x29/0x30
[  181.869164]  start_secondary+0x128/0x160
[  181.869167]  common_startup_64+0x13e/0x141
[  181.869171]  </TASK>
[  181.869172] ---[ end trace 0000000000000000 ]---
[  226.924460] workqueue USBC000:00-con1: drain_workqueue() isn't complete after 10 tries
[  329.470977] ucsi_acpi USBC000:00: error -ETIMEDOUT: PPM init failed

> 
> thanks,
> 
> -- 
> heikki
Re: [PATCH] usb: typec: ucsi: Fix workqueue destruction race during connector cleanup
Posted by Heikki Krogerus 3 months, 3 weeks ago
On Sun, Oct 12, 2025 at 10:00:03PM +0800, Chia-Lin Kao (AceLan) wrote:
> On Thu, Oct 09, 2025 at 05:08:53PM +0300, Heikki Krogerus wrote:
> > On Thu, Oct 09, 2025 at 09:58:22AM +0800, Chia-Lin Kao (AceLan) wrote:
> > > On Wed, Oct 08, 2025 at 01:34:35PM +0300, Heikki Krogerus wrote:
> > > > On Thu, Oct 02, 2025 at 09:30:26AM +0800, Chia-Lin Kao (AceLan) wrote:
> > > > > During UCSI initialization and operation, there is a race condition where
> > > > > delayed work items can be scheduled but attempt to queue work after the
> > > > > workqueue has been destroyed. This occurs in multiple code paths.
> > > > > 
> > > > > The race occurs when:
> > > > > 1. ucsi_partner_task() or ucsi_poll_worker() schedule delayed work
> > > > > 2. Connector cleanup paths call destroy_workqueue()
> > > > > 3. Previously scheduled delayed work timers fire after destruction
> > > > > 4. This triggers warnings and crashes in __queue_work()
> > > > 
> > > > What warnings?
> > > Here is what I got.
> > > 
> > > Sep 24 13:24:22 ubuntu kernel: sysfs: cannot create duplicate filename '/devices/platform/USBC000:00/typec/port0/port0.0/partner'
> > > Sep 24 13:24:22 ubuntu kernel: CPU: 1 UID: 0 PID: 132 Comm: kworker/u64:1 Tainted: G           O       6.14.0-1012-oem #12-Ubuntu
> > > Sep 24 13:24:22 ubuntu kernel: Tainted: [O]=OOT_MODULE
> > > Sep 24 13:24:22 ubuntu kernel: Hardware name: Dell Inc. Dell /, BIOS XXXX XX/XX/2025
> > > Sep 24 13:24:22 ubuntu kernel: Workqueue: USBC000:00-con1 ucsi_poll_worker [typec_ucsi]
> > > Sep 24 13:24:22 ubuntu kernel: Call Trace:
> > > Sep 24 13:24:22 ubuntu kernel:  <TASK>
> > > Sep 24 13:24:22 ubuntu kernel:  dump_stack_lvl+0x76/0xa0
> > > Sep 24 13:24:22 ubuntu kernel:  dump_stack+0x10/0x20
> > > Sep 24 13:24:22 ubuntu kernel:  sysfs_warn_dup+0x8a/0xb0
> > > Sep 24 13:24:22 ubuntu kernel:  sysfs_do_create_link_sd+0xf1/0x100
> > > Sep 24 13:24:22 ubuntu kernel:  sysfs_create_link+0x21/0x50
> > > Sep 24 13:24:22 ubuntu kernel:  typec_probe+0x7e/0x100 [typec]
> > > Sep 24 13:24:22 ubuntu kernel:  ? driver_sysfs_add+0x66/0xd0
> > > Sep 24 13:24:22 ubuntu kernel:  really_probe+0xee/0x3c0
> > > Sep 24 13:24:22 ubuntu kernel:  __driver_probe_device+0x8c/0x180
> > > Sep 24 13:24:22 ubuntu kernel:  driver_probe_device+0x24/0xd0
> > > Sep 24 13:24:22 ubuntu kernel:  __device_attach_driver+0xcd/0x170
> > > Sep 24 13:24:22 ubuntu kernel:  ? _pfx__device_attach_driver+0x10/0x10
> > > Sep 24 13:24:22 ubuntu kernel:  bus_for_each_drv+0x94/0xf0
> > > Sep 24 13:24:22 ubuntu kernel:  __device_attach+0xb6/0x1d0
> > > Sep 24 13:24:22 ubuntu kernel:  device_initial_probe+0x13/0x20
> > > Sep 24 13:24:22 ubuntu kernel:  bus_probe_device+0x9f/0xb0
> > > Sep 24 13:24:22 ubuntu kernel:  device_add+0x513/0x710
> > > Sep 24 13:24:22 ubuntu kernel:  device_register+0x1a/0x30
> > > Sep 24 13:24:22 ubuntu kernel:  typec_register_altmode+0x253/0x3a0 [typec]
> > > Sep 24 13:24:22 ubuntu kernel:  typec_partner_register_altmode+0xe/0x20 [typec]
> > > Sep 24 13:24:22 ubuntu kernel:  ucsi_register_altmode.constprop.0+0x30e/0x390 [typec_ucsi]
> > > Sep 24 13:24:22 ubuntu kernel:  ucsi_register_altmodes+0x162/0x250 [typec_ucsi]
> > > Sep 24 13:24:22 ubuntu kernel:  ucsi_check_altmodes+0x19/0xb0 [typec_ucsi]
> > > Sep 24 13:24:22 ubuntu kernel:  ucsi_poll_worker+0x3d/0xf0 [typec_ucsi]
> > > Sep 24 13:24:22 ubuntu kernel:  process_one_work+0x178/0x3d0
> > > Sep 24 13:24:22 ubuntu kernel:  worker_thread+0x2de/0x410
> > > Sep 24 13:24:22 ubuntu kernel:  ? __pfx_worker_thread+0x10/0x10
> > > Sep 24 13:24:22 ubuntu kernel:  kthread+0xfb/0x230
> > > Sep 24 13:24:22 ubuntu kernel:  ? __pfx_kthread+0x10/0x10
> > > Sep 24 13:24:22 ubuntu kernel:  ret_from_fork+0x44/0x70
> > > Sep 24 13:24:22 ubuntu kernel:  ? __pfx_kthread+0x10/0x10
> > > Sep 24 13:24:22 ubuntu kernel:  ret_from_fork_asm+0x1a/0x30
> > > Sep 24 13:24:22 ubuntu kernel:  </TASK>
> > > Sep 24 13:24:22 ubuntu kernel: typec-thunderbolt port0-partner.1: failed to create symlinks
> > > Sep 24 13:24:22 ubuntu kernel: typec-thunderbolt port0-partner.1: probe with driver typec-thunderbolt failed with error -17
> > 
> > That does not look like anything you described in the commit message?
> > 
> > You have there an attempt to register the same alternate mode twice,
> > but the workqueue seems to be very much alive when that happens.
> > 
> > Based on the above this looks like either a race where the driver
> > really ends up registering the alternate modes multiple times or, and
> > more likely, the firmware is reporting the same alternate mode
> > multiple times.
> > 
> > Or am I missing something?
> Here is another one. It's not immediately obvious how this one relates to typec_ucsi.
> 
> [  170.605181] ucsi_acpi USBC000:00: con2: failed to register alt modes
> [  181.868900] ------------[ cut here ]------------
> [  181.868905] workqueue: cannot queue ucsi_poll_worker [typec_ucsi] on wq USBC000:00-con1
> [  181.868918] WARNING: CPU: 1 PID: 0 at kernel/workqueue.c:2255 __queue_work+0x420/0x5a0
> ...
> [  181.869062] CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Not tainted 6.17.0-rc7+ #1 PREEMPT(voluntary)
> [  181.869065] Hardware name: Dell Inc. , BIOS xx.xx.xx xx/xx/2025
> [  181.869067] RIP: 0010:__queue_work+0x420/0x5a0
> [  181.869070] Code: 00 00 41 83 e4 01 0f 85 57 fd ff ff 49 8b 77 18 48 8d 93 c0 00 00 00 48 c7 c7 00 8c bc 92 c6 05 27 47 68 02 01 e8 50 24 fd f
> f <0f> 0b e9 32 fd ff ff 0f 0b e9 1d fd ff ff 0f 0b e9 0f fd ff ff 0f
> [  181.869072] RSP: 0018:ffffd53c000acdf8 EFLAGS: 00010046
> [  181.869075] RAX: 0000000000000000 RBX: ffff8ecd0727f200 RCX: 0000000000000000
> [  181.869076] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
> [  181.869077] RBP: ffffd53c000ace38 R08: 0000000000000000 R09: 0000000000000000
> [  181.869078] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
> [  181.869079] R13: ffffffff913995e0 R14: ffff8ecc824387a0 R15: ffff8ecc82438780
> [  181.869081] FS:  0000000000000000(0000) GS:ffff8eec0b92f000(0000) knlGS:0000000000000000
> [  181.869083] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [  181.869084] CR2: 000005593e67a008 CR3: 0000001f41840002 CR4: 0000000000f72ef0
> [  181.869086] PKRU: 55555554
> [  181.869087] Call Trace:
> [  181.869089]  <IRQ>
> [  181.869093]  ? sched_clock+0x10/0x30
> [  181.869098]  ? __pfx_delayed_work_timer_fn+0x10/0x10
> [  181.869100]  delayed_work_timer_fn+0x19/0x30
> [  181.869102]  call_timer_fn+0x2c/0x150
> [  181.869106]  ? __pfx_delayed_work_timer_fn+0x10/0x10
> [  181.869108]  __run_timers+0x1c6/0x2d0
> [  181.869111]  run_timer_softirq+0x8a/0x100
> [  181.869114]  handle_softirqs+0xe4/0x340
> [  181.869118]  __irq_exit_rcu+0x10e/0x130
> [  181.869121]  irq_exit_rcu+0xe/0x20
> [  181.869124]  sysvec_apic_timer_interrupt+0xa0/0xc0
> [  181.869130]  </IRQ>
> [  181.869131]  <TASK>
> [  181.869132]  asm_sysvec_apic_timer_interrupt+0x1b/0x20                                                                                        [  181.869135] RIP: 0010:cpuidle_enter_state+0xda/0x710
> [  181.869137] Code: 8f f7 fe e8 78 f0 ff ff 8b 53 04 49 89 c7 0f 1f 44 00 00 31 ff e8 86 bf f5 fe 80 7d d0 00 0f 85 22 02 00 00 fb 0f 1f 44 00 0
> 0 <45> 85 f6 0f 88 f2 01 00 00 4d 63 ee 49 83 fd 0a 0f 83 d8 04 00 00
> [  181.869139] RSP: 0018:ffffd53c0022be18 EFLAGS: 00000246
> [  181.869140] RAX: 0000000000000000 RBX: ffff8eeb9f8bf880 RCX: 0000000000000000
> [  181.869142] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000000
> [  181.869143] RBP: ffffd53c0022be68 R08: 0000000000000000 R09: 0000000000000000
> [  181.869144] R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff93914780
> [  181.869145] R13: 0000000000000002 R14: 0000000000000002 R15: 0000002a583b0b41
> [  181.869148]  ? cpuidle_enter_state+0xca/0x710
> [  181.869151]  cpuidle_enter+0x2e/0x50
> [  181.869156]  call_cpuidle+0x22/0x60
> [  181.869160]  do_idle+0x1dc/0x240
> [  181.869163]  cpu_startup_entry+0x29/0x30
> [  181.869164]  start_secondary+0x128/0x160
> [  181.869167]  common_startup_64+0x13e/0x141
> [  181.869171]  </TASK>
> [  181.869172] ---[ end trace 0000000000000000 ]---
> [  226.924460] workqueue USBC000:00-con1: drain_workqueue() isn't complete after 10 tries
> [  329.470977] ucsi_acpi USBC000:00: error -ETIMEDOUT: PPM init failed

Okay, so to me it looks like there are two separate issues here.

The first one looks like an EC (or PD controller) firmware related
issue, where the response to the GET_ALTERNATE_MODES command is
getting corrupted for some reason, and second is this race that you
see as a consequence from the first FW related issue.

So this patch is for the second issue - the race. I'll wait for the v2
from you guys. But I want to solve both issues.

thanks,

-- 
heikki
Re: [PATCH] usb: typec: ucsi: Fix workqueue destruction race during connector cleanup
Posted by Chia-Lin Kao (AceLan) 3 months, 3 weeks ago
On Mon, Oct 13, 2025 at 04:00:17PM +0300, Heikki Krogerus wrote:
> On Sun, Oct 12, 2025 at 10:00:03PM +0800, Chia-Lin Kao (AceLan) wrote:
> > On Thu, Oct 09, 2025 at 05:08:53PM +0300, Heikki Krogerus wrote:
> > > On Thu, Oct 09, 2025 at 09:58:22AM +0800, Chia-Lin Kao (AceLan) wrote:
> > > > On Wed, Oct 08, 2025 at 01:34:35PM +0300, Heikki Krogerus wrote:
> > > > > On Thu, Oct 02, 2025 at 09:30:26AM +0800, Chia-Lin Kao (AceLan) wrote:
> > > > > > During UCSI initialization and operation, there is a race condition where
> > > > > > delayed work items can be scheduled but attempt to queue work after the
> > > > > > workqueue has been destroyed. This occurs in multiple code paths.
> > > > > > 
> > > > > > The race occurs when:
> > > > > > 1. ucsi_partner_task() or ucsi_poll_worker() schedule delayed work
> > > > > > 2. Connector cleanup paths call destroy_workqueue()
> > > > > > 3. Previously scheduled delayed work timers fire after destruction
> > > > > > 4. This triggers warnings and crashes in __queue_work()
> > > > > 
> > > > > What warnings?
> > > > Here is what I got.
> > > > 
> > > > Sep 24 13:24:22 ubuntu kernel: sysfs: cannot create duplicate filename '/devices/platform/USBC000:00/typec/port0/port0.0/partner'
> > > > Sep 24 13:24:22 ubuntu kernel: CPU: 1 UID: 0 PID: 132 Comm: kworker/u64:1 Tainted: G           O       6.14.0-1012-oem #12-Ubuntu
> > > > Sep 24 13:24:22 ubuntu kernel: Tainted: [O]=OOT_MODULE
> > > > Sep 24 13:24:22 ubuntu kernel: Hardware name: Dell Inc. Dell /, BIOS XXXX XX/XX/2025
> > > > Sep 24 13:24:22 ubuntu kernel: Workqueue: USBC000:00-con1 ucsi_poll_worker [typec_ucsi]
> > > > Sep 24 13:24:22 ubuntu kernel: Call Trace:
> > > > Sep 24 13:24:22 ubuntu kernel:  <TASK>
> > > > Sep 24 13:24:22 ubuntu kernel:  dump_stack_lvl+0x76/0xa0
> > > > Sep 24 13:24:22 ubuntu kernel:  dump_stack+0x10/0x20
> > > > Sep 24 13:24:22 ubuntu kernel:  sysfs_warn_dup+0x8a/0xb0
> > > > Sep 24 13:24:22 ubuntu kernel:  sysfs_do_create_link_sd+0xf1/0x100
> > > > Sep 24 13:24:22 ubuntu kernel:  sysfs_create_link+0x21/0x50
> > > > Sep 24 13:24:22 ubuntu kernel:  typec_probe+0x7e/0x100 [typec]
> > > > Sep 24 13:24:22 ubuntu kernel:  ? driver_sysfs_add+0x66/0xd0
> > > > Sep 24 13:24:22 ubuntu kernel:  really_probe+0xee/0x3c0
> > > > Sep 24 13:24:22 ubuntu kernel:  __driver_probe_device+0x8c/0x180
> > > > Sep 24 13:24:22 ubuntu kernel:  driver_probe_device+0x24/0xd0
> > > > Sep 24 13:24:22 ubuntu kernel:  __device_attach_driver+0xcd/0x170
> > > > Sep 24 13:24:22 ubuntu kernel:  ? _pfx__device_attach_driver+0x10/0x10
> > > > Sep 24 13:24:22 ubuntu kernel:  bus_for_each_drv+0x94/0xf0
> > > > Sep 24 13:24:22 ubuntu kernel:  __device_attach+0xb6/0x1d0
> > > > Sep 24 13:24:22 ubuntu kernel:  device_initial_probe+0x13/0x20
> > > > Sep 24 13:24:22 ubuntu kernel:  bus_probe_device+0x9f/0xb0
> > > > Sep 24 13:24:22 ubuntu kernel:  device_add+0x513/0x710
> > > > Sep 24 13:24:22 ubuntu kernel:  device_register+0x1a/0x30
> > > > Sep 24 13:24:22 ubuntu kernel:  typec_register_altmode+0x253/0x3a0 [typec]
> > > > Sep 24 13:24:22 ubuntu kernel:  typec_partner_register_altmode+0xe/0x20 [typec]
> > > > Sep 24 13:24:22 ubuntu kernel:  ucsi_register_altmode.constprop.0+0x30e/0x390 [typec_ucsi]
> > > > Sep 24 13:24:22 ubuntu kernel:  ucsi_register_altmodes+0x162/0x250 [typec_ucsi]
> > > > Sep 24 13:24:22 ubuntu kernel:  ucsi_check_altmodes+0x19/0xb0 [typec_ucsi]
> > > > Sep 24 13:24:22 ubuntu kernel:  ucsi_poll_worker+0x3d/0xf0 [typec_ucsi]
> > > > Sep 24 13:24:22 ubuntu kernel:  process_one_work+0x178/0x3d0
> > > > Sep 24 13:24:22 ubuntu kernel:  worker_thread+0x2de/0x410
> > > > Sep 24 13:24:22 ubuntu kernel:  ? __pfx_worker_thread+0x10/0x10
> > > > Sep 24 13:24:22 ubuntu kernel:  kthread+0xfb/0x230
> > > > Sep 24 13:24:22 ubuntu kernel:  ? __pfx_kthread+0x10/0x10
> > > > Sep 24 13:24:22 ubuntu kernel:  ret_from_fork+0x44/0x70
> > > > Sep 24 13:24:22 ubuntu kernel:  ? __pfx_kthread+0x10/0x10
> > > > Sep 24 13:24:22 ubuntu kernel:  ret_from_fork_asm+0x1a/0x30
> > > > Sep 24 13:24:22 ubuntu kernel:  </TASK>
> > > > Sep 24 13:24:22 ubuntu kernel: typec-thunderbolt port0-partner.1: failed to create symlinks
> > > > Sep 24 13:24:22 ubuntu kernel: typec-thunderbolt port0-partner.1: probe with driver typec-thunderbolt failed with error -17
> > > 
> > > That does not look like anything you described in the commit message?
> > > 
> > > You have there an attempt to register the same alternate mode twice,
> > > but the workqueue seems to be very much alive when that happens.
> > > 
> > > Based on the above this looks like either a race where the driver
> > > really ends up registering the alternate modes multiple times or, and
> > > more likely, the firmware is reporting the same alternate mode
> > > multiple times.
> > > 
> > > Or am I missing something?
> > Here is another one. It's not immediately obvious how this one relates to typec_ucsi.
> > 
> > [  170.605181] ucsi_acpi USBC000:00: con2: failed to register alt modes
> > [  181.868900] ------------[ cut here ]------------
> > [  181.868905] workqueue: cannot queue ucsi_poll_worker [typec_ucsi] on wq USBC000:00-con1
> > [  181.868918] WARNING: CPU: 1 PID: 0 at kernel/workqueue.c:2255 __queue_work+0x420/0x5a0
> > ...
> > [  181.869062] CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Not tainted 6.17.0-rc7+ #1 PREEMPT(voluntary)
> > [  181.869065] Hardware name: Dell Inc. , BIOS xx.xx.xx xx/xx/2025
> > [  181.869067] RIP: 0010:__queue_work+0x420/0x5a0
> > [  181.869070] Code: 00 00 41 83 e4 01 0f 85 57 fd ff ff 49 8b 77 18 48 8d 93 c0 00 00 00 48 c7 c7 00 8c bc 92 c6 05 27 47 68 02 01 e8 50 24 fd f
> > f <0f> 0b e9 32 fd ff ff 0f 0b e9 1d fd ff ff 0f 0b e9 0f fd ff ff 0f
> > [  181.869072] RSP: 0018:ffffd53c000acdf8 EFLAGS: 00010046
> > [  181.869075] RAX: 0000000000000000 RBX: ffff8ecd0727f200 RCX: 0000000000000000
> > [  181.869076] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
> > [  181.869077] RBP: ffffd53c000ace38 R08: 0000000000000000 R09: 0000000000000000
> > [  181.869078] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
> > [  181.869079] R13: ffffffff913995e0 R14: ffff8ecc824387a0 R15: ffff8ecc82438780
> > [  181.869081] FS:  0000000000000000(0000) GS:ffff8eec0b92f000(0000) knlGS:0000000000000000
> > [  181.869083] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [  181.869084] CR2: 000005593e67a008 CR3: 0000001f41840002 CR4: 0000000000f72ef0
> > [  181.869086] PKRU: 55555554
> > [  181.869087] Call Trace:
> > [  181.869089]  <IRQ>
> > [  181.869093]  ? sched_clock+0x10/0x30
> > [  181.869098]  ? __pfx_delayed_work_timer_fn+0x10/0x10
> > [  181.869100]  delayed_work_timer_fn+0x19/0x30
> > [  181.869102]  call_timer_fn+0x2c/0x150
> > [  181.869106]  ? __pfx_delayed_work_timer_fn+0x10/0x10
> > [  181.869108]  __run_timers+0x1c6/0x2d0
> > [  181.869111]  run_timer_softirq+0x8a/0x100
> > [  181.869114]  handle_softirqs+0xe4/0x340
> > [  181.869118]  __irq_exit_rcu+0x10e/0x130
> > [  181.869121]  irq_exit_rcu+0xe/0x20
> > [  181.869124]  sysvec_apic_timer_interrupt+0xa0/0xc0
> > [  181.869130]  </IRQ>
> > [  181.869131]  <TASK>
> > [  181.869132]  asm_sysvec_apic_timer_interrupt+0x1b/0x20                                                                                        [  181.869135] RIP: 0010:cpuidle_enter_state+0xda/0x710
> > [  181.869137] Code: 8f f7 fe e8 78 f0 ff ff 8b 53 04 49 89 c7 0f 1f 44 00 00 31 ff e8 86 bf f5 fe 80 7d d0 00 0f 85 22 02 00 00 fb 0f 1f 44 00 0
> > 0 <45> 85 f6 0f 88 f2 01 00 00 4d 63 ee 49 83 fd 0a 0f 83 d8 04 00 00
> > [  181.869139] RSP: 0018:ffffd53c0022be18 EFLAGS: 00000246
> > [  181.869140] RAX: 0000000000000000 RBX: ffff8eeb9f8bf880 RCX: 0000000000000000
> > [  181.869142] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000000
> > [  181.869143] RBP: ffffd53c0022be68 R08: 0000000000000000 R09: 0000000000000000
> > [  181.869144] R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff93914780
> > [  181.869145] R13: 0000000000000002 R14: 0000000000000002 R15: 0000002a583b0b41
> > [  181.869148]  ? cpuidle_enter_state+0xca/0x710
> > [  181.869151]  cpuidle_enter+0x2e/0x50
> > [  181.869156]  call_cpuidle+0x22/0x60
> > [  181.869160]  do_idle+0x1dc/0x240
> > [  181.869163]  cpu_startup_entry+0x29/0x30
> > [  181.869164]  start_secondary+0x128/0x160
> > [  181.869167]  common_startup_64+0x13e/0x141
> > [  181.869171]  </TASK>
> > [  181.869172] ---[ end trace 0000000000000000 ]---
> > [  226.924460] workqueue USBC000:00-con1: drain_workqueue() isn't complete after 10 tries
> > [  329.470977] ucsi_acpi USBC000:00: error -ETIMEDOUT: PPM init failed
> 
> Okay, so to me it looks like there are two separate issues here.
Yes, there are 2 issues.
> 
> The first one looks like an EC (or PD controller) firmware related
> issue, where the response to the GET_ALTERNATE_MODES command is
> getting corrupted for some reason, and second is this race that you
> see as a consequence from the first FW related issue.
> 
> So this patch is for the second issue - the race. I'll wait for the v2
> from you guys. But I want to solve both issues.
Here is v2 https://lkml.org/lkml/2025/10/13/312

And I have a patch to fix the second issue, will submit it later.
> 
> thanks,
> 
> -- 
> heikki
Re: [PATCH] usb: typec: ucsi: Fix workqueue destruction race during connector cleanup
Posted by Chia-Lin Kao (AceLan) 4 months, 1 week ago
On Thu, Oct 02, 2025 at 09:30:26AM +0800, Chia-Lin Kao (AceLan) wrote:
> During UCSI initialization and operation, there is a race condition where
> delayed work items can be scheduled but attempt to queue work after the
> workqueue has been destroyed. This occurs in multiple code paths.
> 
> The race occurs when:
> 1. ucsi_partner_task() or ucsi_poll_worker() schedule delayed work
> 2. Connector cleanup paths call destroy_workqueue()
> 3. Previously scheduled delayed work timers fire after destruction
> 4. This triggers warnings and crashes in __queue_work()
> 
> The issue is timing-sensitive and typically manifests when:
> - Port registration fails due to PPM timing issues
> - System shutdown/cleanup occurs with pending delayed work
> - Module removal races with active delayed work
> 
> Fix this by:
> 1. Creating ucsi_destroy_connector_wq() helper function that safely
>    cancels all pending delayed work before destroying workqueues
> 2. Applying the safe cleanup to all three workqueue destruction paths:
>    - ucsi_register_port() error path
>    - ucsi_init() error path
>    - ucsi_unregister() cleanup path
> 
> This prevents both the initial queueing on destroyed workqueues and
> retry attempts from running workers, eliminating the timer races.
> 
> Fixes: b9aa02ca39a4 ("usb: typec: ucsi: Add polling mechanism for partner tasks like alt mode checking")
> Cc: stable@vger.kernel.org
> Signed-off-by: Chia-Lin Kao (AceLan) <acelan.kao@canonical.com>
> ---
>  drivers/usb/typec/ucsi/ucsi.c | 50 ++++++++++++++++++++++-------------
>  1 file changed, 31 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
> index 5ba3a6c81964..1f71c9983163 100644
> --- a/drivers/usb/typec/ucsi/ucsi.c
> +++ b/drivers/usb/typec/ucsi/ucsi.c
> @@ -283,6 +283,33 @@ static void ucsi_poll_worker(struct work_struct *work)
>  	mutex_unlock(&con->lock);
>  }
>  
> +/**
> + * ucsi_destroy_connector_wq - Safely destroy connector workqueue
> + * @con: UCSI connector
> + *
> + * Cancel all pending delayed work and destroy the workqueue to prevent
> + * timer races where delayed work tries to queue on destroyed workqueue.
> + */
> +static void ucsi_destroy_connector_wq(struct ucsi_connector *con)
> +{
> +	struct ucsi_work *uwork, *tmp;
> +
> +	if (!con->wq)
> +		return;
> +
> +	/* Cancel any pending delayed work before destroying workqueue */
> +	mutex_lock(&con->lock);
> +	list_for_each_entry_safe(uwork, tmp, &con->partner_tasks, node) {
> +		cancel_delayed_work_sync(&uwork->work);
> +		list_del(&uwork->node);
> +		kfree(uwork);
> +	}
> +	mutex_unlock(&con->lock);
It introduces a deadlock here.
  - ucsi_destroy_connector_wq() holds con->lock and calls cancel_delayed_work_sync()
  - ucsi_poll_worker() (the work being cancelled) also tries to acquire con->lock

I'll submit a v2 to fix this issue.

[  246.874552] INFO: task kworker/17:0:125 blocked for more than 122 seconds.
[  246.874565]       Not tainted 6.14.0-2014-oem #14
[  246.874569] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  246.874571] task:kworker/17:0    state:D stack:0     pid:125   tgid:125   ppid:2      task_flags:0x4208060 flags:0x00004000
[  246.874577] Workqueue: events_long ucsi_init_work [typec_ucsi]
[  246.874592] Call Trace:
[  246.874594]  <TASK>
[  246.874598]  __schedule+0x2cf/0x640
[  246.874605]  schedule+0x29/0xd0
[  246.874608]  schedule_timeout+0xfb/0x110
[  246.874611]  __wait_for_common+0x91/0x190
[  246.874614]  ? __pfx_schedule_timeout+0x10/0x10
[  246.874617]  wait_for_completion+0x24/0x40
[  246.874620]  __flush_work+0x86/0xe0
[  246.874624]  ? __pfx_wq_barrier_func+0x10/0x10
[  246.874629]  cancel_delayed_work_sync+0x76/0x80
[  246.874633]  ucsi_destroy_connector_wq.part.0+0x61/0xd0 [typec_ucsi]
[  246.874638]  ucsi_init+0x27a/0x330 [typec_ucsi]
[  246.874643]  ucsi_init_work+0x18/0x90 [typec_ucsi]
[  246.874647]  process_one_work+0x178/0x3d0
[  246.874650]  worker_thread+0x2de/0x410
[  246.874653]  ? __pfx_worker_thread+0x10/0x10
[  246.874657]  kthread+0xfb/0x230
[  246.874659]  ? __pfx_kthread+0x10/0x10
[  246.874662]  ret_from_fork+0x44/0x70
[  246.874665]  ? __pfx_kthread+0x10/0x10
[  246.874667]  ret_from_fork_asm+0x1a/0x30
[  246.874672]  </TASK>

> +
> +	destroy_workqueue(con->wq);
> +	con->wq = NULL;
> +}
> +
>  static int ucsi_partner_task(struct ucsi_connector *con,
>  			     int (*cb)(struct ucsi_connector *),
>  			     int retries, unsigned long delay)
> @@ -1798,10 +1825,8 @@ static int ucsi_register_port(struct ucsi *ucsi, struct ucsi_connector *con)
>  out_unlock:
>  	mutex_unlock(&con->lock);
>  
> -	if (ret && con->wq) {
> -		destroy_workqueue(con->wq);
> -		con->wq = NULL;
> -	}
> +	if (ret)
> +		ucsi_destroy_connector_wq(con);
>  
>  	return ret;
>  }
> @@ -1921,8 +1946,7 @@ static int ucsi_init(struct ucsi *ucsi)
>  
>  err_unregister:
>  	for (con = connector; con->port; con++) {
> -		if (con->wq)
> -			destroy_workqueue(con->wq);
> +		ucsi_destroy_connector_wq(con);
>  		ucsi_unregister_partner(con);
>  		ucsi_unregister_altmodes(con, UCSI_RECIPIENT_CON);
>  		ucsi_unregister_port_psy(con);
> @@ -2144,19 +2168,7 @@ void ucsi_unregister(struct ucsi *ucsi)
>  	for (i = 0; i < ucsi->cap.num_connectors; i++) {
>  		cancel_work_sync(&ucsi->connector[i].work);
>  
> -		if (ucsi->connector[i].wq) {
> -			struct ucsi_work *uwork;
> -
> -			mutex_lock(&ucsi->connector[i].lock);
> -			/*
> -			 * queue delayed items immediately so they can execute
> -			 * and free themselves before the wq is destroyed
> -			 */
> -			list_for_each_entry(uwork, &ucsi->connector[i].partner_tasks, node)
> -				mod_delayed_work(ucsi->connector[i].wq, &uwork->work, 0);
> -			mutex_unlock(&ucsi->connector[i].lock);
> -			destroy_workqueue(ucsi->connector[i].wq);
> -		}
> +		ucsi_destroy_connector_wq(&ucsi->connector[i]);
>  
>  		ucsi_unregister_partner(&ucsi->connector[i]);
>  		ucsi_unregister_altmodes(&ucsi->connector[i],
> -- 
> 2.43.0
>