drivers/net/wwan/t7xx/t7xx_hif_dpmaif.c | 6 ++++++ drivers/net/wwan/t7xx/t7xx_hif_dpmaif.h | 3 +++ drivers/net/wwan/t7xx/t7xx_hif_dpmaif_tx.c | 16 +++++++++++++--- 3 files changed, 22 insertions(+), 3 deletions(-)
v2: Address two concerns raised in AI-assisted code review of v1:
1. [High] t7xx_dpmaif_resume() was unconditionally restoring state to
DPMAIF_STATE_PWRON regardless of the state before suspend. If the
modem had already been moved to DPMAIF_STATE_PWROFF by
t7xx_dpmaif_md_state_callback() (MD_STATE_EXCEPTION or
MD_STATE_STOPPED) prior to system suspend, resume would incorrectly
re-arm the TX kthread guard, allowing TX HW writes against a modem
the MD state machine considers stopped or in exception.
Fix: save dpmaif_ctrl->state into pre_suspend_state at the start of
t7xx_dpmaif_suspend() and restore that saved value in
t7xx_dpmaif_resume(), so a pre-suspend PWROFF is preserved across
the suspend/resume cycle.
2. [Medium] The v1 second state check before pm_runtime_resume_and_get()
only narrowed the TOCTOU window -- it did not close it. The state
field was a plain enum read and written without any lock or
READ_ONCE/WRITE_ONCE annotation. After the check passed on one CPU,
the suspend path on another CPU could still set state=PWROFF and
begin PM teardown before the kthread reached pm_runtime_resume_and_get(),
reproducing the deadlock.
Fix: introduce tx_pm_lock (struct mutex) held by the kthread across
the [state check -> pm_runtime_resume_and_get -> pm_runtime_put]
sequence. t7xx_dpmaif_suspend() acquires this lock before setting
DPMAIF_STATE_PWROFF, which serialises with any in-progress kthread
PM section and guarantees the kthread cannot enter
pm_runtime_resume_and_get() after the state flag is set.
READ_ONCE/WRITE_ONCE are added at every access point of the state
flag that crosses the suspend/resume boundary to prevent
compiler-visible tearing.
The original v1 description of the root cause and tested fix still
applies (deadlock between t7xx_dpmaif_tx_hw_push_thread calling
pm_runtime_resume_and_get() and the system PM suspend path, triggered
with ASPM L1 enabled after repeated suspend/resume cycles).
Tested: no soft lockup over 500+ suspend/resume cycles with SIM
registered and ASPM L1 enabled (previously triggered in < 300).
Fixes: 05f7e89ab ("Linux 6.19")
Signed-off-by: Tim JH Chen <tim.jh.chen@wnc.com.tw>
---
drivers/net/wwan/t7xx/t7xx_hif_dpmaif.c | 6 ++++++
drivers/net/wwan/t7xx/t7xx_hif_dpmaif.h | 3 +++
drivers/net/wwan/t7xx/t7xx_hif_dpmaif_tx.c | 16 +++++++++++++---
3 files changed, 22 insertions(+), 3 deletions(-)
diff --git a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif.c b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif.c
index 7ff33c1d6ac7..e1f3eeb2c947 100644
--- a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif.c
+++ b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif.c
@@ -412,6 +412,11 @@ static int t7xx_dpmaif_suspend(struct t7xx_pci_dev *t7xx_dev, void *param)
struct dpmaif_ctrl *dpmaif_ctrl = param;
t7xx_dpmaif_tx_stop(dpmaif_ctrl);
+ dpmaif_ctrl->pre_suspend_state = READ_ONCE(dpmaif_ctrl->state);
+ mutex_lock(&dpmaif_ctrl->tx_pm_lock);
+ WRITE_ONCE(dpmaif_ctrl->state, DPMAIF_STATE_PWROFF);
+ mutex_unlock(&dpmaif_ctrl->tx_pm_lock);
+ wake_up(&dpmaif_ctrl->tx_wq);
t7xx_dpmaif_hw_stop_all_txq(&dpmaif_ctrl->hw_info);
t7xx_dpmaif_hw_stop_all_rxq(&dpmaif_ctrl->hw_info);
t7xx_dpmaif_disable_irq(dpmaif_ctrl);
@@ -451,6 +456,7 @@ static int t7xx_dpmaif_resume(struct t7xx_pci_dev *t7xx_dev, void *param)
if (!dpmaif_ctrl)
return 0;
+ WRITE_ONCE(dpmaif_ctrl->state, dpmaif_ctrl->pre_suspend_state);
t7xx_dpmaif_start_txrx_qs(dpmaif_ctrl);
t7xx_dpmaif_enable_irq(dpmaif_ctrl);
t7xx_dpmaif_unmask_dlq_intr(dpmaif_ctrl);
diff --git a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif.h b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif.h
index 0ce4505e813d..670ed2cca761 100644
--- a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif.h
+++ b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif.h
@@ -20,6 +20,7 @@
#include <linux/bitmap.h>
#include <linux/mm_types.h>
+#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/sched.h>
#include <linux/skbuff.h>
@@ -172,6 +173,8 @@ struct dpmaif_ctrl {
struct t7xx_pci_dev *t7xx_dev;
struct md_pm_entity dpmaif_pm_entity;
enum dpmaif_state state;
+ enum dpmaif_state pre_suspend_state;
+ struct mutex tx_pm_lock;
bool dpmaif_sw_init_done;
struct dpmaif_hw_info hw_info;
struct dpmaif_tx_queue txq[DPMAIF_TXQ_NUM];
diff --git a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_tx.c b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_tx.c
index 236d632cf591..197c0ab3fd39 100644
--- a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_tx.c
+++ b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_tx.c
@@ -449,10 +449,10 @@ static int t7xx_dpmaif_tx_hw_push_thread(void *arg)
while (!kthread_should_stop()) {
if (t7xx_tx_lists_are_all_empty(dpmaif_ctrl) ||
- dpmaif_ctrl->state != DPMAIF_STATE_PWRON) {
+ READ_ONCE(dpmaif_ctrl->state) != DPMAIF_STATE_PWRON) {
if (wait_event_interruptible(dpmaif_ctrl->tx_wq,
(!t7xx_tx_lists_are_all_empty(dpmaif_ctrl) &&
- dpmaif_ctrl->state == DPMAIF_STATE_PWRON) ||
+ READ_ONCE(dpmaif_ctrl->state) == DPMAIF_STATE_PWRON) ||
kthread_should_stop()))
continue;
@@ -460,14 +460,23 @@ static int t7xx_dpmaif_tx_hw_push_thread(void *arg)
break;
}
+ mutex_lock(&dpmaif_ctrl->tx_pm_lock);
+ if (READ_ONCE(dpmaif_ctrl->state) != DPMAIF_STATE_PWRON) {
+ mutex_unlock(&dpmaif_ctrl->tx_pm_lock);
+ continue;
+ }
+
ret = pm_runtime_resume_and_get(dpmaif_ctrl->dev);
- if (ret < 0 && ret != -EACCES)
+ if (ret < 0 && ret != -EACCES) {
+ mutex_unlock(&dpmaif_ctrl->tx_pm_lock);
return ret;
+ }
t7xx_pci_disable_sleep(dpmaif_ctrl->t7xx_dev);
t7xx_do_tx_hw_push(dpmaif_ctrl);
t7xx_pci_enable_sleep(dpmaif_ctrl->t7xx_dev);
pm_runtime_put_autosuspend(dpmaif_ctrl->dev);
+ mutex_unlock(&dpmaif_ctrl->tx_pm_lock);
}
return 0;
@@ -475,6 +484,7 @@ static int t7xx_dpmaif_tx_hw_push_thread(void *arg)
int t7xx_dpmaif_tx_thread_init(struct dpmaif_ctrl *dpmaif_ctrl)
{
+ mutex_init(&dpmaif_ctrl->tx_pm_lock);
init_waitqueue_head(&dpmaif_ctrl->tx_wq);
dpmaif_ctrl->tx_thread = kthread_run(t7xx_dpmaif_tx_hw_push_thread,
dpmaif_ctrl, "dpmaif_tx_hw_push");
--
2.43.0
© 2016 - 2026 Red Hat, Inc.