The SOF IPC4 platform send_msg functions (hda_dsp_ipc4_send_msg,
mtl_ipc_send_msg, cnl_ipc4_send_msg) previously stored the message in
delayed_ipc_tx_msg and returned 0 when the TX register was busy. The
deferred message was supposed to be dispatched from the IRQ handler
when the DSP acknowledged the previous message.
This mechanism silently drops messages during D0i3 power transitions
because the IRQ handler never fires while the DSP is in a low-power
state. The caller then hangs in wait_event_timeout() for up to 500ms
per IPC chunk, causing multi-second audio stalls under CPU load.
Fix this by making the platform send_msg functions return -EBUSY
immediately when the TX register is busy (safe since they execute
under spin_lock_irq in sof_ipc_send_msg), and adding a bounded retry
loop with usleep_range() in ipc4_tx_msg_unlocked() which only holds
the tx_mutex (a sleepable context). The retry loop attempts up to 50
iterations with 100-200us delays, bounding the maximum busy-wait to
approximately 10ms instead of the previous 500ms timeout.
Also remove the now-dead delayed_ipc_tx_msg field from
sof_intel_hda_dev, the dispatch code, and the ack_received tracking
variable from all three IRQ thread handlers (hda_dsp_ipc4_irq_thread,
mtl_ipc_irq_thread, cnl_ipc4_irq_thread).
Signed-off-by: Cole Leavitt <cole@unwrap.rs>
---
sound/soc/sof/intel/cnl.c | 17 ++---------------
sound/soc/sof/intel/hda-ipc.c | 17 ++---------------
sound/soc/sof/intel/hda.h | 8 --------
sound/soc/sof/intel/mtl.c | 17 ++---------------
sound/soc/sof/ipc4.c | 17 +++++++++++++++--
5 files changed, 21 insertions(+), 55 deletions(-)
diff --git a/sound/soc/sof/intel/cnl.c b/sound/soc/sof/intel/cnl.c
index 0cc5725515e7..a2c6c7894a0f 100644
--- a/sound/soc/sof/intel/cnl.c
+++ b/sound/soc/sof/intel/cnl.c
@@ -37,7 +37,6 @@ irqreturn_t cnl_ipc4_irq_thread(int irq, void *context)
{
struct sof_ipc4_msg notification_data = {{ 0 }};
struct snd_sof_dev *sdev = context;
- bool ack_received = false;
bool ipc_irq = false;
u32 hipcida, hipctdr;
@@ -51,7 +50,6 @@ irqreturn_t cnl_ipc4_irq_thread(int irq, void *context)
cnl_ipc_dsp_done(sdev);
ipc_irq = true;
- ack_received = true;
}
if (hipctdr & CNL_DSP_REG_HIPCTDR_BUSY) {
@@ -101,13 +99,6 @@ irqreturn_t cnl_ipc4_irq_thread(int irq, void *context)
/* This interrupt is not shared so no need to return IRQ_NONE. */
dev_dbg_ratelimited(sdev->dev, "nothing to do in IPC IRQ thread\n");
- if (ack_received) {
- struct sof_intel_hda_dev *hdev = sdev->pdata->hw_pdata;
-
- if (hdev->delayed_ipc_tx_msg)
- cnl_ipc4_send_msg(sdev, hdev->delayed_ipc_tx_msg);
- }
-
return IRQ_HANDLED;
}
EXPORT_SYMBOL_NS(cnl_ipc4_irq_thread, "SND_SOC_SOF_INTEL_CNL");
@@ -266,12 +257,8 @@ int cnl_ipc4_send_msg(struct snd_sof_dev *sdev, struct snd_sof_ipc_msg *msg)
struct sof_intel_hda_dev *hdev = sdev->pdata->hw_pdata;
struct sof_ipc4_msg *msg_data = msg->msg_data;
- if (hda_ipc4_tx_is_busy(sdev)) {
- hdev->delayed_ipc_tx_msg = msg;
- return 0;
- }
-
- hdev->delayed_ipc_tx_msg = NULL;
+ if (hda_ipc4_tx_is_busy(sdev))
+ return -EBUSY;
/* send the message via mailbox */
if (msg_data->data_size)
diff --git a/sound/soc/sof/intel/hda-ipc.c b/sound/soc/sof/intel/hda-ipc.c
index 94425c510861..78449452041c 100644
--- a/sound/soc/sof/intel/hda-ipc.c
+++ b/sound/soc/sof/intel/hda-ipc.c
@@ -106,12 +106,8 @@ int hda_dsp_ipc4_send_msg(struct snd_sof_dev *sdev, struct snd_sof_ipc_msg *msg)
struct sof_intel_hda_dev *hdev = sdev->pdata->hw_pdata;
struct sof_ipc4_msg *msg_data = msg->msg_data;
- if (hda_ipc4_tx_is_busy(sdev)) {
- hdev->delayed_ipc_tx_msg = msg;
- return 0;
- }
-
- hdev->delayed_ipc_tx_msg = NULL;
+ if (hda_ipc4_tx_is_busy(sdev))
+ return -EBUSY;
/* send the message via mailbox */
if (msg_data->data_size)
@@ -168,7 +164,6 @@ irqreturn_t hda_dsp_ipc4_irq_thread(int irq, void *context)
{
struct sof_ipc4_msg notification_data = {{ 0 }};
struct snd_sof_dev *sdev = context;
- bool ack_received = false;
bool ipc_irq = false;
u32 hipcie, hipct;
@@ -182,7 +177,6 @@ irqreturn_t hda_dsp_ipc4_irq_thread(int irq, void *context)
hda_dsp_ipc_dsp_done(sdev);
ipc_irq = true;
- ack_received = true;
}
if (hipct & HDA_DSP_REG_HIPCT_BUSY) {
@@ -236,13 +230,6 @@ irqreturn_t hda_dsp_ipc4_irq_thread(int irq, void *context)
/* This interrupt is not shared so no need to return IRQ_NONE. */
dev_dbg_ratelimited(sdev->dev, "nothing to do in IPC IRQ thread\n");
- if (ack_received) {
- struct sof_intel_hda_dev *hdev = sdev->pdata->hw_pdata;
-
- if (hdev->delayed_ipc_tx_msg)
- hda_dsp_ipc4_send_msg(sdev, hdev->delayed_ipc_tx_msg);
- }
-
return IRQ_HANDLED;
}
EXPORT_SYMBOL_NS(hda_dsp_ipc4_irq_thread, "SND_SOC_SOF_INTEL_HDA_COMMON");
diff --git a/sound/soc/sof/intel/hda.h b/sound/soc/sof/intel/hda.h
index 562fe8be79c1..ac9f76a5ef97 100644
--- a/sound/soc/sof/intel/hda.h
+++ b/sound/soc/sof/intel/hda.h
@@ -549,14 +549,6 @@ struct sof_intel_hda_dev {
/* work queue for mic privacy state change notification sending */
struct sof_ace3_mic_privacy mic_privacy;
-
- /*
- * Pointing to the IPC message if immediate sending was not possible
- * because the downlink communication channel was BUSY at the time.
- * The message will be re-tried when the channel becomes free (the ACK
- * is received from the DSP for the previous message)
- */
- struct snd_sof_ipc_msg *delayed_ipc_tx_msg;
};
static inline struct hdac_bus *sof_to_bus(struct snd_sof_dev *s)
diff --git a/sound/soc/sof/intel/mtl.c b/sound/soc/sof/intel/mtl.c
index 095dcf1a18e4..24dec128f589 100644
--- a/sound/soc/sof/intel/mtl.c
+++ b/sound/soc/sof/intel/mtl.c
@@ -101,12 +101,8 @@ static int mtl_ipc_send_msg(struct snd_sof_dev *sdev, struct snd_sof_ipc_msg *ms
struct sof_intel_hda_dev *hdev = sdev->pdata->hw_pdata;
struct sof_ipc4_msg *msg_data = msg->msg_data;
- if (hda_ipc4_tx_is_busy(sdev)) {
- hdev->delayed_ipc_tx_msg = msg;
- return 0;
- }
-
- hdev->delayed_ipc_tx_msg = NULL;
+ if (hda_ipc4_tx_is_busy(sdev))
+ return -EBUSY;
/* send the message via mailbox */
if (msg_data->data_size)
@@ -559,7 +555,6 @@ static irqreturn_t mtl_ipc_irq_thread(int irq, void *context)
{
struct sof_ipc4_msg notification_data = {{ 0 }};
struct snd_sof_dev *sdev = context;
- bool ack_received = false;
bool ipc_irq = false;
u32 hipcida;
u32 hipctdr;
@@ -576,7 +571,6 @@ static irqreturn_t mtl_ipc_irq_thread(int irq, void *context)
mtl_ipc_dsp_done(sdev);
ipc_irq = true;
- ack_received = true;
}
if (hipctdr & MTL_DSP_REG_HFIPCXTDR_BUSY) {
@@ -628,13 +622,6 @@ static irqreturn_t mtl_ipc_irq_thread(int irq, void *context)
dev_dbg_ratelimited(sdev->dev, "nothing to do in IPC IRQ thread\n");
}
- if (ack_received) {
- struct sof_intel_hda_dev *hdev = sdev->pdata->hw_pdata;
-
- if (hdev->delayed_ipc_tx_msg)
- mtl_ipc_send_msg(sdev, hdev->delayed_ipc_tx_msg);
- }
-
return IRQ_HANDLED;
}
diff --git a/sound/soc/sof/ipc4.c b/sound/soc/sof/ipc4.c
index a4a090e6724a..2e24308ef9cc 100644
--- a/sound/soc/sof/ipc4.c
+++ b/sound/soc/sof/ipc4.c
@@ -365,20 +365,33 @@ static int ipc4_wait_tx_done(struct snd_sof_ipc *ipc, void *reply_data)
return ret;
}
+#define SOF_IPC4_TX_BUSY_RETRIES 50
+#define SOF_IPC4_TX_BUSY_DELAY_US 100
+#define SOF_IPC4_TX_BUSY_DELAY_MAX_US 200
+
static int ipc4_tx_msg_unlocked(struct snd_sof_ipc *ipc,
void *msg_data, size_t msg_bytes,
void *reply_data, size_t reply_bytes)
{
struct sof_ipc4_msg *ipc4_msg = msg_data;
struct snd_sof_dev *sdev = ipc->sdev;
- int ret;
+ int ret, i;
if (msg_bytes > ipc->max_payload_size || reply_bytes > ipc->max_payload_size)
return -EINVAL;
sof_ipc4_log_header(sdev->dev, "ipc tx ", msg_data, true);
- ret = sof_ipc_send_msg(sdev, msg_data, msg_bytes, reply_bytes);
+ for (i = 0; i < SOF_IPC4_TX_BUSY_RETRIES; i++) {
+ ret = sof_ipc_send_msg(sdev, msg_data, msg_bytes, reply_bytes);
+ if (ret != -EBUSY)
+ break;
+ usleep_range(SOF_IPC4_TX_BUSY_DELAY_US,
+ SOF_IPC4_TX_BUSY_DELAY_MAX_US);
+ }
+ if (i == SOF_IPC4_TX_BUSY_RETRIES)
+ dev_dbg(sdev->dev, "%s: TX still busy after %d retries\n",
+ __func__, i);
if (ret) {
dev_err_ratelimited(sdev->dev,
"%s: ipc message send for %#x|%#x failed: %d\n",
--
2.52.0
On 14/02/2026 08:40, Cole Leavitt wrote:
> The SOF IPC4 platform send_msg functions (hda_dsp_ipc4_send_msg,
> mtl_ipc_send_msg, cnl_ipc4_send_msg) previously stored the message in
> delayed_ipc_tx_msg and returned 0 when the TX register was busy. The
> deferred message was supposed to be dispatched from the IRQ handler
> when the DSP acknowledged the previous message.
>
> This mechanism silently drops messages during D0i3 power transitions
> because the IRQ handler never fires while the DSP is in a low-power
> state. The caller then hangs in wait_event_timeout() for up to 500ms
> per IPC chunk, causing multi-second audio stalls under CPU load.
I do wonder how this can happen as we only send IPC messages when the fw
has booted up and thus the fw should be replying to messages.
The delayed message handling is meant to handle the case when the firmware
has sent the reply already, but the TX doorbell is not cleared, FW is not
yet ready to receive a new message (or if we send it, it might be lost) or
we will never send such a message (and send it after the next firmware boot?).
see:
47772f905cd8 ("ASoC: SOF: Intel: ipc4: Wait for channel to be free
before sending a message")
I agree that rapid IPC sending attempts would drop messages and only the
last one would be sent. This would be visible with:
https://github.com/thesofproject/linux/pull/5521
> Fix this by making the platform send_msg functions return -EBUSY
> immediately when the TX register is busy (safe since they execute
> under spin_lock_irq in sof_ipc_send_msg), and adding a bounded retry
> loop with usleep_range() in ipc4_tx_msg_unlocked() which only holds
> the tx_mutex (a sleepable context). The retry loop attempts up to 50
> iterations with 100-200us delays, bounding the maximum busy-wait to
> approximately 10ms instead of the previous 500ms timeout.
>
> Also remove the now-dead delayed_ipc_tx_msg field from
> sof_intel_hda_dev, the dispatch code, and the ack_received tracking
> variable from all three IRQ thread handlers (hda_dsp_ipc4_irq_thread,
> mtl_ipc_irq_thread, cnl_ipc4_irq_thread).
>
> Signed-off-by: Cole Leavitt <cole@unwrap.rs>
> ---
...
diff --git a/sound/soc/sof/intel/mtl.c b/sound/soc/sof/intel/mtl.c
> index 095dcf1a18e4..24dec128f589 100644
> --- a/sound/soc/sof/intel/mtl.c
> +++ b/sound/soc/sof/intel/mtl.c
> @@ -101,12 +101,8 @@ static int mtl_ipc_send_msg(struct snd_sof_dev *sdev, struct snd_sof_ipc_msg *ms
> struct sof_intel_hda_dev *hdev = sdev->pdata->hw_pdata;
> struct sof_ipc4_msg *msg_data = msg->msg_data;
>
> - if (hda_ipc4_tx_is_busy(sdev)) {
> - hdev->delayed_ipc_tx_msg = msg;
> - return 0;
> - }
> -
> - hdev->delayed_ipc_tx_msg = NULL;
> + if (hda_ipc4_tx_is_busy(sdev))
> + return -EBUSY;
>
> /* send the message via mailbox */
> if (msg_data->data_size)
> @@ -559,7 +555,6 @@ static irqreturn_t mtl_ipc_irq_thread(int irq, void *context)
> {
> struct sof_ipc4_msg notification_data = {{ 0 }};
> struct snd_sof_dev *sdev = context;
> - bool ack_received = false;
> bool ipc_irq = false;
> u32 hipcida;
> u32 hipctdr;
> @@ -576,7 +571,6 @@ static irqreturn_t mtl_ipc_irq_thread(int irq, void *context)
> mtl_ipc_dsp_done(sdev);
>
> ipc_irq = true;
> - ack_received = true;
> }
>
> if (hipctdr & MTL_DSP_REG_HFIPCXTDR_BUSY) {
> @@ -628,13 +622,6 @@ static irqreturn_t mtl_ipc_irq_thread(int irq, void *context)
> dev_dbg_ratelimited(sdev->dev, "nothing to do in IPC IRQ thread\n");
> }
>
> - if (ack_received) {
> - struct sof_intel_hda_dev *hdev = sdev->pdata->hw_pdata;
> -
> - if (hdev->delayed_ipc_tx_msg)
> - mtl_ipc_send_msg(sdev, hdev->delayed_ipc_tx_msg);
> - }
> -
> return IRQ_HANDLED;
> }
>
> diff --git a/sound/soc/sof/ipc4.c b/sound/soc/sof/ipc4.c
> index a4a090e6724a..2e24308ef9cc 100644
> --- a/sound/soc/sof/ipc4.c
> +++ b/sound/soc/sof/ipc4.c
> @@ -365,20 +365,33 @@ static int ipc4_wait_tx_done(struct snd_sof_ipc *ipc, void *reply_data)
> return ret;
> }
>
> +#define SOF_IPC4_TX_BUSY_RETRIES 50
> +#define SOF_IPC4_TX_BUSY_DELAY_US 100
> +#define SOF_IPC4_TX_BUSY_DELAY_MAX_US 200
> +
> static int ipc4_tx_msg_unlocked(struct snd_sof_ipc *ipc,
> void *msg_data, size_t msg_bytes,
> void *reply_data, size_t reply_bytes)
> {
> struct sof_ipc4_msg *ipc4_msg = msg_data;
> struct snd_sof_dev *sdev = ipc->sdev;
> - int ret;
> + int ret, i;
>
> if (msg_bytes > ipc->max_payload_size || reply_bytes > ipc->max_payload_size)
> return -EINVAL;
>
> sof_ipc4_log_header(sdev->dev, "ipc tx ", msg_data, true);
>
> - ret = sof_ipc_send_msg(sdev, msg_data, msg_bytes, reply_bytes);
> + for (i = 0; i < SOF_IPC4_TX_BUSY_RETRIES; i++) {
> + ret = sof_ipc_send_msg(sdev, msg_data, msg_bytes, reply_bytes);
> + if (ret != -EBUSY)
> + break;
> + usleep_range(SOF_IPC4_TX_BUSY_DELAY_US,
> + SOF_IPC4_TX_BUSY_DELAY_MAX_US);
> + }
The reason why I ended up with the dead simple delayed msg handling, which
sends the message right away when the ack is received, is to avoid delays
without the need for a busy loop.
Before that I had a similar approach, but dropped it due to the delay it
introduced on the message sending.
The delayed message combined with IPC timeouts are an interesting
problem to tackle, the PR I mentioned:
https://github.com/thesofproject/linux/pull/5521
plus this patch
https://github.com/ujfalusi/sof-linux/commit/e6c6ca613e60c477dcc025207f9732c8ae4a1b33
worked locally most of the time, but I admit that I have not faced
the issue of lost IPCs.
I'm not sure if the two approaches can be somehow combined.
Receiving the reply to the message does not mean that the FW can receive
a new message, but from the kernel's point of view the IPC sequence was
done (when we receive an out of sync ACK, the reply data might not be valid
anymore).
We cannot make the IPC completion based on the ACK as I have seen in
debug logs all permutations: ACK then REPLY, ACK and REPLY at the same
time, REPLY followed by ACK.
Having said that, I think this is still a bit safer to not lose
messages, 200us is close to polling.
> + if (i == SOF_IPC4_TX_BUSY_RETRIES)
> + dev_dbg(sdev->dev, "%s: TX still busy after %d retries\n",
> + __func__, i);
no need to print the __func__, it is added by the infra.
Can you add
dev_dbg(sdev->dev, "message sending delayed by %d loops for %#x|%#x\n",
i, ipc4_msg->primary, ipc4_msg->extension);
when the message got delayed due to EBUSY?
> if (ret) {
> dev_err_ratelimited(sdev->dev,
> "%s: ipc message send for %#x|%#x failed: %d\n",
--
Péter
The SOF IPC4 platform send_msg functions (hda_dsp_ipc4_send_msg,
mtl_ipc_send_msg, cnl_ipc4_send_msg) previously stored the message in
delayed_ipc_tx_msg and returned 0 when the TX register was busy. The
deferred message was supposed to be dispatched from the IRQ handler
when the DSP acknowledged the previous message.
This mechanism silently drops messages during D0i3 power transitions
because the IRQ handler never fires while the DSP is in a low-power
state. The caller then hangs in wait_event_timeout() for up to 500ms
per IPC chunk, causing multi-second audio stalls under CPU load.
Fix this by making the platform send_msg functions return -EBUSY
immediately when the TX register is busy (safe since they execute
under spin_lock_irq in sof_ipc_send_msg), and adding a bounded retry
loop with usleep_range() in ipc4_tx_msg_unlocked() which only holds
the tx_mutex (a sleepable context). The retry loop attempts up to 50
iterations with 100-200us delays, bounding the maximum busy-wait to
approximately 10ms instead of the previous 500ms timeout.
Also remove the now-dead delayed_ipc_tx_msg field from
sof_intel_hda_dev, the dispatch code, and the ack_received tracking
variable from all three IRQ thread handlers (hda_dsp_ipc4_irq_thread,
mtl_ipc_irq_thread, cnl_ipc4_irq_thread).
Signed-off-by: Cole Leavitt <cole@unwrap.rs>
---
Changes in v2:
- Removed __func__ from debug prints (dyndbg adds it automatically)
- Added dev_dbg() when message sending is delayed due to EBUSY
- Dropped patch 2/2 (dai_link_hw_ready) per Pierre's feedback
sound/soc/sof/intel/cnl.c | 17 ++---------------
sound/soc/sof/intel/hda-ipc.c | 17 ++---------------
sound/soc/sof/intel/hda.h | 8 --------
sound/soc/sof/intel/mtl.c | 17 ++---------------
sound/soc/sof/ipc4.c | 20 ++++++++++++++++++--
5 files changed, 24 insertions(+), 55 deletions(-)
diff --git a/sound/soc/sof/intel/cnl.c b/sound/soc/sof/intel/cnl.c
index 0cc5725515e7..a2c6c7894a0f 100644
--- a/sound/soc/sof/intel/cnl.c
+++ b/sound/soc/sof/intel/cnl.c
@@ -37,7 +37,6 @@ irqreturn_t cnl_ipc4_irq_thread(int irq, void *context)
{
struct sof_ipc4_msg notification_data = {{ 0 }};
struct snd_sof_dev *sdev = context;
- bool ack_received = false;
bool ipc_irq = false;
u32 hipcida, hipctdr;
@@ -51,7 +50,6 @@ irqreturn_t cnl_ipc4_irq_thread(int irq, void *context)
cnl_ipc_dsp_done(sdev);
ipc_irq = true;
- ack_received = true;
}
if (hipctdr & CNL_DSP_REG_HIPCTDR_BUSY) {
@@ -101,13 +99,6 @@ irqreturn_t cnl_ipc4_irq_thread(int irq, void *context)
/* This interrupt is not shared so no need to return IRQ_NONE. */
dev_dbg_ratelimited(sdev->dev, "nothing to do in IPC IRQ thread\n");
- if (ack_received) {
- struct sof_intel_hda_dev *hdev = sdev->pdata->hw_pdata;
-
- if (hdev->delayed_ipc_tx_msg)
- cnl_ipc4_send_msg(sdev, hdev->delayed_ipc_tx_msg);
- }
-
return IRQ_HANDLED;
}
EXPORT_SYMBOL_NS(cnl_ipc4_irq_thread, "SND_SOC_SOF_INTEL_CNL");
@@ -266,12 +257,8 @@ int cnl_ipc4_send_msg(struct snd_sof_dev *sdev, struct snd_sof_ipc_msg *msg)
struct sof_intel_hda_dev *hdev = sdev->pdata->hw_pdata;
struct sof_ipc4_msg *msg_data = msg->msg_data;
- if (hda_ipc4_tx_is_busy(sdev)) {
- hdev->delayed_ipc_tx_msg = msg;
- return 0;
- }
-
- hdev->delayed_ipc_tx_msg = NULL;
+ if (hda_ipc4_tx_is_busy(sdev))
+ return -EBUSY;
/* send the message via mailbox */
if (msg_data->data_size)
diff --git a/sound/soc/sof/intel/hda-ipc.c b/sound/soc/sof/intel/hda-ipc.c
index 94425c510861..78449452041c 100644
--- a/sound/soc/sof/intel/hda-ipc.c
+++ b/sound/soc/sof/intel/hda-ipc.c
@@ -106,12 +106,8 @@ int hda_dsp_ipc4_send_msg(struct snd_sof_dev *sdev, struct snd_sof_ipc_msg *msg)
struct sof_intel_hda_dev *hdev = sdev->pdata->hw_pdata;
struct sof_ipc4_msg *msg_data = msg->msg_data;
- if (hda_ipc4_tx_is_busy(sdev)) {
- hdev->delayed_ipc_tx_msg = msg;
- return 0;
- }
-
- hdev->delayed_ipc_tx_msg = NULL;
+ if (hda_ipc4_tx_is_busy(sdev))
+ return -EBUSY;
/* send the message via mailbox */
if (msg_data->data_size)
@@ -168,7 +164,6 @@ irqreturn_t hda_dsp_ipc4_irq_thread(int irq, void *context)
{
struct sof_ipc4_msg notification_data = {{ 0 }};
struct snd_sof_dev *sdev = context;
- bool ack_received = false;
bool ipc_irq = false;
u32 hipcie, hipct;
@@ -182,7 +177,6 @@ irqreturn_t hda_dsp_ipc4_irq_thread(int irq, void *context)
hda_dsp_ipc_dsp_done(sdev);
ipc_irq = true;
- ack_received = true;
}
if (hipct & HDA_DSP_REG_HIPCT_BUSY) {
@@ -236,13 +230,6 @@ irqreturn_t hda_dsp_ipc4_irq_thread(int irq, void *context)
/* This interrupt is not shared so no need to return IRQ_NONE. */
dev_dbg_ratelimited(sdev->dev, "nothing to do in IPC IRQ thread\n");
- if (ack_received) {
- struct sof_intel_hda_dev *hdev = sdev->pdata->hw_pdata;
-
- if (hdev->delayed_ipc_tx_msg)
- hda_dsp_ipc4_send_msg(sdev, hdev->delayed_ipc_tx_msg);
- }
-
return IRQ_HANDLED;
}
EXPORT_SYMBOL_NS(hda_dsp_ipc4_irq_thread, "SND_SOC_SOF_INTEL_HDA_COMMON");
diff --git a/sound/soc/sof/intel/hda.h b/sound/soc/sof/intel/hda.h
index 562fe8be79c1..ac9f76a5ef97 100644
--- a/sound/soc/sof/intel/hda.h
+++ b/sound/soc/sof/intel/hda.h
@@ -549,14 +549,6 @@ struct sof_intel_hda_dev {
/* work queue for mic privacy state change notification sending */
struct sof_ace3_mic_privacy mic_privacy;
-
- /*
- * Pointing to the IPC message if immediate sending was not possible
- * because the downlink communication channel was BUSY at the time.
- * The message will be re-tried when the channel becomes free (the ACK
- * is received from the DSP for the previous message)
- */
- struct snd_sof_ipc_msg *delayed_ipc_tx_msg;
};
static inline struct hdac_bus *sof_to_bus(struct snd_sof_dev *s)
diff --git a/sound/soc/sof/intel/mtl.c b/sound/soc/sof/intel/mtl.c
index 095dcf1a18e4..24dec128f589 100644
--- a/sound/soc/sof/intel/mtl.c
+++ b/sound/soc/sof/intel/mtl.c
@@ -101,12 +101,8 @@ static int mtl_ipc_send_msg(struct snd_sof_dev *sdev, struct snd_sof_ipc_msg *ms
struct sof_intel_hda_dev *hdev = sdev->pdata->hw_pdata;
struct sof_ipc4_msg *msg_data = msg->msg_data;
- if (hda_ipc4_tx_is_busy(sdev)) {
- hdev->delayed_ipc_tx_msg = msg;
- return 0;
- }
-
- hdev->delayed_ipc_tx_msg = NULL;
+ if (hda_ipc4_tx_is_busy(sdev))
+ return -EBUSY;
/* send the message via mailbox */
if (msg_data->data_size)
@@ -559,7 +555,6 @@ static irqreturn_t mtl_ipc_irq_thread(int irq, void *context)
{
struct sof_ipc4_msg notification_data = {{ 0 }};
struct snd_sof_dev *sdev = context;
- bool ack_received = false;
bool ipc_irq = false;
u32 hipcida;
u32 hipctdr;
@@ -576,7 +571,6 @@ static irqreturn_t mtl_ipc_irq_thread(int irq, void *context)
mtl_ipc_dsp_done(sdev);
ipc_irq = true;
- ack_received = true;
}
if (hipctdr & MTL_DSP_REG_HFIPCXTDR_BUSY) {
@@ -628,13 +622,6 @@ static irqreturn_t mtl_ipc_irq_thread(int irq, void *context)
dev_dbg_ratelimited(sdev->dev, "nothing to do in IPC IRQ thread\n");
}
- if (ack_received) {
- struct sof_intel_hda_dev *hdev = sdev->pdata->hw_pdata;
-
- if (hdev->delayed_ipc_tx_msg)
- mtl_ipc_send_msg(sdev, hdev->delayed_ipc_tx_msg);
- }
-
return IRQ_HANDLED;
}
diff --git a/sound/soc/sof/ipc4.c b/sound/soc/sof/ipc4.c
index a4a090e6724a..ad99e2e07b66 100644
--- a/sound/soc/sof/ipc4.c
+++ b/sound/soc/sof/ipc4.c
@@ -365,20 +365,36 @@ static int ipc4_wait_tx_done(struct snd_sof_ipc *ipc, void *reply_data)
return ret;
}
+#define SOF_IPC4_TX_BUSY_RETRIES 50
+#define SOF_IPC4_TX_BUSY_DELAY_US 100
+#define SOF_IPC4_TX_BUSY_DELAY_MAX_US 200
+
static int ipc4_tx_msg_unlocked(struct snd_sof_ipc *ipc,
void *msg_data, size_t msg_bytes,
void *reply_data, size_t reply_bytes)
{
struct sof_ipc4_msg *ipc4_msg = msg_data;
struct snd_sof_dev *sdev = ipc->sdev;
- int ret;
+ int ret, i;
if (msg_bytes > ipc->max_payload_size || reply_bytes > ipc->max_payload_size)
return -EINVAL;
sof_ipc4_log_header(sdev->dev, "ipc tx ", msg_data, true);
- ret = sof_ipc_send_msg(sdev, msg_data, msg_bytes, reply_bytes);
+ for (i = 0; i < SOF_IPC4_TX_BUSY_RETRIES; i++) {
+ ret = sof_ipc_send_msg(sdev, msg_data, msg_bytes, reply_bytes);
+ if (ret != -EBUSY)
+ break;
+ usleep_range(SOF_IPC4_TX_BUSY_DELAY_US,
+ SOF_IPC4_TX_BUSY_DELAY_MAX_US);
+ }
+ if (i == SOF_IPC4_TX_BUSY_RETRIES) {
+ dev_dbg(sdev->dev, "ipc tx failed: TX busy after %d retries\n", i);
+ } else if (i > 0) {
+ dev_dbg(sdev->dev, "ipc tx delayed by %d loops for %#x|%#x\n",
+ i, ipc4_msg->primary, ipc4_msg->extension);
+ }
if (ret) {
dev_err_ratelimited(sdev->dev,
"%s: ipc message send for %#x|%#x failed: %d\n",
--
2.52.0
On 17/02/2026 23:49, Cole Leavitt wrote:
> The SOF IPC4 platform send_msg functions (hda_dsp_ipc4_send_msg,
> mtl_ipc_send_msg, cnl_ipc4_send_msg) previously stored the message in
> delayed_ipc_tx_msg and returned 0 when the TX register was busy. The
> deferred message was supposed to be dispatched from the IRQ handler
> when the DSP acknowledged the previous message.
>
> This mechanism silently drops messages during D0i3 power transitions
> because the IRQ handler never fires while the DSP is in a low-power
> state. The caller then hangs in wait_event_timeout() for up to 500ms
> per IPC chunk, causing multi-second audio stalls under CPU load.
I think the agent got this a bit wrong and there is a cause-effect mixup.
> Fix this by making the platform send_msg functions return -EBUSY
> immediately when the TX register is busy (safe since they execute
> under spin_lock_irq in sof_ipc_send_msg), and adding a bounded retry
> loop with usleep_range() in ipc4_tx_msg_unlocked() which only holds
> the tx_mutex (a sleepable context). The retry loop attempts up to 50
> iterations with 100-200us delays, bounding the maximum busy-wait to
> approximately 10ms instead of the previous 500ms timeout.
>
> Also remove the now-dead delayed_ipc_tx_msg field from
> sof_intel_hda_dev, the dispatch code, and the ack_received tracking
> variable from all three IRQ thread handlers (hda_dsp_ipc4_irq_thread,
> mtl_ipc_irq_thread, cnl_ipc4_irq_thread).
No messages were dropped, but if the firmware locks up during suspend
then we might enter low power while an IPC is delayed and send_msg is
waiting for a reply (or timeout).
Yes, the irq will not come, but it would not come even if the system were
not on its way to suspend.
The delayed handling as it is now is OK, it never loses messages,
everything is linear, it just takes a long time to go through several
messages when each of them times out because the fw is locked up.
In essence this patch reduces the 500ms default IPC timeout to 5-10ms
after an IPC timeout, leaving the FW less time to recover and not waiting
for a reply.
It can also introduce a new race: if the FW clears the BUSY first and
then sends the reply and we were 'spinning' to send the next message we
might do so before receiving the reply to the previous message.
Which is fair, I think, but the commit message should be clear on this.
Please can you file the issue for sof/linux as I have asked with more
information? We had similar issues 2-3 years ago, but they were root
caused and fixed.
I'll need to think about this a bit more...
one comment for ipc4.c
>
> Signed-off-by: Cole Leavitt <cole@unwrap.rs>
> ---
> Changes in v2:
> - Removed __func__ from debug prints (dyndbg adds it automatically)
> - Added dev_dbg() when message sending is delayed due to EBUSY
> - Dropped patch 2/2 (dai_link_hw_ready) per Pierre's feedback
>
> diff --git a/sound/soc/sof/ipc4.c b/sound/soc/sof/ipc4.c
> index a4a090e6724a..ad99e2e07b66 100644
> --- a/sound/soc/sof/ipc4.c
> +++ b/sound/soc/sof/ipc4.c
> @@ -365,20 +365,36 @@ static int ipc4_wait_tx_done(struct snd_sof_ipc *ipc, void *reply_data)
> return ret;
> }
>
> +#define SOF_IPC4_TX_BUSY_RETRIES 50
> +#define SOF_IPC4_TX_BUSY_DELAY_US 100
> +#define SOF_IPC4_TX_BUSY_DELAY_MAX_US 200
> +
> static int ipc4_tx_msg_unlocked(struct snd_sof_ipc *ipc,
> void *msg_data, size_t msg_bytes,
> void *reply_data, size_t reply_bytes)
> {
> struct sof_ipc4_msg *ipc4_msg = msg_data;
> struct snd_sof_dev *sdev = ipc->sdev;
> - int ret;
> + int ret, i;
>
> if (msg_bytes > ipc->max_payload_size || reply_bytes > ipc->max_payload_size)
> return -EINVAL;
>
> sof_ipc4_log_header(sdev->dev, "ipc tx ", msg_data, true);
>
> - ret = sof_ipc_send_msg(sdev, msg_data, msg_bytes, reply_bytes);
> + for (i = 0; i < SOF_IPC4_TX_BUSY_RETRIES; i++) {
> + ret = sof_ipc_send_msg(sdev, msg_data, msg_bytes, reply_bytes);
> + if (ret != -EBUSY)
> + break;
> + usleep_range(SOF_IPC4_TX_BUSY_DELAY_US,
> + SOF_IPC4_TX_BUSY_DELAY_MAX_US);
> + }
> + if (i == SOF_IPC4_TX_BUSY_RETRIES) {
> + dev_dbg(sdev->dev, "ipc tx failed: TX busy after %d retries\n", i);
this needs special treatment with a unique error that can be used for
debugging purposes, something like:
dev_err(sdev->dev, "IPC busy, msg %#x|%#x cannot be sent\n",
ipc4_msg->primary, ipc4_msg->extension);
snd_sof_handle_fw_exception(ipc->sdev, "IPC busy");
return ret;
> + } else if (i > 0) {
> + dev_dbg(sdev->dev, "ipc tx delayed by %d loops for %#x|%#x\n",
> + i, ipc4_msg->primary, ipc4_msg->extension);
> + }
> if (ret) {
> dev_err_ratelimited(sdev->dev,
> "%s: ipc message send for %#x|%#x failed: %d\n",
--
Péter
© 2016 - 2026 Red Hat, Inc.