[PATCH 6/9] ASoC: qcom: q6asm-dai: schedule all available frames to avoid dsp under-runs

Srinivas Kandagatla posted 9 patches 2 months ago
There is a newer version of this series
[PATCH 6/9] ASoC: qcom: q6asm-dai: schedule all available frames to avoid dsp under-runs
Posted by Srinivas Kandagatla 2 months ago
With the existing code, we are only setting up one period at a time, in a
ping-pong buffer style. This triggers lot of underruns in the dsp
leading to jitter noise during audio playback.

Fix this by scheduling all available periods, this will ensure that the dsp
has enough buffer feed and ultimatley fixing the underruns and audio distortion.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@oss.qualcomm.com>
---
 sound/soc/qcom/qdsp6/q6asm-dai.c | 34 +++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/sound/soc/qcom/qdsp6/q6asm-dai.c b/sound/soc/qcom/qdsp6/q6asm-dai.c
index 0eae8c6e42b8..db2ea8973ac9 100644
--- a/sound/soc/qcom/qdsp6/q6asm-dai.c
+++ b/sound/soc/qcom/qdsp6/q6asm-dai.c
@@ -64,6 +64,7 @@ struct q6asm_dai_rtd {
 	uint64_t bytes_received;
 	uint64_t copied_total;
 	uint16_t bits_per_sample;
+	snd_pcm_uframes_t queue_ptr;
 	uint16_t source; /* Encoding source bit mask */
 	struct audio_client *audio_client;
 	uint32_t next_track_stream_id;
@@ -85,6 +86,7 @@ struct q6asm_dai_data {
 static const struct snd_pcm_hardware q6asm_dai_hardware_capture = {
 	.info =                 (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_BATCH |
 				SNDRV_PCM_INFO_BLOCK_TRANSFER |
+				SNDRV_PCM_INFO_NO_REWINDS | SNDRV_PCM_INFO_SYNC_APPLPTR |
 				SNDRV_PCM_INFO_MMAP_VALID |
 				SNDRV_PCM_INFO_INTERLEAVED |
 				SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_RESUME),
@@ -108,6 +110,7 @@ static const struct snd_pcm_hardware q6asm_dai_hardware_playback = {
 	.info =                 (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_BATCH |
 				SNDRV_PCM_INFO_BLOCK_TRANSFER |
 				SNDRV_PCM_INFO_MMAP_VALID |
+				SNDRV_PCM_INFO_NO_REWINDS | SNDRV_PCM_INFO_SYNC_APPLPTR |
 				SNDRV_PCM_INFO_INTERLEAVED |
 				SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_RESUME),
 	.formats =              (SNDRV_PCM_FMTBIT_S16_LE |
@@ -182,9 +185,6 @@ static void event_handler(uint32_t opcode, uint32_t token,
 
 	switch (opcode) {
 	case ASM_CLIENT_EVENT_CMD_RUN_DONE:
-		if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
-			q6asm_write_async(prtd->audio_client, prtd->stream_id,
-				   prtd->pcm_count, 0, 0, 0);
 		break;
 	case ASM_CLIENT_EVENT_CMD_EOS_DONE:
 		prtd->state = Q6ASM_STREAM_STOPPED;
@@ -192,10 +192,6 @@ static void event_handler(uint32_t opcode, uint32_t token,
 	case ASM_CLIENT_EVENT_DATA_WRITE_DONE: {
 		prtd->pcm_irq_pos += prtd->pcm_count;
 		snd_pcm_period_elapsed(substream);
-		if (prtd->state == Q6ASM_STREAM_RUNNING)
-			q6asm_write_async(prtd->audio_client, prtd->stream_id,
-					   prtd->pcm_count, 0, 0, 0);
-
 		break;
 		}
 	case ASM_CLIENT_EVENT_DATA_READ_DONE:
@@ -311,6 +307,29 @@ static int q6asm_dai_prepare(struct snd_soc_component *component,
 	return ret;
 }
 
+static int q6asm_dai_ack(struct snd_soc_component *component, struct snd_pcm_substream *substream)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct q6asm_dai_rtd *prtd = runtime->private_data;
+	int i, ret = 0, avail_periods;
+
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && prtd->state == Q6ASM_STREAM_RUNNING) {
+		avail_periods = (runtime->control->appl_ptr - prtd->queue_ptr)/runtime->period_size;
+		for (i = 0; i < avail_periods; i++) {
+			ret = q6asm_write_async(prtd->audio_client, prtd->stream_id,
+					   prtd->pcm_count, 0, 0, 0);
+
+			if (ret < 0) {
+				dev_err(component->dev, "Error queuing playback buffer %d\n", ret);
+				return ret;
+			}
+			prtd->queue_ptr += runtime->period_size;
+		}
+	}
+
+	return ret;
+}
+
 static int q6asm_dai_trigger(struct snd_soc_component *component,
 			     struct snd_pcm_substream *substream, int cmd)
 {
@@ -1215,6 +1234,7 @@ static const struct snd_soc_component_driver q6asm_fe_dai_component = {
 	.close			= q6asm_dai_close,
 	.prepare		= q6asm_dai_prepare,
 	.trigger		= q6asm_dai_trigger,
+	.ack			= q6asm_dai_ack,
 	.pointer		= q6asm_dai_pointer,
 	.pcm_construct		= q6asm_dai_pcm_new,
 	.compress_ops		= &q6asm_dai_compress_ops,
-- 
2.51.0
Re: [PATCH 6/9] ASoC: qcom: q6asm-dai: schedule all available frames to avoid dsp under-runs
Posted by Alexey Klimov 2 months ago
On Wed Oct 15, 2025 at 2:17 PM BST, Srinivas Kandagatla wrote:
> With the existing code, we are only setting up one period at a time, in a
> ping-pong buffer style. This triggers lot of underruns in the dsp
> leading to jitter noise during audio playback.
>
> Fix this by scheduling all available periods, this will ensure that the dsp
> has enough buffer feed and ultimatley fixing the underruns and audio distortion.
>
> Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@oss.qualcomm.com>
> ---
>  sound/soc/qcom/qdsp6/q6asm-dai.c | 34 +++++++++++++++++++++++++-------

[..]

> +static int q6asm_dai_ack(struct snd_soc_component *component, struct snd_pcm_substream *substream)
> +{
> +	struct snd_pcm_runtime *runtime = substream->runtime;
> +	struct q6asm_dai_rtd *prtd = runtime->private_data;
> +	int i, ret = 0, avail_periods;
> +
> +	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && prtd->state == Q6ASM_STREAM_RUNNING) {
> +		avail_periods = (runtime->control->appl_ptr - prtd->queue_ptr)/runtime->period_size;
> +		for (i = 0; i < avail_periods; i++) {
> +			ret = q6asm_write_async(prtd->audio_client, prtd->stream_id,
> +					   prtd->pcm_count, 0, 0, 0);
> +
> +			if (ret < 0) {
> +				dev_err(component->dev, "Error queuing playback buffer %d\n", ret);
> +				return ret;
> +			}
> +			prtd->queue_ptr += runtime->period_size;
> +		}
> +	}
> +
> +	return ret;
> +}

So when I compiled this on a common arm64 desktop machine with defconfig,
nothing fancy, with gcc, this loop and function really does compile with
udiv instruction.

avail_periods = (runtime->control->appl_ptr - prtd->queue_ptr)/runtime->period_size;
     350:       f9001bf7        str     x23, [sp, #48]
     354:       f94086a0        ldr     x0, [x21, #264]
     358:       f9408262        ldr     x2, [x19, #256]
     35c:       f9400000        ldr     x0, [x0]
     360:       f9403ea1        ldr     x1, [x21, #120]
     364:       cb020000        sub     x0, x0, x2
     368:       9ac10800        udiv    x0, x0, x1  <--- here
     36c:       2a0003f7        mov     w23, w0

What do you think about rewriting this loop without division and
without using additional iterator? I don't think this is a hot path
but anyway.

The first iteration that I came up with is (1):


diff --git a/sound/soc/qcom/qdsp6/q6asm-dai.c b/sound/soc/qcom/qdsp6/q6asm-dai.c
index 97256313c01a..d01f805947b8 100644
--- a/sound/soc/qcom/qdsp6/q6asm-dai.c
+++ b/sound/soc/qcom/qdsp6/q6asm-dai.c
@@ -310,6 +310,23 @@ static int q6asm_dai_ack(struct snd_soc_component *component, struct snd_pcm_sub
 	int ret = 0;
 
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && prtd->state == Q6ASM_STREAM_RUNNING) {
+		if (prtd->queue_ptr < runtime->control->appl_ptr) {
+
+			do {
+				ret = q6asm_write_async(prtd->audio_client, prtd->stream_id,
+							prtd->pcm_count, 0, 0, 0);
+
+				if (ret < 0) {
+						dev_err(component->dev, "Error queuing playback buffer %d\n", ret);
+						return ret;
+				}
+
+				prtd->queue_ptr += runtime->period_size;
+
+			} while (prtd->queue_ptr < runtime->control->appl_ptr);
+
+		}

No division and no calculation of iterator and avail_periods.

Rewriting it further (2):

diff --git a/sound/soc/qcom/qdsp6/q6asm-dai.c b/sound/soc/qcom/qdsp6/q6asm-dai.c
index 97256313c01a..317f06d07a09 100644
--- a/sound/soc/qcom/qdsp6/q6asm-dai.c
+++ b/sound/soc/qcom/qdsp6/q6asm-dai.c
@@ -307,9 +307,26 @@ static int q6asm_dai_ack(struct snd_soc_component *component, struct snd_pcm_sub
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct q6asm_dai_rtd *prtd = runtime->private_data;
-	int i, ret = 0, avail_periods;
+	int ret = 0, diff;
 
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && prtd->state == Q6ASM_STREAM_RUNNING) {
+		diff = (runtime->control->appl_ptr - prtd->queue_ptr);
+		
+		while (diff >= runtime->period_size) {
+			ret = q6asm_write_async(prtd->audio_client, prtd->stream_id,
+						prtd->pcm_count, 0, 0, 0);
+			
+			if (ret < 0) {
+				dev_err(component->dev, "Error queuing playback buffer %d\n", ret);
+				return ret;
+			}
+
+			prtd->queue_ptr += runtime->period_size;
+			diff -= runtime->period_size;
+		};
+
+

Surprisingly, in (1) the size of resulting object file is smaller than in (2):

With original patch:	110008 Oct 20 15:26 sound/soc/qcom/qdsp6/q6asm-dai.o
with (1): 		109776 Oct 20 16:53 sound/soc/qcom/qdsp6/q6asm-dai.o
with (2):		109896 Oct 20 16:52 sound/soc/qcom/qdsp6/q6asm-dai.o

I think the readability won't be damaged as a result of the rewriting
and the code seems to be smaller.
Obviusly, this needs to be verified for some corner cases and etc.
I'd go with (1) but it is up to you. I tested (1) and it seems to work.

Best regards,
Alexey
Re: [PATCH 6/9] ASoC: qcom: q6asm-dai: schedule all available frames to avoid dsp under-runs
Posted by Srinivas Kandagatla 1 month, 3 weeks ago
On 10/20/25 5:06 PM, Alexey Klimov wrote:
> On Wed Oct 15, 2025 at 2:17 PM BST, Srinivas Kandagatla wrote:
>> With the existing code, we are only setting up one period at a time, in a
>> ping-pong buffer style. This triggers lot of underruns in the dsp
>> leading to jitter noise during audio playback.
>>
>> Fix this by scheduling all available periods, this will ensure that the dsp
>> has enough buffer feed and ultimatley fixing the underruns and audio distortion.
>>
>> Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@oss.qualcomm.com>
>> ---
>>  sound/soc/qcom/qdsp6/q6asm-dai.c | 34 +++++++++++++++++++++++++-------
> 
> [..]
> 
>> +static int q6asm_dai_ack(struct snd_soc_component *component, struct snd_pcm_substream *substream)
>> +{
>> +	struct snd_pcm_runtime *runtime = substream->runtime;
>> +	struct q6asm_dai_rtd *prtd = runtime->private_data;
>> +	int i, ret = 0, avail_periods;
>> +
>> +	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && prtd->state == Q6ASM_STREAM_RUNNING) {
>> +		avail_periods = (runtime->control->appl_ptr - prtd->queue_ptr)/runtime->period_size;
>> +		for (i = 0; i < avail_periods; i++) {
>> +			ret = q6asm_write_async(prtd->audio_client, prtd->stream_id,
>> +					   prtd->pcm_count, 0, 0, 0);
>> +
>> +			if (ret < 0) {
>> +				dev_err(component->dev, "Error queuing playback buffer %d\n", ret);
>> +				return ret;
>> +			}
>> +			prtd->queue_ptr += runtime->period_size;
>> +		}
>> +	}
>> +
>> +	return ret;
>> +}

We have exactly same logic in q6apm-dai.c I will keep this as it is for
this patch series, we can always improve this over time. Patches welcome.>
> So when I compiled this on a common arm64 desktop machine with defconfig,
> nothing fancy, with gcc, this loop and function really does compile with
> udiv instruction.
> 
> avail_periods = (runtime->control->appl_ptr - prtd->queue_ptr)/runtime->period_size;
>      350:       f9001bf7        str     x23, [sp, #48]
>      354:       f94086a0        ldr     x0, [x21, #264]
>      358:       f9408262        ldr     x2, [x19, #256]
>      35c:       f9400000        ldr     x0, [x0]
>      360:       f9403ea1        ldr     x1, [x21, #120]
>      364:       cb020000        sub     x0, x0, x2
>      368:       9ac10800        udiv    x0, x0, x1  <--- here
>      36c:       2a0003f7        mov     w23, w0
> 
> What do you think about rewriting this loop without division and
> without using additional iterator? I don't think this is a hot path
> but anyway.
> 
> The first iteration that I came up with is (1):
> 
> 
> diff --git a/sound/soc/qcom/qdsp6/q6asm-dai.c b/sound/soc/qcom/qdsp6/q6asm-dai.c
> index 97256313c01a..d01f805947b8 100644
> --- a/sound/soc/qcom/qdsp6/q6asm-dai.c
> +++ b/sound/soc/qcom/qdsp6/q6asm-dai.c
> @@ -310,6 +310,23 @@ static int q6asm_dai_ack(struct snd_soc_component *component, struct snd_pcm_sub
>  	int ret = 0;
>  
>  	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && prtd->state == Q6ASM_STREAM_RUNNING) {
> +		if (prtd->queue_ptr < runtime->control->appl_ptr) {
This is not going to work, as we need alteast 1 period of data.> +
> +			do {
> +				ret = q6asm_write_async(prtd->audio_client, prtd->stream_id,
> +							prtd->pcm_count, 0, 0, 0);
> +
> +				if (ret < 0) {
> +						dev_err(component->dev, "Error queuing playback buffer %d\n", ret);
> +						return ret;
> +				}
> +
> +				prtd->queue_ptr += runtime->period_size;
This queue_ptr can go over appl_ptr.. can impact sound quality as we
will be queuing something that is not in the data yet.

--srini> +
> +			} while (prtd->queue_ptr < runtime->control->appl_ptr);
> +
> +		}
> 
> No division and no calculation of iterator and avail_periods.
> 
> Rewriting it further (2):
> 
> diff --git a/sound/soc/qcom/qdsp6/q6asm-dai.c b/sound/soc/qcom/qdsp6/q6asm-dai.c
> index 97256313c01a..317f06d07a09 100644
> --- a/sound/soc/qcom/qdsp6/q6asm-dai.c
> +++ b/sound/soc/qcom/qdsp6/q6asm-dai.c
> @@ -307,9 +307,26 @@ static int q6asm_dai_ack(struct snd_soc_component *component, struct snd_pcm_sub
>  {
>  	struct snd_pcm_runtime *runtime = substream->runtime;
>  	struct q6asm_dai_rtd *prtd = runtime->private_data;
> -	int i, ret = 0, avail_periods;
> +	int ret = 0, diff;
>  
>  	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && prtd->state == Q6ASM_STREAM_RUNNING) {
> +		diff = (runtime->control->appl_ptr - prtd->queue_ptr);
> +		
> +		while (diff >= runtime->period_size) {
> +			ret = q6asm_write_async(prtd->audio_client, prtd->stream_id,
> +						prtd->pcm_count, 0, 0, 0);
> +			
> +			if (ret < 0) {
> +				dev_err(component->dev, "Error queuing playback buffer %d\n", ret);
> +				return ret;
> +			}
> +
> +			prtd->queue_ptr += runtime->period_size;
> +			diff -= runtime->period_size;
> +		};
> +
> +
> 
> Surprisingly, in (1) the size of resulting object file is smaller than in (2):
> 
> With original patch:	110008 Oct 20 15:26 sound/soc/qcom/qdsp6/q6asm-dai.o
> with (1): 		109776 Oct 20 16:53 sound/soc/qcom/qdsp6/q6asm-dai.o
> with (2):		109896 Oct 20 16:52 sound/soc/qcom/qdsp6/q6asm-dai.o
> 
> I think the readability won't be damaged as a result of the rewriting
> and the code seems to be smaller.
> Obviusly, this needs to be verified for some corner cases and etc.
> I'd go with (1) but it is up to you. I tested (1) and it seems to work.
> 
> Best regards,
> Alexey
> 
> 
> 
> 
>