[PATCH v4 5/6] spi: spi-fsl-dspi: Increase DMA buffer size

James Clark posted 6 patches 3 months, 1 week ago
There is a newer version of this series
[PATCH v4 5/6] spi: spi-fsl-dspi: Increase DMA buffer size
Posted by James Clark 3 months, 1 week ago
From: Larisa Grigore <larisa.grigore@nxp.com>

When the device is configured as a target, the host won't stop sending
data while we're draining the buffer which leads to FIFO underflows
and corruption.

Increase the DMA buffer size to the maximum words that edma can
transfer once to reduce the chance of this happening.

While we're here, also change the buffer size for host mode back to a
page as it was before commit a957499bd437 ("spi: spi-fsl-dspi: Fix
bits-per-word acceleration in DMA mode"). dma_alloc_noncoherent()
allocations are backed by a full page anyway, so we might as well use it
all.

Signed-off-by: Larisa Grigore <larisa.grigore@nxp.com>
Signed-off-by: James Clark <james.clark@linaro.org>
---
 drivers/spi/spi-fsl-dspi.c | 42 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index e7856f9c9440..46d3cae9efed 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -493,6 +493,39 @@ static u32 dspi_pop_tx_pushr(struct fsl_dspi *dspi)
 	return cmd << 16 | data;
 }
 
+static int dspi_dma_bufsize(struct fsl_dspi *dspi)
+{
+	if (spi_controller_is_target(dspi->ctlr)) {
+		/*
+		 * In target mode we have to be ready to receive the maximum
+		 * that can possibly be transferred at once by EDMA without any
+		 * FIFO underflows. This is CITER * SSIZE, where SSIZE is a max
+		 * of 4 when transferring to a peripheral.
+		 */
+		return GENMASK(14, 0) * DMA_SLAVE_BUSWIDTH_4_BYTES;
+	}
+
+	return PAGE_SIZE;
+}
+
+static int dspi_dma_max_datawords(struct fsl_dspi *dspi)
+{
+	/*
+	 * Transfers look like this so we always use a full DMA word regardless
+	 * of SPI word size:
+	 *
+	 * 31              16 15                   0
+	 * -----------------------------------------
+	 * |   CONTROL WORD  |     16-bit DATA     |
+	 * -----------------------------------------
+	 * or
+	 * -----------------------------------------
+	 * |   CONTROL WORD  | UNUSED | 8-bit DATA |
+	 * -----------------------------------------
+	 */
+	return dspi_dma_bufsize(dspi) / DMA_SLAVE_BUSWIDTH_4_BYTES;
+}
+
 static int dspi_dma_transfer_size(struct fsl_dspi *dspi)
 {
 	return dspi->words_in_flight * DMA_SLAVE_BUSWIDTH_4_BYTES;
@@ -608,6 +641,7 @@ static int dspi_next_xfer_dma_submit(struct fsl_dspi *dspi)
 static void dspi_dma_xfer(struct fsl_dspi *dspi)
 {
 	struct spi_message *message = dspi->cur_msg;
+	int max_words = dspi_dma_max_datawords(dspi);
 	struct device *dev = &dspi->pdev->dev;
 
 	/*
@@ -619,8 +653,8 @@ static void dspi_dma_xfer(struct fsl_dspi *dspi)
 		dspi_setup_accel(dspi);
 
 		dspi->words_in_flight = dspi->len / dspi->oper_word_size;
-		if (dspi->words_in_flight > dspi->devtype_data->fifo_size)
-			dspi->words_in_flight = dspi->devtype_data->fifo_size;
+		if (dspi->words_in_flight > max_words)
+			dspi->words_in_flight = max_words;
 
 		message->actual_length += dspi->words_in_flight *
 					  dspi->oper_word_size;
@@ -635,7 +669,7 @@ static void dspi_dma_xfer(struct fsl_dspi *dspi)
 
 static int dspi_request_dma(struct fsl_dspi *dspi, phys_addr_t phy_addr)
 {
-	int dma_bufsize = dspi->devtype_data->fifo_size * 2;
+	int dma_bufsize = dspi_dma_bufsize(dspi);
 	struct device *dev = &dspi->pdev->dev;
 	struct dma_slave_config cfg;
 	struct fsl_dspi_dma *dma;
@@ -719,7 +753,7 @@ static int dspi_request_dma(struct fsl_dspi *dspi, phys_addr_t phy_addr)
 
 static void dspi_release_dma(struct fsl_dspi *dspi)
 {
-	int dma_bufsize = dspi->devtype_data->fifo_size * 2;
+	int dma_bufsize = dspi_dma_bufsize(dspi);
 	struct fsl_dspi_dma *dma = dspi->dma;
 
 	if (!dma)

-- 
2.34.1
Re: [PATCH v4 5/6] spi: spi-fsl-dspi: Increase DMA buffer size
Posted by Vladimir Oltean 3 months, 1 week ago
On Fri, Jun 27, 2025 at 11:21:41AM +0100, James Clark wrote:
> diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
> index e7856f9c9440..46d3cae9efed 100644
> --- a/drivers/spi/spi-fsl-dspi.c
> +++ b/drivers/spi/spi-fsl-dspi.c
> @@ -493,6 +493,39 @@ static u32 dspi_pop_tx_pushr(struct fsl_dspi *dspi)
>  	return cmd << 16 | data;
>  }
>  
> +static int dspi_dma_bufsize(struct fsl_dspi *dspi)
> +{
> +	if (spi_controller_is_target(dspi->ctlr)) {
> +		/*
> +		 * In target mode we have to be ready to receive the maximum
> +		 * that can possibly be transferred at once by EDMA without any
> +		 * FIFO underflows. This is CITER * SSIZE, where SSIZE is a max
> +		 * of 4 when transferring to a peripheral.
> +		 */
> +		return GENMASK(14, 0) * DMA_SLAVE_BUSWIDTH_4_BYTES;

Is this really a constant that can be hardcoded? Should this be queried
from the EDMA driver somehow?

I'm not well versed in the dmaengine/dma-mapping API at all, but I see
fsl_edma_probe() makes a call to dma_set_max_seg_size(), which consumer
drivers such as DSPI can query using dma_get_max_seg_size(). To the
untrained eye, and from a great distance, it looks like the value you're
interested in. Apologies if that isn't the case.

> +	}
> +
> +	return PAGE_SIZE;
> +}

The other question is: what's fundamentally different between the host
and target operating modes, such that we return different values? Why
not the same?
Re: [PATCH v4 5/6] spi: spi-fsl-dspi: Increase DMA buffer size
Posted by Arnd Bergmann 3 months, 1 week ago
On Tue, Jul 1, 2025, at 16:47, Vladimir Oltean wrote:
> On Fri, Jun 27, 2025 at 11:21:41AM +0100, James Clark wrote:

>> +	}
>> +
>> +	return PAGE_SIZE;
>> +}
>
> The other question is: what's fundamentally different between the host
> and target operating modes, such that we return different values? Why
> not the same?

In host mode, the driver is able to split up a transfer into smaller
chunks, while in target mode the length of the transfer is determined
by the remote host and can be larger than whatever default buffer
size we pick.

Using PAGE_SIZE as the default host buffer makes sense since that
is the smallest underlying size for dma_alloc_noncoherent, and
larger buffers would be fairly wasteful.

Endpoint mode should only be enabled if it's actually being used
and in that case the allocation is as large as possible.

       Arnd
Re: [PATCH v4 5/6] spi: spi-fsl-dspi: Increase DMA buffer size
Posted by James Clark 3 months, 1 week ago

On 01/07/2025 3:47 pm, Vladimir Oltean wrote:
> On Fri, Jun 27, 2025 at 11:21:41AM +0100, James Clark wrote:
>> diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
>> index e7856f9c9440..46d3cae9efed 100644
>> --- a/drivers/spi/spi-fsl-dspi.c
>> +++ b/drivers/spi/spi-fsl-dspi.c
>> @@ -493,6 +493,39 @@ static u32 dspi_pop_tx_pushr(struct fsl_dspi *dspi)
>>   	return cmd << 16 | data;
>>   }
>>   
>> +static int dspi_dma_bufsize(struct fsl_dspi *dspi)
>> +{
>> +	if (spi_controller_is_target(dspi->ctlr)) {
>> +		/*
>> +		 * In target mode we have to be ready to receive the maximum
>> +		 * that can possibly be transferred at once by EDMA without any
>> +		 * FIFO underflows. This is CITER * SSIZE, where SSIZE is a max
>> +		 * of 4 when transferring to a peripheral.
>> +		 */
>> +		return GENMASK(14, 0) * DMA_SLAVE_BUSWIDTH_4_BYTES;
> 
> Is this really a constant that can be hardcoded? Should this be queried
> from the EDMA driver somehow?
> 
> I'm not well versed in the dmaengine/dma-mapping API at all, but I see
> fsl_edma_probe() makes a call to dma_set_max_seg_size(), which consumer
> drivers such as DSPI can query using dma_get_max_seg_size(). To the
> untrained eye, and from a great distance, it looks like the value you're
> interested in. Apologies if that isn't the case.
> 

You're probably right, and there's no particular reason to hard code it 
if it can be queried. I'll have a look at this.

>> +	}
>> +
>> +	return PAGE_SIZE;
>> +}
> 
> The other question is: what's fundamentally different between the host
> and target operating modes, such that we return different values? Why
> not the same?

This is missing from the commit message, but the reason is because it's 
a large allocation (256K with both tx and rx buffers) that should be 
avoided unless absolutely necessary so we wanted to limit it to only 
target devices.

The other reason to not allocate it dynamically based on the size of the 
message is because we assumed that it was better to do large contiguous 
allocations at boot time. If it's delayed until the device is used then 
the allocation might fail due to memory fragmentation.
Re: [PATCH v4 5/6] spi: spi-fsl-dspi: Increase DMA buffer size
Posted by Frank Li 3 months, 1 week ago
On Fri, Jun 27, 2025 at 11:21:41AM +0100, James Clark wrote:
> From: Larisa Grigore <larisa.grigore@nxp.com>
>
> When the device is configured as a target, the host won't stop sending
> data while we're draining the buffer which leads to FIFO underflows
> and corruption.
>
> Increase the DMA buffer size to the maximum words that edma can
> transfer once to reduce the chance of this happening.
>
> While we're here, also change the buffer size for host mode back to a
> page as it was before commit a957499bd437 ("spi: spi-fsl-dspi: Fix
> bits-per-word acceleration in DMA mode"). dma_alloc_noncoherent()
> allocations are backed by a full page anyway, so we might as well use it
> all.
>
> Signed-off-by: Larisa Grigore <larisa.grigore@nxp.com>
> Signed-off-by: James Clark <james.clark@linaro.org>
> ---
>  drivers/spi/spi-fsl-dspi.c | 42 ++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 38 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
> index e7856f9c9440..46d3cae9efed 100644
> --- a/drivers/spi/spi-fsl-dspi.c
> +++ b/drivers/spi/spi-fsl-dspi.c
> @@ -493,6 +493,39 @@ static u32 dspi_pop_tx_pushr(struct fsl_dspi *dspi)
>  	return cmd << 16 | data;
>  }
>
> +static int dspi_dma_bufsize(struct fsl_dspi *dspi)
> +{
> +	if (spi_controller_is_target(dspi->ctlr)) {
> +		/*
> +		 * In target mode we have to be ready to receive the maximum
> +		 * that can possibly be transferred at once by EDMA without any
> +		 * FIFO underflows. This is CITER * SSIZE, where SSIZE is a max
> +		 * of 4 when transferring to a peripheral.
> +		 */
> +		return GENMASK(14, 0) * DMA_SLAVE_BUSWIDTH_4_BYTES;
> +	}
> +
> +	return PAGE_SIZE;
> +}
> +
> +static int dspi_dma_max_datawords(struct fsl_dspi *dspi)
> +{
> +	/*
> +	 * Transfers look like this so we always use a full DMA word regardless
> +	 * of SPI word size:
> +	 *
> +	 * 31              16 15                   0
> +	 * -----------------------------------------
> +	 * |   CONTROL WORD  |     16-bit DATA     |
> +	 * -----------------------------------------
> +	 * or
> +	 * -----------------------------------------
> +	 * |   CONTROL WORD  | UNUSED | 8-bit DATA |
> +	 * -----------------------------------------
> +	 */
> +	return dspi_dma_bufsize(dspi) / DMA_SLAVE_BUSWIDTH_4_BYTES;
> +}
> +
>  static int dspi_dma_transfer_size(struct fsl_dspi *dspi)
>  {
>  	return dspi->words_in_flight * DMA_SLAVE_BUSWIDTH_4_BYTES;
> @@ -608,6 +641,7 @@ static int dspi_next_xfer_dma_submit(struct fsl_dspi *dspi)
>  static void dspi_dma_xfer(struct fsl_dspi *dspi)
>  {
>  	struct spi_message *message = dspi->cur_msg;
> +	int max_words = dspi_dma_max_datawords(dspi);
>  	struct device *dev = &dspi->pdev->dev;
>
>  	/*
> @@ -619,8 +653,8 @@ static void dspi_dma_xfer(struct fsl_dspi *dspi)
>  		dspi_setup_accel(dspi);
>
>  		dspi->words_in_flight = dspi->len / dspi->oper_word_size;
> -		if (dspi->words_in_flight > dspi->devtype_data->fifo_size)
> -			dspi->words_in_flight = dspi->devtype_data->fifo_size;
> +		if (dspi->words_in_flight > max_words)
> +			dspi->words_in_flight = max_words;

you touch this line

		dspi->words_in_flight = min(dspi->words_in_flight, max_words);

Frank
>
>  		message->actual_length += dspi->words_in_flight *
>  					  dspi->oper_word_size;
> @@ -635,7 +669,7 @@ static void dspi_dma_xfer(struct fsl_dspi *dspi)
>
>  static int dspi_request_dma(struct fsl_dspi *dspi, phys_addr_t phy_addr)
>  {
> -	int dma_bufsize = dspi->devtype_data->fifo_size * 2;
> +	int dma_bufsize = dspi_dma_bufsize(dspi);
>  	struct device *dev = &dspi->pdev->dev;
>  	struct dma_slave_config cfg;
>  	struct fsl_dspi_dma *dma;
> @@ -719,7 +753,7 @@ static int dspi_request_dma(struct fsl_dspi *dspi, phys_addr_t phy_addr)
>
>  static void dspi_release_dma(struct fsl_dspi *dspi)
>  {
> -	int dma_bufsize = dspi->devtype_data->fifo_size * 2;
> +	int dma_bufsize = dspi_dma_bufsize(dspi);
>  	struct fsl_dspi_dma *dma = dspi->dma;
>
>  	if (!dma)
>
> --
> 2.34.1
>
Re: [PATCH v4 5/6] spi: spi-fsl-dspi: Increase DMA buffer size
Posted by James Clark 3 months, 1 week ago

On 27/06/2025 8:44 pm, Frank Li wrote:
> On Fri, Jun 27, 2025 at 11:21:41AM +0100, James Clark wrote:
>> From: Larisa Grigore <larisa.grigore@nxp.com>
>>
>> When the device is configured as a target, the host won't stop sending
>> data while we're draining the buffer which leads to FIFO underflows
>> and corruption.
>>
>> Increase the DMA buffer size to the maximum words that edma can
>> transfer once to reduce the chance of this happening.
>>
>> While we're here, also change the buffer size for host mode back to a
>> page as it was before commit a957499bd437 ("spi: spi-fsl-dspi: Fix
>> bits-per-word acceleration in DMA mode"). dma_alloc_noncoherent()
>> allocations are backed by a full page anyway, so we might as well use it
>> all.
>>
>> Signed-off-by: Larisa Grigore <larisa.grigore@nxp.com>
>> Signed-off-by: James Clark <james.clark@linaro.org>
>> ---
>>   drivers/spi/spi-fsl-dspi.c | 42 ++++++++++++++++++++++++++++++++++++++----
>>   1 file changed, 38 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
>> index e7856f9c9440..46d3cae9efed 100644
>> --- a/drivers/spi/spi-fsl-dspi.c
>> +++ b/drivers/spi/spi-fsl-dspi.c
>> @@ -493,6 +493,39 @@ static u32 dspi_pop_tx_pushr(struct fsl_dspi *dspi)
>>   	return cmd << 16 | data;
>>   }
>>
>> +static int dspi_dma_bufsize(struct fsl_dspi *dspi)
>> +{
>> +	if (spi_controller_is_target(dspi->ctlr)) {
>> +		/*
>> +		 * In target mode we have to be ready to receive the maximum
>> +		 * that can possibly be transferred at once by EDMA without any
>> +		 * FIFO underflows. This is CITER * SSIZE, where SSIZE is a max
>> +		 * of 4 when transferring to a peripheral.
>> +		 */
>> +		return GENMASK(14, 0) * DMA_SLAVE_BUSWIDTH_4_BYTES;
>> +	}
>> +
>> +	return PAGE_SIZE;
>> +}
>> +
>> +static int dspi_dma_max_datawords(struct fsl_dspi *dspi)
>> +{
>> +	/*
>> +	 * Transfers look like this so we always use a full DMA word regardless
>> +	 * of SPI word size:
>> +	 *
>> +	 * 31              16 15                   0
>> +	 * -----------------------------------------
>> +	 * |   CONTROL WORD  |     16-bit DATA     |
>> +	 * -----------------------------------------
>> +	 * or
>> +	 * -----------------------------------------
>> +	 * |   CONTROL WORD  | UNUSED | 8-bit DATA |
>> +	 * -----------------------------------------
>> +	 */
>> +	return dspi_dma_bufsize(dspi) / DMA_SLAVE_BUSWIDTH_4_BYTES;
>> +}
>> +
>>   static int dspi_dma_transfer_size(struct fsl_dspi *dspi)
>>   {
>>   	return dspi->words_in_flight * DMA_SLAVE_BUSWIDTH_4_BYTES;
>> @@ -608,6 +641,7 @@ static int dspi_next_xfer_dma_submit(struct fsl_dspi *dspi)
>>   static void dspi_dma_xfer(struct fsl_dspi *dspi)
>>   {
>>   	struct spi_message *message = dspi->cur_msg;
>> +	int max_words = dspi_dma_max_datawords(dspi);
>>   	struct device *dev = &dspi->pdev->dev;
>>
>>   	/*
>> @@ -619,8 +653,8 @@ static void dspi_dma_xfer(struct fsl_dspi *dspi)
>>   		dspi_setup_accel(dspi);
>>
>>   		dspi->words_in_flight = dspi->len / dspi->oper_word_size;
>> -		if (dspi->words_in_flight > dspi->devtype_data->fifo_size)
>> -			dspi->words_in_flight = dspi->devtype_data->fifo_size;
>> +		if (dspi->words_in_flight > max_words)
>> +			dspi->words_in_flight = max_words;
> 
> you touch this line
> 
> 		dspi->words_in_flight = min(dspi->words_in_flight, max_words);
> 
> Frank

Ack

>>
>>   		message->actual_length += dspi->words_in_flight *
>>   					  dspi->oper_word_size;
>> @@ -635,7 +669,7 @@ static void dspi_dma_xfer(struct fsl_dspi *dspi)
>>
>>   static int dspi_request_dma(struct fsl_dspi *dspi, phys_addr_t phy_addr)
>>   {
>> -	int dma_bufsize = dspi->devtype_data->fifo_size * 2;
>> +	int dma_bufsize = dspi_dma_bufsize(dspi);
>>   	struct device *dev = &dspi->pdev->dev;
>>   	struct dma_slave_config cfg;
>>   	struct fsl_dspi_dma *dma;
>> @@ -719,7 +753,7 @@ static int dspi_request_dma(struct fsl_dspi *dspi, phys_addr_t phy_addr)
>>
>>   static void dspi_release_dma(struct fsl_dspi *dspi)
>>   {
>> -	int dma_bufsize = dspi->devtype_data->fifo_size * 2;
>> +	int dma_bufsize = dspi_dma_bufsize(dspi);
>>   	struct fsl_dspi_dma *dma = dspi->dma;
>>
>>   	if (!dma)
>>
>> --
>> 2.34.1
>>