[PATCH v2 RESEND 1/3] dmaengine: qcom: gpi: Add GPI Block event interrupt support

Jyothi Kumar Seerapu posted 3 patches 1 week, 5 days ago
There is a newer version of this series
[PATCH v2 RESEND 1/3] dmaengine: qcom: gpi: Add GPI Block event interrupt support
Posted by Jyothi Kumar Seerapu 1 week, 5 days ago
GSI hardware generates an interrupt for each transfer completion.
For multiple messages within a single transfer, this results
in receiving N interrupts for N messages, which can introduce
significant software interrupt latency. To mitigate this latency,
utilize Block Event Interrupt (BEI) only when an interrupt is necessary.
When using BEI, consider splitting a single multi-message transfer into
chunks of 8. This approach can enhance overall transfer time and
efficiency.

Signed-off-by: Jyothi Kumar Seerapu <quic_jseerapu@quicinc.com>
---

v1 -> v2: 
   - Changed dma_addr type from array of pointers to array.
   - To support BEI functionality with the TRE size of 64 defined in GPI driver,
     updated QCOM_GPI_MAX_NUM_MSGS to 16 and NUM_MSGS_PER_IRQ to 8.
 
 drivers/dma/qcom/gpi.c           | 49 ++++++++++++++++++++++++++++++++
 include/linux/dma/qcom-gpi-dma.h | 37 ++++++++++++++++++++++++
 2 files changed, 86 insertions(+)

diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
index 52a7c8f2498f..a98de3178764 100644
--- a/drivers/dma/qcom/gpi.c
+++ b/drivers/dma/qcom/gpi.c
@@ -1693,6 +1693,9 @@ static int gpi_create_i2c_tre(struct gchan *chan, struct gpi_desc *desc,
 
 		tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE);
 		tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT);
+
+		if (i2c->flags & QCOM_GPI_BLOCK_EVENT_IRQ)
+			tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_BEI);
 	}
 
 	for (i = 0; i < tre_idx; i++)
@@ -2098,6 +2101,52 @@ static int gpi_find_avail_gpii(struct gpi_dev *gpi_dev, u32 seid)
 	return -EIO;
 }
 
+/**
+ * gpi_multi_desc_process() - Process received transfers from GSI HW
+ * @dev: pointer to the corresponding dev node
+ * @multi_xfer: pointer to the gpi_multi_xfer
+ * @num_xfers: total number of transfers
+ * @transfer_timeout_msecs: transfer timeout value
+ * @transfer_comp: completion object of the transfer
+ *
+ * This function is used to process the received transfers based on the
+ * completion events
+ *
+ * Return: On success returns 0, otherwise return error code
+ */
+int gpi_multi_desc_process(struct device *dev, struct gpi_multi_xfer *multi_xfer,
+			   u32 num_xfers, u32 transfer_timeout_msecs,
+			   struct completion *transfer_comp)
+{
+	int i;
+	u32 max_irq_cnt, time_left;
+
+	max_irq_cnt = num_xfers / NUM_MSGS_PER_IRQ;
+	if (num_xfers % NUM_MSGS_PER_IRQ)
+		max_irq_cnt++;
+
+	/*
+	 * Wait for the interrupts of the processed transfers in multiple
+	 * of 64 and for the last transfer. If the hardware is fast and
+	 * already processed all the transfers then no need to wait.
+	 */
+	for (i = 0; i < max_irq_cnt; i++) {
+		reinit_completion(transfer_comp);
+		if (max_irq_cnt != multi_xfer->irq_cnt) {
+			time_left = wait_for_completion_timeout(transfer_comp,
+								transfer_timeout_msecs);
+			if (!time_left) {
+				dev_err(dev, "%s: Transfer timeout\n", __func__);
+				return -ETIMEDOUT;
+			}
+		}
+		if (num_xfers > multi_xfer->msg_idx_cnt)
+			return 0;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gpi_multi_desc_process);
+
 /* gpi_of_dma_xlate: open client requested channel */
 static struct dma_chan *gpi_of_dma_xlate(struct of_phandle_args *args,
 					 struct of_dma *of_dma)
diff --git a/include/linux/dma/qcom-gpi-dma.h b/include/linux/dma/qcom-gpi-dma.h
index 6680dd1a43c6..1341ff0db808 100644
--- a/include/linux/dma/qcom-gpi-dma.h
+++ b/include/linux/dma/qcom-gpi-dma.h
@@ -15,6 +15,12 @@ enum spi_transfer_cmd {
 	SPI_DUPLEX,
 };
 
+#define QCOM_GPI_BLOCK_EVENT_IRQ	BIT(0)
+
+#define QCOM_GPI_MAX_NUM_MSGS		16
+#define NUM_MSGS_PER_IRQ		8
+#define MIN_NUM_OF_MSGS_MULTI_DESC	4
+
 /**
  * struct gpi_spi_config - spi config for peripheral
  *
@@ -51,6 +57,29 @@ enum i2c_op {
 	I2C_READ,
 };
 
+/**
+ * struct gpi_multi_xfer - Used for multi transfer support
+ *
+ * @msg_idx_cnt: message index for the transfer
+ * @buf_idx: dma buffer index
+ * @unmap_msg_cnt: unampped transfer index
+ * @freed_msg_cnt: freed transfer index
+ * @irq_cnt: received interrupt count
+ * @irq_msg_cnt: transfer message count for the received irqs
+ * @dma_buf: virtual address of the buffer
+ * @dma_addr: dma address of the buffer
+ */
+struct gpi_multi_xfer {
+	u32 msg_idx_cnt;
+	u32 buf_idx;
+	u32 unmap_msg_cnt;
+	u32 freed_msg_cnt;
+	u32 irq_cnt;
+	u32 irq_msg_cnt;
+	void *dma_buf[QCOM_GPI_MAX_NUM_MSGS];
+	dma_addr_t dma_addr[QCOM_GPI_MAX_NUM_MSGS];
+};
+
 /**
  * struct gpi_i2c_config - i2c config for peripheral
  *
@@ -65,6 +94,8 @@ enum i2c_op {
  * @rx_len: receive length for buffer
  * @op: i2c cmd
  * @muli-msg: is part of multi i2c r-w msgs
+ * @flags: true for block event interrupt support
+ * @multi_xfer: indicates transfer has multi messages
  */
 struct gpi_i2c_config {
 	u8 set_config;
@@ -78,6 +109,12 @@ struct gpi_i2c_config {
 	u32 rx_len;
 	enum i2c_op op;
 	bool multi_msg;
+	u8 flags;
+	struct gpi_multi_xfer multi_xfer;
 };
 
+int gpi_multi_desc_process(struct device *dev, struct gpi_multi_xfer *multi_xfer,
+			   u32 num_xfers, u32 tranfer_timeout_msecs,
+			   struct completion *transfer_comp);
+
 #endif /* QCOM_GPI_DMA_H */
-- 
2.17.1
Re: [PATCH v2 RESEND 1/3] dmaengine: qcom: gpi: Add GPI Block event interrupt support
Posted by Bjorn Andersson 1 week, 4 days ago
On Mon, Nov 11, 2024 at 07:32:42PM +0530, Jyothi Kumar Seerapu wrote:
> GSI hardware generates an interrupt for each transfer completion.
> For multiple messages within a single transfer, this results
> in receiving N interrupts for N messages, which can introduce
> significant software interrupt latency.

Here's an excellent opportunity for splitting your problem description
and solution description in two easy to read paragraphs by adding some
newlines.

> To mitigate this latency,
> utilize Block Event Interrupt (BEI) only when an interrupt is necessary.
> When using BEI, consider splitting a single multi-message transfer into
> chunks of 8. This approach can enhance overall transfer time and

The reason for the number 8 must be documented.

"This approach..." wouldn't hurt from having it's own paragraph once
again.

> efficiency.
> 
> Signed-off-by: Jyothi Kumar Seerapu <quic_jseerapu@quicinc.com>
> ---
> 
> v1 -> v2: 
>    - Changed dma_addr type from array of pointers to array.
>    - To support BEI functionality with the TRE size of 64 defined in GPI driver,
>      updated QCOM_GPI_MAX_NUM_MSGS to 16 and NUM_MSGS_PER_IRQ to 8.
>  
>  drivers/dma/qcom/gpi.c           | 49 ++++++++++++++++++++++++++++++++
>  include/linux/dma/qcom-gpi-dma.h | 37 ++++++++++++++++++++++++
>  2 files changed, 86 insertions(+)
> 
> diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
> index 52a7c8f2498f..a98de3178764 100644
> --- a/drivers/dma/qcom/gpi.c
> +++ b/drivers/dma/qcom/gpi.c
> @@ -1693,6 +1693,9 @@ static int gpi_create_i2c_tre(struct gchan *chan, struct gpi_desc *desc,
>  
>  		tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE);
>  		tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT);
> +
> +		if (i2c->flags & QCOM_GPI_BLOCK_EVENT_IRQ)
> +			tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_BEI);
>  	}
>  
>  	for (i = 0; i < tre_idx; i++)
> @@ -2098,6 +2101,52 @@ static int gpi_find_avail_gpii(struct gpi_dev *gpi_dev, u32 seid)
>  	return -EIO;
>  }
>  
> +/**
> + * gpi_multi_desc_process() - Process received transfers from GSI HW
> + * @dev: pointer to the corresponding dev node
> + * @multi_xfer: pointer to the gpi_multi_xfer
> + * @num_xfers: total number of transfers
> + * @transfer_timeout_msecs: transfer timeout value
> + * @transfer_comp: completion object of the transfer
> + *
> + * This function is used to process the received transfers based on the
> + * completion events

As far as I can tell it doesn't "process" anything. All it does is
reinit the completion (n + 7) / 8 times, and for the first n / 8
iterations it will wait for an externally defined completion.

Why is this function even defined here, it solely operates on parameters
coming from the I2C driver?

> + *
> + * Return: On success returns 0, otherwise return error code
> + */
> +int gpi_multi_desc_process(struct device *dev, struct gpi_multi_xfer *multi_xfer,
> +			   u32 num_xfers, u32 transfer_timeout_msecs,
> +			   struct completion *transfer_comp)
> +{
> +	int i;
> +	u32 max_irq_cnt, time_left;
> +
> +	max_irq_cnt = num_xfers / NUM_MSGS_PER_IRQ;
> +	if (num_xfers % NUM_MSGS_PER_IRQ)
> +		max_irq_cnt++;
> +
> +	/*
> +	 * Wait for the interrupts of the processed transfers in multiple
> +	 * of 64 and for the last transfer. If the hardware is fast and

I'm confused, where does this 64 come from?

> +	 * already processed all the transfers then no need to wait.
> +	 */
> +	for (i = 0; i < max_irq_cnt; i++) {
> +		reinit_completion(transfer_comp);

I'm trying to convince myself that this isn't racey, but the split
ownership of updating and checking multi_xfer->irq_cnt between the GPI
and I2C drivers is just too hard for me to follow.

> +		if (max_irq_cnt != multi_xfer->irq_cnt) {
> +			time_left = wait_for_completion_timeout(transfer_comp,
> +								transfer_timeout_msecs);
> +			if (!time_left) {
> +				dev_err(dev, "%s: Transfer timeout\n", __func__);
> +				return -ETIMEDOUT;
> +			}
> +		}
> +		if (num_xfers > multi_xfer->msg_idx_cnt)
> +			return 0;
> +	}
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(gpi_multi_desc_process);

The dmaengine framework is expected to provide an abstraction between
clients and DMA engines, so this doesn't look right.

> +
>  /* gpi_of_dma_xlate: open client requested channel */
>  static struct dma_chan *gpi_of_dma_xlate(struct of_phandle_args *args,
>  					 struct of_dma *of_dma)
> diff --git a/include/linux/dma/qcom-gpi-dma.h b/include/linux/dma/qcom-gpi-dma.h
> index 6680dd1a43c6..1341ff0db808 100644
> --- a/include/linux/dma/qcom-gpi-dma.h
> +++ b/include/linux/dma/qcom-gpi-dma.h
> @@ -15,6 +15,12 @@ enum spi_transfer_cmd {
>  	SPI_DUPLEX,
>  };
>  
> +#define QCOM_GPI_BLOCK_EVENT_IRQ	BIT(0)
> +
> +#define QCOM_GPI_MAX_NUM_MSGS		16
> +#define NUM_MSGS_PER_IRQ		8
> +#define MIN_NUM_OF_MSGS_MULTI_DESC	4

Prefixing these QCOM_GPI_ seems like an excellent idea. Still puzzled
about the numbers 8 and 4 though, are they universal for all variants of
GPI or are they just arbitrary numbers picked by experimentation?

> +
>  /**
>   * struct gpi_spi_config - spi config for peripheral
>   *
> @@ -51,6 +57,29 @@ enum i2c_op {
>  	I2C_READ,
>  };
>  
> +/**
> + * struct gpi_multi_xfer - Used for multi transfer support
> + *
> + * @msg_idx_cnt: message index for the transfer
> + * @buf_idx: dma buffer index
> + * @unmap_msg_cnt: unampped transfer index

s/unampped/unmapped

> + * @freed_msg_cnt: freed transfer index
> + * @irq_cnt: received interrupt count
> + * @irq_msg_cnt: transfer message count for the received irqs
> + * @dma_buf: virtual address of the buffer
> + * @dma_addr: dma address of the buffer

"the buffer"? There's up to 16 of them...


As mentioned above, I'm skeptical about this custom API - but if we
were to go this route, the exact responsibilities and semantics should
be documented.

Regards,
Bjorn

> + */
> +struct gpi_multi_xfer {
> +	u32 msg_idx_cnt;
> +	u32 buf_idx;
> +	u32 unmap_msg_cnt;
> +	u32 freed_msg_cnt;
> +	u32 irq_cnt;
> +	u32 irq_msg_cnt;
> +	void *dma_buf[QCOM_GPI_MAX_NUM_MSGS];
> +	dma_addr_t dma_addr[QCOM_GPI_MAX_NUM_MSGS];
> +};
> +
>  /**
>   * struct gpi_i2c_config - i2c config for peripheral
>   *
> @@ -65,6 +94,8 @@ enum i2c_op {
>   * @rx_len: receive length for buffer
>   * @op: i2c cmd
>   * @muli-msg: is part of multi i2c r-w msgs
> + * @flags: true for block event interrupt support
> + * @multi_xfer: indicates transfer has multi messages
>   */
>  struct gpi_i2c_config {
>  	u8 set_config;
> @@ -78,6 +109,12 @@ struct gpi_i2c_config {
>  	u32 rx_len;
>  	enum i2c_op op;
>  	bool multi_msg;
> +	u8 flags;
> +	struct gpi_multi_xfer multi_xfer;
>  };
>  
> +int gpi_multi_desc_process(struct device *dev, struct gpi_multi_xfer *multi_xfer,
> +			   u32 num_xfers, u32 tranfer_timeout_msecs,
> +			   struct completion *transfer_comp);
> +
>  #endif /* QCOM_GPI_DMA_H */
> -- 
> 2.17.1
> 
>
Re: [PATCH v2 RESEND 1/3] dmaengine: qcom: gpi: Add GPI Block event interrupt support
Posted by Jyothi Kumar Seerapu 2 days, 7 hours ago

On 11/12/2024 10:26 AM, Bjorn Andersson wrote:
> On Mon, Nov 11, 2024 at 07:32:42PM +0530, Jyothi Kumar Seerapu wrote:
>> GSI hardware generates an interrupt for each transfer completion.
>> For multiple messages within a single transfer, this results
>> in receiving N interrupts for N messages, which can introduce
>> significant software interrupt latency.
> 
> Here's an excellent opportunity for splitting your problem description
> and solution description in two easy to read paragraphs by adding some
> newlines.
Thanks, Done.
> 
>> To mitigate this latency,
>> utilize Block Event Interrupt (BEI) only when an interrupt is necessary.
>> When using BEI, consider splitting a single multi-message transfer into
>> chunks of 8. This approach can enhance overall transfer time and
> 
> The reason for the number 8 must be documented.
Its documented in "qcom-gpi-dma.h" file.

> 
> "This approach..." wouldn't hurt from having it's own paragraph once
> again.

Done
> 
>> efficiency.
>>
>> Signed-off-by: Jyothi Kumar Seerapu <quic_jseerapu@quicinc.com>
>> ---
>>
>> v1 -> v2:
>>     - Changed dma_addr type from array of pointers to array.
>>     - To support BEI functionality with the TRE size of 64 defined in GPI driver,
>>       updated QCOM_GPI_MAX_NUM_MSGS to 16 and NUM_MSGS_PER_IRQ to 8.
>>   
>>   drivers/dma/qcom/gpi.c           | 49 ++++++++++++++++++++++++++++++++
>>   include/linux/dma/qcom-gpi-dma.h | 37 ++++++++++++++++++++++++
>>   2 files changed, 86 insertions(+)
>>
>> diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
>> index 52a7c8f2498f..a98de3178764 100644
>> --- a/drivers/dma/qcom/gpi.c
>> +++ b/drivers/dma/qcom/gpi.c
>> @@ -1693,6 +1693,9 @@ static int gpi_create_i2c_tre(struct gchan *chan, struct gpi_desc *desc,
>>   
>>   		tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE);
>>   		tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT);
>> +
>> +		if (i2c->flags & QCOM_GPI_BLOCK_EVENT_IRQ)
>> +			tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_BEI);
>>   	}
>>   
>>   	for (i = 0; i < tre_idx; i++)
>> @@ -2098,6 +2101,52 @@ static int gpi_find_avail_gpii(struct gpi_dev *gpi_dev, u32 seid)
>>   	return -EIO;
>>   }
>>   
>> +/**
>> + * gpi_multi_desc_process() - Process received transfers from GSI HW
>> + * @dev: pointer to the corresponding dev node
>> + * @multi_xfer: pointer to the gpi_multi_xfer
>> + * @num_xfers: total number of transfers
>> + * @transfer_timeout_msecs: transfer timeout value
>> + * @transfer_comp: completion object of the transfer
>> + *
>> + * This function is used to process the received transfers based on the
>> + * completion events
> 
> As far as I can tell it doesn't "process" anything. All it does is
> reinit the completion (n + 7) / 8 times, and for the first n / 8
> iterations it will wait for an externally defined completion.
Yes correct. Changed the function name and description.
> 
> Why is this function even defined here, it solely operates on parameters
> coming from the I2C driver?
This function can be used for the other protocols as well and so defined 
here. Please let me know if its not a good idea to make this as common 
function for all required protocols and keep in gpi driver.
> 
>> + *
>> + * Return: On success returns 0, otherwise return error code
>> + */
>> +int gpi_multi_desc_process(struct device *dev, struct gpi_multi_xfer *multi_xfer,
>> +			   u32 num_xfers, u32 transfer_timeout_msecs,
>> +			   struct completion *transfer_comp)
>> +{
>> +	int i;
>> +	u32 max_irq_cnt, time_left;
>> +
>> +	max_irq_cnt = num_xfers / NUM_MSGS_PER_IRQ;
>> +	if (num_xfers % NUM_MSGS_PER_IRQ)
>> +		max_irq_cnt++;
>> +
>> +	/*
>> +	 * Wait for the interrupts of the processed transfers in multiple
>> +	 * of 64 and for the last transfer. If the hardware is fast and
> 
> I'm confused, where does this 64 come from?
It supposed to be 8, thanks for pointing.
> 
>> +	 * already processed all the transfers then no need to wait.
>> +	 */
>> +	for (i = 0; i < max_irq_cnt; i++) {
>> +		reinit_completion(transfer_comp);
> 
> I'm trying to convince myself that this isn't racey, but the split
> ownership of updating and checking multi_xfer->irq_cnt between the GPI
> and I2C drivers is just too hard for me to follow.
If this functionlaity is added into gpi driver, then it can be used for 
i2c and other protocols as well.
Otherwise, this code or function to be added into all protocol drivers.

>> +		if (max_irq_cnt != multi_xfer->irq_cnt) {
>> +			time_left = wait_for_completion_timeout(transfer_comp,
>> +								transfer_timeout_msecs);
>> +			if (!time_left) {
>> +				dev_err(dev, "%s: Transfer timeout\n", __func__);
>> +				return -ETIMEDOUT;
>> +			}
>> +		}
>> +		if (num_xfers > multi_xfer->msg_idx_cnt)
>> +			return 0;
>> +	}
>> +	return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(gpi_multi_desc_process);
> 
> The dmaengine framework is expected to provide an abstraction between
> clients and DMA engines, so this doesn't look right.
> 
>> +
>>   /* gpi_of_dma_xlate: open client requested channel */
>>   static struct dma_chan *gpi_of_dma_xlate(struct of_phandle_args *args,
>>   					 struct of_dma *of_dma)
>> diff --git a/include/linux/dma/qcom-gpi-dma.h b/include/linux/dma/qcom-gpi-dma.h
>> index 6680dd1a43c6..1341ff0db808 100644
>> --- a/include/linux/dma/qcom-gpi-dma.h
>> +++ b/include/linux/dma/qcom-gpi-dma.h
>> @@ -15,6 +15,12 @@ enum spi_transfer_cmd {
>>   	SPI_DUPLEX,
>>   };
>>   
>> +#define QCOM_GPI_BLOCK_EVENT_IRQ	BIT(0)
>> +
>> +#define QCOM_GPI_MAX_NUM_MSGS		16
>> +#define NUM_MSGS_PER_IRQ		8
>> +#define MIN_NUM_OF_MSGS_MULTI_DESC	4
> 
> Prefixing these QCOM_GPI_ seems like an excellent idea. Still puzzled
> about the numbers 8 and 4 though, are they universal for all variants of
> GPI or are they just arbitrary numbers picked by experimentation?
MIN_NUM_OF_MSGS_MULTI_DESC changed to 2 in V3, so that if the number of 
messages in a transfer are greter than 1, then is_tx_multi_xfer is set.

maximum channel tre's are 64 which is for config TRE, go TRE and DMA(Tx, 
Rx) TRE. 32 messages can't fit to allocate memory with size of 64 and so 
used maximum messages as 16. And plan to use multiple factor of 
MAX_NUM_MSGS for interrupts per messages and so defined NUM_MSGS_PER_IRQ 
to 8.	

> 
>> +
>>   /**
>>    * struct gpi_spi_config - spi config for peripheral
>>    *
>> @@ -51,6 +57,29 @@ enum i2c_op {
>>   	I2C_READ,
>>   };
>>   
>> +/**
>> + * struct gpi_multi_xfer - Used for multi transfer support
>> + *
>> + * @msg_idx_cnt: message index for the transfer
>> + * @buf_idx: dma buffer index
>> + * @unmap_msg_cnt: unampped transfer index
> 
> s/unampped/unmapped
Done
> 
>> + * @freed_msg_cnt: freed transfer index
>> + * @irq_cnt: received interrupt count
>> + * @irq_msg_cnt: transfer message count for the received irqs
>> + * @dma_buf: virtual address of the buffer
>> + * @dma_addr: dma address of the buffer
> 
> "the buffer"? There's up to 16 of them...

Done
> 
> 
> As mentioned above, I'm skeptical about this custom API - but if we
> were to go this route, the exact responsibilities and semantics should
> be documented.

Documentated the API's and other added in this file.
> 
> Regards,
> Bjorn
> 
>> + */
>> +struct gpi_multi_xfer {
>> +	u32 msg_idx_cnt;
>> +	u32 buf_idx;
>> +	u32 unmap_msg_cnt;
>> +	u32 freed_msg_cnt;
>> +	u32 irq_cnt;
>> +	u32 irq_msg_cnt;
>> +	void *dma_buf[QCOM_GPI_MAX_NUM_MSGS];
>> +	dma_addr_t dma_addr[QCOM_GPI_MAX_NUM_MSGS];
>> +};
>> +
>>   /**
>>    * struct gpi_i2c_config - i2c config for peripheral
>>    *
>> @@ -65,6 +94,8 @@ enum i2c_op {
>>    * @rx_len: receive length for buffer
>>    * @op: i2c cmd
>>    * @muli-msg: is part of multi i2c r-w msgs
>> + * @flags: true for block event interrupt support
>> + * @multi_xfer: indicates transfer has multi messages
>>    */
>>   struct gpi_i2c_config {
>>   	u8 set_config;
>> @@ -78,6 +109,12 @@ struct gpi_i2c_config {
>>   	u32 rx_len;
>>   	enum i2c_op op;
>>   	bool multi_msg;
>> +	u8 flags;
>> +	struct gpi_multi_xfer multi_xfer;
>>   };
>>   
>> +int gpi_multi_desc_process(struct device *dev, struct gpi_multi_xfer *multi_xfer,
>> +			   u32 num_xfers, u32 tranfer_timeout_msecs,
>> +			   struct completion *transfer_comp);
>> +
>>   #endif /* QCOM_GPI_DMA_H */
>> -- 
>> 2.17.1
>>
>>
Re: [PATCH v2 RESEND 1/3] dmaengine: qcom: gpi: Add GPI Block event interrupt support
Posted by Andi Shyti 1 week, 4 days ago
Ping, Vinod :-)

Andi

On Mon, Nov 11, 2024 at 07:32:42PM +0530, Jyothi Kumar Seerapu wrote:
> GSI hardware generates an interrupt for each transfer completion.
> For multiple messages within a single transfer, this results
> in receiving N interrupts for N messages, which can introduce
> significant software interrupt latency. To mitigate this latency,
> utilize Block Event Interrupt (BEI) only when an interrupt is necessary.
> When using BEI, consider splitting a single multi-message transfer into
> chunks of 8. This approach can enhance overall transfer time and
> efficiency.
> 
> Signed-off-by: Jyothi Kumar Seerapu <quic_jseerapu@quicinc.com>
> ---
> 
> v1 -> v2: 
>    - Changed dma_addr type from array of pointers to array.
>    - To support BEI functionality with the TRE size of 64 defined in GPI driver,
>      updated QCOM_GPI_MAX_NUM_MSGS to 16 and NUM_MSGS_PER_IRQ to 8.
>  
>  drivers/dma/qcom/gpi.c           | 49 ++++++++++++++++++++++++++++++++
>  include/linux/dma/qcom-gpi-dma.h | 37 ++++++++++++++++++++++++
>  2 files changed, 86 insertions(+)
> 
> diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
> index 52a7c8f2498f..a98de3178764 100644
> --- a/drivers/dma/qcom/gpi.c
> +++ b/drivers/dma/qcom/gpi.c
> @@ -1693,6 +1693,9 @@ static int gpi_create_i2c_tre(struct gchan *chan, struct gpi_desc *desc,
>  
>  		tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE);
>  		tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT);
> +
> +		if (i2c->flags & QCOM_GPI_BLOCK_EVENT_IRQ)
> +			tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_BEI);
>  	}
>  
>  	for (i = 0; i < tre_idx; i++)
> @@ -2098,6 +2101,52 @@ static int gpi_find_avail_gpii(struct gpi_dev *gpi_dev, u32 seid)
>  	return -EIO;
>  }
>  
> +/**
> + * gpi_multi_desc_process() - Process received transfers from GSI HW
> + * @dev: pointer to the corresponding dev node
> + * @multi_xfer: pointer to the gpi_multi_xfer
> + * @num_xfers: total number of transfers
> + * @transfer_timeout_msecs: transfer timeout value
> + * @transfer_comp: completion object of the transfer
> + *
> + * This function is used to process the received transfers based on the
> + * completion events
> + *
> + * Return: On success returns 0, otherwise return error code
> + */
> +int gpi_multi_desc_process(struct device *dev, struct gpi_multi_xfer *multi_xfer,
> +			   u32 num_xfers, u32 transfer_timeout_msecs,
> +			   struct completion *transfer_comp)
> +{
> +	int i;
> +	u32 max_irq_cnt, time_left;
> +
> +	max_irq_cnt = num_xfers / NUM_MSGS_PER_IRQ;
> +	if (num_xfers % NUM_MSGS_PER_IRQ)
> +		max_irq_cnt++;
> +
> +	/*
> +	 * Wait for the interrupts of the processed transfers in multiple
> +	 * of 64 and for the last transfer. If the hardware is fast and
> +	 * already processed all the transfers then no need to wait.
> +	 */
> +	for (i = 0; i < max_irq_cnt; i++) {
> +		reinit_completion(transfer_comp);
> +		if (max_irq_cnt != multi_xfer->irq_cnt) {
> +			time_left = wait_for_completion_timeout(transfer_comp,
> +								transfer_timeout_msecs);
> +			if (!time_left) {
> +				dev_err(dev, "%s: Transfer timeout\n", __func__);
> +				return -ETIMEDOUT;
> +			}
> +		}
> +		if (num_xfers > multi_xfer->msg_idx_cnt)
> +			return 0;
> +	}
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(gpi_multi_desc_process);
> +
>  /* gpi_of_dma_xlate: open client requested channel */
>  static struct dma_chan *gpi_of_dma_xlate(struct of_phandle_args *args,
>  					 struct of_dma *of_dma)
> diff --git a/include/linux/dma/qcom-gpi-dma.h b/include/linux/dma/qcom-gpi-dma.h
> index 6680dd1a43c6..1341ff0db808 100644
> --- a/include/linux/dma/qcom-gpi-dma.h
> +++ b/include/linux/dma/qcom-gpi-dma.h
> @@ -15,6 +15,12 @@ enum spi_transfer_cmd {
>  	SPI_DUPLEX,
>  };
>  
> +#define QCOM_GPI_BLOCK_EVENT_IRQ	BIT(0)
> +
> +#define QCOM_GPI_MAX_NUM_MSGS		16
> +#define NUM_MSGS_PER_IRQ		8
> +#define MIN_NUM_OF_MSGS_MULTI_DESC	4
> +
>  /**
>   * struct gpi_spi_config - spi config for peripheral
>   *
> @@ -51,6 +57,29 @@ enum i2c_op {
>  	I2C_READ,
>  };
>  
> +/**
> + * struct gpi_multi_xfer - Used for multi transfer support
> + *
> + * @msg_idx_cnt: message index for the transfer
> + * @buf_idx: dma buffer index
> + * @unmap_msg_cnt: unampped transfer index
> + * @freed_msg_cnt: freed transfer index
> + * @irq_cnt: received interrupt count
> + * @irq_msg_cnt: transfer message count for the received irqs
> + * @dma_buf: virtual address of the buffer
> + * @dma_addr: dma address of the buffer
> + */
> +struct gpi_multi_xfer {
> +	u32 msg_idx_cnt;
> +	u32 buf_idx;
> +	u32 unmap_msg_cnt;
> +	u32 freed_msg_cnt;
> +	u32 irq_cnt;
> +	u32 irq_msg_cnt;
> +	void *dma_buf[QCOM_GPI_MAX_NUM_MSGS];
> +	dma_addr_t dma_addr[QCOM_GPI_MAX_NUM_MSGS];
> +};
> +
>  /**
>   * struct gpi_i2c_config - i2c config for peripheral
>   *
> @@ -65,6 +94,8 @@ enum i2c_op {
>   * @rx_len: receive length for buffer
>   * @op: i2c cmd
>   * @muli-msg: is part of multi i2c r-w msgs
> + * @flags: true for block event interrupt support
> + * @multi_xfer: indicates transfer has multi messages
>   */
>  struct gpi_i2c_config {
>  	u8 set_config;
> @@ -78,6 +109,12 @@ struct gpi_i2c_config {
>  	u32 rx_len;
>  	enum i2c_op op;
>  	bool multi_msg;
> +	u8 flags;
> +	struct gpi_multi_xfer multi_xfer;
>  };
>  
> +int gpi_multi_desc_process(struct device *dev, struct gpi_multi_xfer *multi_xfer,
> +			   u32 num_xfers, u32 tranfer_timeout_msecs,
> +			   struct completion *transfer_comp);
> +
>  #endif /* QCOM_GPI_DMA_H */
> -- 
> 2.17.1
>
Re: [PATCH v2 RESEND 1/3] dmaengine: qcom: gpi: Add GPI Block event interrupt support
Posted by Jyothi Kumar Seerapu 2 days, 7 hours ago

On 11/12/2024 4:06 AM, Andi Shyti wrote:
> Ping, Vinod :-)
Sure, thanks.
> 
> Andi
> 
> On Mon, Nov 11, 2024 at 07:32:42PM +0530, Jyothi Kumar Seerapu wrote:
>> GSI hardware generates an interrupt for each transfer completion.
>> For multiple messages within a single transfer, this results
>> in receiving N interrupts for N messages, which can introduce
>> significant software interrupt latency. To mitigate this latency,
>> utilize Block Event Interrupt (BEI) only when an interrupt is necessary.
>> When using BEI, consider splitting a single multi-message transfer into
>> chunks of 8. This approach can enhance overall transfer time and
>> efficiency.
>>
>> Signed-off-by: Jyothi Kumar Seerapu <quic_jseerapu@quicinc.com>
>> ---
>>
>> v1 -> v2:
>>     - Changed dma_addr type from array of pointers to array.
>>     - To support BEI functionality with the TRE size of 64 defined in GPI driver,
>>       updated QCOM_GPI_MAX_NUM_MSGS to 16 and NUM_MSGS_PER_IRQ to 8.
>>   
>>   drivers/dma/qcom/gpi.c           | 49 ++++++++++++++++++++++++++++++++
>>   include/linux/dma/qcom-gpi-dma.h | 37 ++++++++++++++++++++++++
>>   2 files changed, 86 insertions(+)
>>
>> diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
>> index 52a7c8f2498f..a98de3178764 100644
>> --- a/drivers/dma/qcom/gpi.c
>> +++ b/drivers/dma/qcom/gpi.c
>> @@ -1693,6 +1693,9 @@ static int gpi_create_i2c_tre(struct gchan *chan, struct gpi_desc *desc,
>>   
>>   		tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE);
>>   		tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT);
>> +
>> +		if (i2c->flags & QCOM_GPI_BLOCK_EVENT_IRQ)
>> +			tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_BEI);
>>   	}
>>   
>>   	for (i = 0; i < tre_idx; i++)
>> @@ -2098,6 +2101,52 @@ static int gpi_find_avail_gpii(struct gpi_dev *gpi_dev, u32 seid)
>>   	return -EIO;
>>   }
>>   
>> +/**
>> + * gpi_multi_desc_process() - Process received transfers from GSI HW
>> + * @dev: pointer to the corresponding dev node
>> + * @multi_xfer: pointer to the gpi_multi_xfer
>> + * @num_xfers: total number of transfers
>> + * @transfer_timeout_msecs: transfer timeout value
>> + * @transfer_comp: completion object of the transfer
>> + *
>> + * This function is used to process the received transfers based on the
>> + * completion events
>> + *
>> + * Return: On success returns 0, otherwise return error code
>> + */
>> +int gpi_multi_desc_process(struct device *dev, struct gpi_multi_xfer *multi_xfer,
>> +			   u32 num_xfers, u32 transfer_timeout_msecs,
>> +			   struct completion *transfer_comp)
>> +{
>> +	int i;
>> +	u32 max_irq_cnt, time_left;
>> +
>> +	max_irq_cnt = num_xfers / NUM_MSGS_PER_IRQ;
>> +	if (num_xfers % NUM_MSGS_PER_IRQ)
>> +		max_irq_cnt++;
>> +
>> +	/*
>> +	 * Wait for the interrupts of the processed transfers in multiple
>> +	 * of 64 and for the last transfer. If the hardware is fast and
>> +	 * already processed all the transfers then no need to wait.
>> +	 */
>> +	for (i = 0; i < max_irq_cnt; i++) {
>> +		reinit_completion(transfer_comp);
>> +		if (max_irq_cnt != multi_xfer->irq_cnt) {
>> +			time_left = wait_for_completion_timeout(transfer_comp,
>> +								transfer_timeout_msecs);
>> +			if (!time_left) {
>> +				dev_err(dev, "%s: Transfer timeout\n", __func__);
>> +				return -ETIMEDOUT;
>> +			}
>> +		}
>> +		if (num_xfers > multi_xfer->msg_idx_cnt)
>> +			return 0;
>> +	}
>> +	return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(gpi_multi_desc_process);
>> +
>>   /* gpi_of_dma_xlate: open client requested channel */
>>   static struct dma_chan *gpi_of_dma_xlate(struct of_phandle_args *args,
>>   					 struct of_dma *of_dma)
>> diff --git a/include/linux/dma/qcom-gpi-dma.h b/include/linux/dma/qcom-gpi-dma.h
>> index 6680dd1a43c6..1341ff0db808 100644
>> --- a/include/linux/dma/qcom-gpi-dma.h
>> +++ b/include/linux/dma/qcom-gpi-dma.h
>> @@ -15,6 +15,12 @@ enum spi_transfer_cmd {
>>   	SPI_DUPLEX,
>>   };
>>   
>> +#define QCOM_GPI_BLOCK_EVENT_IRQ	BIT(0)
>> +
>> +#define QCOM_GPI_MAX_NUM_MSGS		16
>> +#define NUM_MSGS_PER_IRQ		8
>> +#define MIN_NUM_OF_MSGS_MULTI_DESC	4
>> +
>>   /**
>>    * struct gpi_spi_config - spi config for peripheral
>>    *
>> @@ -51,6 +57,29 @@ enum i2c_op {
>>   	I2C_READ,
>>   };
>>   
>> +/**
>> + * struct gpi_multi_xfer - Used for multi transfer support
>> + *
>> + * @msg_idx_cnt: message index for the transfer
>> + * @buf_idx: dma buffer index
>> + * @unmap_msg_cnt: unampped transfer index
>> + * @freed_msg_cnt: freed transfer index
>> + * @irq_cnt: received interrupt count
>> + * @irq_msg_cnt: transfer message count for the received irqs
>> + * @dma_buf: virtual address of the buffer
>> + * @dma_addr: dma address of the buffer
>> + */
>> +struct gpi_multi_xfer {
>> +	u32 msg_idx_cnt;
>> +	u32 buf_idx;
>> +	u32 unmap_msg_cnt;
>> +	u32 freed_msg_cnt;
>> +	u32 irq_cnt;
>> +	u32 irq_msg_cnt;
>> +	void *dma_buf[QCOM_GPI_MAX_NUM_MSGS];
>> +	dma_addr_t dma_addr[QCOM_GPI_MAX_NUM_MSGS];
>> +};
>> +
>>   /**
>>    * struct gpi_i2c_config - i2c config for peripheral
>>    *
>> @@ -65,6 +94,8 @@ enum i2c_op {
>>    * @rx_len: receive length for buffer
>>    * @op: i2c cmd
>>    * @muli-msg: is part of multi i2c r-w msgs
>> + * @flags: true for block event interrupt support
>> + * @multi_xfer: indicates transfer has multi messages
>>    */
>>   struct gpi_i2c_config {
>>   	u8 set_config;
>> @@ -78,6 +109,12 @@ struct gpi_i2c_config {
>>   	u32 rx_len;
>>   	enum i2c_op op;
>>   	bool multi_msg;
>> +	u8 flags;
>> +	struct gpi_multi_xfer multi_xfer;
>>   };
>>   
>> +int gpi_multi_desc_process(struct device *dev, struct gpi_multi_xfer *multi_xfer,
>> +			   u32 num_xfers, u32 tranfer_timeout_msecs,
>> +			   struct completion *transfer_comp);
>> +
>>   #endif /* QCOM_GPI_DMA_H */
>> -- 
>> 2.17.1
>>