[PATCH v2 3/5] bus: mhi: host: pci_generic: Remove MHI driver and ensure graceful device recovery

Vivek.Pernamitta@quicinc.com posted 5 patches 2 months, 4 weeks ago
There is a newer version of this series
[PATCH v2 3/5] bus: mhi: host: pci_generic: Remove MHI driver and ensure graceful device recovery
Posted by Vivek.Pernamitta@quicinc.com 2 months, 4 weeks ago
From: Vivek Pernamitta <quic_vpernami@quicinc.com>

When the MHI driver is removed from the host side, it is crucial to ensure
graceful recovery of the device. To achieve this, the host driver will
perform the following steps:

1. Disable SRIOV for any SRIOV-enabled devices on the Physical Function.
2. Perform a SOC_RESET on Physical Function (PF).

Disabling SRIOV ensures that all virtual functions are properly shut down,
preventing any potential issues during the reset process. Performing
SOC_RESET on each physical function guarantees that the device is fully
reset and ready for subsequent operations.

Signed-off-by: Vivek Pernamitta <quic_vpernami@quicinc.com>
---
 drivers/bus/mhi/host/pci_generic.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c
index 4bafe93b56c54e2b091786e7fcd68a36c8247b8e..2d1381006293412fbc593316e5c7f0f59ac74da8 100644
--- a/drivers/bus/mhi/host/pci_generic.c
+++ b/drivers/bus/mhi/host/pci_generic.c
@@ -45,6 +45,8 @@
  * @sideband_wake: Devices using dedicated sideband GPIO for wakeup instead
  *		   of inband wake support (such as sdx24)
  * @no_m3: M3 not supported
+ * @reset_on_driver_unbind: Set true for devices support SOC reset and
+ *				 perform it when unbinding driver
  */
 struct mhi_pci_dev_info {
 	const struct mhi_controller_config *config;
@@ -58,6 +60,7 @@ struct mhi_pci_dev_info {
 	unsigned int mru_default;
 	bool sideband_wake;
 	bool no_m3;
+	bool reset_on_driver_unbind;
 };
 
 #define MHI_CHANNEL_CONFIG_UL(ch_num, ch_name, el_count, ev_ring) \
@@ -300,6 +303,7 @@ static const struct mhi_pci_dev_info mhi_qcom_qdu100_info = {
 	.dma_data_width = 32,
 	.sideband_wake = false,
 	.no_m3 = true,
+	.reset_on_driver_unbind = true,
 };
 
 static const struct mhi_channel_config mhi_qcom_sa8775p_channels[] = {
@@ -970,6 +974,7 @@ struct mhi_pci_device {
 	struct work_struct recovery_work;
 	struct timer_list health_check_timer;
 	unsigned long status;
+	bool reset_on_driver_unbind;
 };
 
 static int mhi_pci_read_reg(struct mhi_controller *mhi_cntrl,
@@ -1270,6 +1275,11 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	mhi_cntrl->mru = info->mru_default;
 	mhi_cntrl->name = info->name;
 
+	/* Assign reset functionalities only for PF */
+	if (pdev->is_physfn)
+		mhi_pdev->reset_on_driver_unbind = info->reset_on_driver_unbind;
+
+
 	if (info->edl_trigger)
 		mhi_cntrl->edl_trigger = mhi_pci_generic_edl_trigger;
 
@@ -1336,7 +1346,7 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	return err;
 }
 
-static void mhi_pci_remove(struct pci_dev *pdev)
+static void mhi_pci_resource_deinit(struct pci_dev *pdev)
 {
 	struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
 	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
@@ -1352,6 +1362,20 @@ static void mhi_pci_remove(struct pci_dev *pdev)
 	/* balancing probe put_noidle */
 	if (pci_pme_capable(pdev, PCI_D3hot))
 		pm_runtime_get_noresume(&pdev->dev);
+}
+
+static void mhi_pci_remove(struct pci_dev *pdev)
+{
+	struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
+	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+
+	/* Disable SRIOV */
+	pci_disable_sriov(pdev);
+	mhi_pci_resource_deinit(pdev);
+	if (mhi_pdev->reset_on_driver_unbind) {
+		dev_info(&pdev->dev, "perform SOC reset\n");
+		mhi_soc_reset(mhi_cntrl);
+	}
 
 	mhi_unregister_controller(mhi_cntrl);
 }

-- 
2.34.1
Re: [PATCH v2 3/5] bus: mhi: host: pci_generic: Remove MHI driver and ensure graceful device recovery
Posted by Manivannan Sadhasivam 2 months ago
On Thu, Jul 10, 2025 at 02:28:34PM GMT, Vivek.Pernamitta@quicinc.com wrote:
> From: Vivek Pernamitta <quic_vpernami@quicinc.com>
> 
> When the MHI driver is removed from the host side, it is crucial to ensure
> graceful recovery of the device. To achieve this, the host driver will
> perform the following steps:

Please rewrite the description in an imperative tone.

> 
> 1. Disable SRIOV for any SRIOV-enabled devices on the Physical Function.

You haven't enabled SR-IOV at this point, but only in the next patch. So you
need to swap patches 3 and 4.

> 2. Perform a SOC_RESET on Physical Function (PF).
> 
> Disabling SRIOV ensures that all virtual functions are properly shut down,
> preventing any potential issues during the reset process. Performing
> SOC_RESET on each physical function guarantees that the device is fully

Each physical function? How many PF does this device support? It should be
described somewhere on the total PF and VF it supports.

> reset and ready for subsequent operations.
> 
> Signed-off-by: Vivek Pernamitta <quic_vpernami@quicinc.com>
> ---
>  drivers/bus/mhi/host/pci_generic.c | 26 +++++++++++++++++++++++++-
>  1 file changed, 25 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c
> index 4bafe93b56c54e2b091786e7fcd68a36c8247b8e..2d1381006293412fbc593316e5c7f0f59ac74da8 100644
> --- a/drivers/bus/mhi/host/pci_generic.c
> +++ b/drivers/bus/mhi/host/pci_generic.c
> @@ -45,6 +45,8 @@
>   * @sideband_wake: Devices using dedicated sideband GPIO for wakeup instead
>   *		   of inband wake support (such as sdx24)
>   * @no_m3: M3 not supported
> + * @reset_on_driver_unbind: Set true for devices support SOC reset and
> + *				 perform it when unbinding driver

	reset_on_remove: Reset the device while removing the driver

>   */
>  struct mhi_pci_dev_info {
>  	const struct mhi_controller_config *config;
> @@ -58,6 +60,7 @@ struct mhi_pci_dev_info {
>  	unsigned int mru_default;
>  	bool sideband_wake;
>  	bool no_m3;
> +	bool reset_on_driver_unbind;
>  };
>  
>  #define MHI_CHANNEL_CONFIG_UL(ch_num, ch_name, el_count, ev_ring) \
> @@ -300,6 +303,7 @@ static const struct mhi_pci_dev_info mhi_qcom_qdu100_info = {
>  	.dma_data_width = 32,
>  	.sideband_wake = false,
>  	.no_m3 = true,
> +	.reset_on_driver_unbind = true,
>  };
>  
>  static const struct mhi_channel_config mhi_qcom_sa8775p_channels[] = {
> @@ -970,6 +974,7 @@ struct mhi_pci_device {
>  	struct work_struct recovery_work;
>  	struct timer_list health_check_timer;
>  	unsigned long status;
> +	bool reset_on_driver_unbind;
>  };
>  
>  static int mhi_pci_read_reg(struct mhi_controller *mhi_cntrl,
> @@ -1270,6 +1275,11 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  	mhi_cntrl->mru = info->mru_default;
>  	mhi_cntrl->name = info->name;
>  
> +	/* Assign reset functionalities only for PF */

Remove the comment.

> +	if (pdev->is_physfn)
> +		mhi_pdev->reset_on_driver_unbind = info->reset_on_driver_unbind;
> +
> +

Extra newline.

>  	if (info->edl_trigger)
>  		mhi_cntrl->edl_trigger = mhi_pci_generic_edl_trigger;
>  
> @@ -1336,7 +1346,7 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  	return err;
>  }
>  
> -static void mhi_pci_remove(struct pci_dev *pdev)
> +static void mhi_pci_resource_deinit(struct pci_dev *pdev)

Please do not create unsymmetric functions. There is no mhi_pci_resource_init().

>  {
>  	struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
>  	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
> @@ -1352,6 +1362,20 @@ static void mhi_pci_remove(struct pci_dev *pdev)
>  	/* balancing probe put_noidle */
>  	if (pci_pme_capable(pdev, PCI_D3hot))
>  		pm_runtime_get_noresume(&pdev->dev);
> +}
> +
> +static void mhi_pci_remove(struct pci_dev *pdev)
> +{
> +	struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
> +	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
> +
> +	/* Disable SRIOV */

This one too.

> +	pci_disable_sriov(pdev);
> +	mhi_pci_resource_deinit(pdev);
> +	if (mhi_pdev->reset_on_driver_unbind) {
> +		dev_info(&pdev->dev, "perform SOC reset\n");

This is just a spam, please drop.

- Mani

-- 
மணிவண்ணன் சதாசிவம்
Re: [PATCH v2 3/5] bus: mhi: host: pci_generic: Remove MHI driver and ensure graceful device recovery
Posted by Konrad Dybcio 2 months, 4 weeks ago
On 7/10/25 10:58 AM, Vivek.Pernamitta@quicinc.com wrote:
> From: Vivek Pernamitta <quic_vpernami@quicinc.com>
> 
> When the MHI driver is removed from the host side, it is crucial to ensure
> graceful recovery of the device. To achieve this, the host driver will
> perform the following steps:
> 
> 1. Disable SRIOV for any SRIOV-enabled devices on the Physical Function.
> 2. Perform a SOC_RESET on Physical Function (PF).
> 
> Disabling SRIOV ensures that all virtual functions are properly shut down,
> preventing any potential issues during the reset process. Performing
> SOC_RESET on each physical function guarantees that the device is fully
> reset and ready for subsequent operations.
> 
> Signed-off-by: Vivek Pernamitta <quic_vpernami@quicinc.com>
> ---
>  drivers/bus/mhi/host/pci_generic.c | 26 +++++++++++++++++++++++++-
>  1 file changed, 25 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c
> index 4bafe93b56c54e2b091786e7fcd68a36c8247b8e..2d1381006293412fbc593316e5c7f0f59ac74da8 100644
> --- a/drivers/bus/mhi/host/pci_generic.c
> +++ b/drivers/bus/mhi/host/pci_generic.c
> @@ -45,6 +45,8 @@
>   * @sideband_wake: Devices using dedicated sideband GPIO for wakeup instead
>   *		   of inband wake support (such as sdx24)
>   * @no_m3: M3 not supported
> + * @reset_on_driver_unbind: Set true for devices support SOC reset and
> + *				 perform it when unbinding driver
>   */
>  struct mhi_pci_dev_info {
>  	const struct mhi_controller_config *config;
> @@ -58,6 +60,7 @@ struct mhi_pci_dev_info {
>  	unsigned int mru_default;
>  	bool sideband_wake;
>  	bool no_m3;
> +	bool reset_on_driver_unbind;
>  };
>  
>  #define MHI_CHANNEL_CONFIG_UL(ch_num, ch_name, el_count, ev_ring) \
> @@ -300,6 +303,7 @@ static const struct mhi_pci_dev_info mhi_qcom_qdu100_info = {
>  	.dma_data_width = 32,
>  	.sideband_wake = false,
>  	.no_m3 = true,
> +	.reset_on_driver_unbind = true,

It seems rather unlikely that out off all MHI devices, only QDU100
needs this quirk when working under SR-IOV

>  };
>  
>  static const struct mhi_channel_config mhi_qcom_sa8775p_channels[] = {
> @@ -970,6 +974,7 @@ struct mhi_pci_device {
>  	struct work_struct recovery_work;
>  	struct timer_list health_check_timer;
>  	unsigned long status;
> +	bool reset_on_driver_unbind;
>  };
>  
>  static int mhi_pci_read_reg(struct mhi_controller *mhi_cntrl,
> @@ -1270,6 +1275,11 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  	mhi_cntrl->mru = info->mru_default;
>  	mhi_cntrl->name = info->name;
>  
> +	/* Assign reset functionalities only for PF */
> +	if (pdev->is_physfn)
> +		mhi_pdev->reset_on_driver_unbind = info->reset_on_driver_unbind;
> +
> +

Double \n

>  	if (info->edl_trigger)
>  		mhi_cntrl->edl_trigger = mhi_pci_generic_edl_trigger;
>  
> @@ -1336,7 +1346,7 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  	return err;
>  }
>  
> -static void mhi_pci_remove(struct pci_dev *pdev)
> +static void mhi_pci_resource_deinit(struct pci_dev *pdev)
>  {
>  	struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
>  	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
> @@ -1352,6 +1362,20 @@ static void mhi_pci_remove(struct pci_dev *pdev)
>  	/* balancing probe put_noidle */
>  	if (pci_pme_capable(pdev, PCI_D3hot))
>  		pm_runtime_get_noresume(&pdev->dev);
> +}
> +
> +static void mhi_pci_remove(struct pci_dev *pdev)
> +{
> +	struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
> +	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
> +
> +	/* Disable SRIOV */
> +	pci_disable_sriov(pdev);
> +	mhi_pci_resource_deinit(pdev);
> +	if (mhi_pdev->reset_on_driver_unbind) {

Can we not simply check for pdev->is_physfn?

> +		dev_info(&pdev->dev, "perform SOC reset\n");

Is the logspam really necessary?

> +		mhi_soc_reset(mhi_cntrl);
> +	}

Konrad
Re: [PATCH v2 3/5] bus: mhi: host: pci_generic: Remove MHI driver and ensure graceful device recovery
Posted by Vivek Pernamitta 2 months, 2 weeks ago

On 7/10/2025 6:12 PM, Konrad Dybcio wrote:
> On 7/10/25 10:58 AM, Vivek.Pernamitta@quicinc.com wrote:
>> From: Vivek Pernamitta <quic_vpernami@quicinc.com>
>>
>> When the MHI driver is removed from the host side, it is crucial to ensure
>> graceful recovery of the device. To achieve this, the host driver will
>> perform the following steps:
>>
>> 1. Disable SRIOV for any SRIOV-enabled devices on the Physical Function.
>> 2. Perform a SOC_RESET on Physical Function (PF).
>>
>> Disabling SRIOV ensures that all virtual functions are properly shut down,
>> preventing any potential issues during the reset process. Performing
>> SOC_RESET on each physical function guarantees that the device is fully
>> reset and ready for subsequent operations.
>>
>> Signed-off-by: Vivek Pernamitta <quic_vpernami@quicinc.com>
>> ---
>>   drivers/bus/mhi/host/pci_generic.c | 26 +++++++++++++++++++++++++-
>>   1 file changed, 25 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c
>> index 4bafe93b56c54e2b091786e7fcd68a36c8247b8e..2d1381006293412fbc593316e5c7f0f59ac74da8 100644
>> --- a/drivers/bus/mhi/host/pci_generic.c
>> +++ b/drivers/bus/mhi/host/pci_generic.c
>> @@ -45,6 +45,8 @@
>>    * @sideband_wake: Devices using dedicated sideband GPIO for wakeup instead
>>    *		   of inband wake support (such as sdx24)
>>    * @no_m3: M3 not supported
>> + * @reset_on_driver_unbind: Set true for devices support SOC reset and
>> + *				 perform it when unbinding driver
>>    */
>>   struct mhi_pci_dev_info {
>>   	const struct mhi_controller_config *config;
>> @@ -58,6 +60,7 @@ struct mhi_pci_dev_info {
>>   	unsigned int mru_default;
>>   	bool sideband_wake;
>>   	bool no_m3;
>> +	bool reset_on_driver_unbind;
>>   };
>>   
>>   #define MHI_CHANNEL_CONFIG_UL(ch_num, ch_name, el_count, ev_ring) \
>> @@ -300,6 +303,7 @@ static const struct mhi_pci_dev_info mhi_qcom_qdu100_info = {
>>   	.dma_data_width = 32,
>>   	.sideband_wake = false,
>>   	.no_m3 = true,
>> +	.reset_on_driver_unbind = true,
> 
> It seems rather unlikely that out off all MHI devices, only QDU100
> needs this quirk when working under SR-IOV
The reset_on_driver_unbind flag has been explicitly added for the QDU100
device. While other devices might enter RAMDUMP mode and wait when a SOC
reset is issued, the QDU100 device will pass through without entering
RAMDUMP mode
> 
>>   };
>>   
>>   static const struct mhi_channel_config mhi_qcom_sa8775p_channels[] = {
>> @@ -970,6 +974,7 @@ struct mhi_pci_device {
>>   	struct work_struct recovery_work;
>>   	struct timer_list health_check_timer;
>>   	unsigned long status;
>> +	bool reset_on_driver_unbind;
>>   };
>>   
>>   static int mhi_pci_read_reg(struct mhi_controller *mhi_cntrl,
>> @@ -1270,6 +1275,11 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>   	mhi_cntrl->mru = info->mru_default;
>>   	mhi_cntrl->name = info->name;
>>   
>> +	/* Assign reset functionalities only for PF */
>> +	if (pdev->is_physfn)
>> +		mhi_pdev->reset_on_driver_unbind = info->reset_on_driver_unbind;
>> +
>> +
> 
> Double \n
sure will correct in next patchset
> 
>>   	if (info->edl_trigger)
>>   		mhi_cntrl->edl_trigger = mhi_pci_generic_edl_trigger;
>>   
>> @@ -1336,7 +1346,7 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>   	return err;
>>   }
>>   
>> -static void mhi_pci_remove(struct pci_dev *pdev)
>> +static void mhi_pci_resource_deinit(struct pci_dev *pdev)
>>   {
>>   	struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
>>   	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
>> @@ -1352,6 +1362,20 @@ static void mhi_pci_remove(struct pci_dev *pdev)
>>   	/* balancing probe put_noidle */
>>   	if (pci_pme_capable(pdev, PCI_D3hot))
>>   		pm_runtime_get_noresume(&pdev->dev);
>> +}
>> +
>> +static void mhi_pci_remove(struct pci_dev *pdev)
>> +{
>> +	struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
>> +	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
>> +
>> +	/* Disable SRIOV */
>> +	pci_disable_sriov(pdev);
>> +	mhi_pci_resource_deinit(pdev);
>> +	if (mhi_pdev->reset_on_driver_unbind) {
> 
> Can we not simply check for pdev->is_physfn?
The reset_on_driver_unbind flag has been explicitly added for the QDU100 
device. While other devices might enter RAMDUMP mode and wait when a SOC 
reset is issued, the QDU100 device will pass through without entering 
RAMDUMP mode.
> 
>> +		dev_info(&pdev->dev, "perform SOC reset\n");
> 
> Is the logspam really necessary?
> 
>> +		mhi_soc_reset(mhi_cntrl);
>> +	}
> 
> Konrad
Re: [PATCH v2 3/5] bus: mhi: host: pci_generic: Remove MHI driver and ensure graceful device recovery
Posted by Konrad Dybcio 2 months, 2 weeks ago
On 7/23/25 2:11 PM, Vivek Pernamitta wrote:
> 
> 
> On 7/10/2025 6:12 PM, Konrad Dybcio wrote:
>> On 7/10/25 10:58 AM, Vivek.Pernamitta@quicinc.com wrote:
>>> From: Vivek Pernamitta <quic_vpernami@quicinc.com>
>>>
>>> When the MHI driver is removed from the host side, it is crucial to ensure
>>> graceful recovery of the device. To achieve this, the host driver will
>>> perform the following steps:
>>>
>>> 1. Disable SRIOV for any SRIOV-enabled devices on the Physical Function.
>>> 2. Perform a SOC_RESET on Physical Function (PF).
>>>
>>> Disabling SRIOV ensures that all virtual functions are properly shut down,
>>> preventing any potential issues during the reset process. Performing
>>> SOC_RESET on each physical function guarantees that the device is fully
>>> reset and ready for subsequent operations.
>>>
>>> Signed-off-by: Vivek Pernamitta <quic_vpernami@quicinc.com>
>>> ---

[...]

>> It seems rather unlikely that out off all MHI devices, only QDU100
>> needs this quirk when working under SR-IOV
> The reset_on_driver_unbind flag has been explicitly added for the QDU100
> device. While other devices might enter RAMDUMP mode and wait when a SOC
> reset is issued, the QDU100 device will pass through without entering
> RAMDUMP mode

Rather inconveniently, this is not something that you mentioned in
the commit message.. Especially in the middle of a series that focuses
on enabling SR-IOV which suggests it's strictly related to virtualization,
making it difficult or impossible to understand the problem that this
patch is actually solving.

Please rewrite the commit message.

Konrad