[PATCH] PCI: Reduce delay after FLR of Microsoft MANA devices

grwhyte@linux.microsoft.com posted 1 patch 6 months, 3 weeks ago
There is a newer version of this series
drivers/pci/pci.c    |  3 ++-
drivers/pci/pci.h    |  1 +
drivers/pci/quirks.c | 55 ++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 58 insertions(+), 1 deletion(-)
[PATCH] PCI: Reduce delay after FLR of Microsoft MANA devices
Posted by grwhyte@linux.microsoft.com 6 months, 3 weeks ago
From: Graham Whyte <grwhyte@linux.microsoft.com>

Add a device-specific reset for Microsoft MANA devices with the FLR
delay reduced from 100ms to 10ms. While this is not compliant with the pci
spec, these devices safely complete the FLR much quicker than 100ms and
this can be reduced to optimize certain scenarios

Signed-off-by: Graham Whyte <grwhyte@linux.microsoft.com>
---
 drivers/pci/pci.c    |  3 ++-
 drivers/pci/pci.h    |  1 +
 drivers/pci/quirks.c | 55 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 9cb1de7658b5..ad2960117acd 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1262,7 +1262,7 @@ void pci_resume_bus(struct pci_bus *bus)
 		pci_walk_bus(bus, pci_resume_one, NULL);
 }
 
-static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
+int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
 {
 	int delay = 1;
 	bool retrain = false;
@@ -1344,6 +1344,7 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(pci_dev_wait);
 
 /**
  * pci_power_up - Put the given device into D0
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index f2958318d259..3a98e00eb02a 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -109,6 +109,7 @@ void pci_init_reset_methods(struct pci_dev *dev);
 int pci_bridge_secondary_bus_reset(struct pci_dev *dev);
 int pci_bus_error_reset(struct pci_dev *dev);
 int __pci_reset_bus(struct pci_bus *bus);
+int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout);
 
 struct pci_cap_saved_data {
 	u16		cap_nr;
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index c354276d4bac..94bd2c82cbbd 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4205,6 +4205,55 @@ static int reset_hinic_vf_dev(struct pci_dev *pdev, bool probe)
 	return 0;
 }
 
+#define MSFT_PCIE_RESET_READY_POLL_MS 60000 /* msec */
+#define MICROSOFT_2051_SVC 0xb210
+#define MICROSOFT_2051_MANA_MGMT 0x00b8
+#define MICROSOFT_2051_MANA_MGMT_GFT 0xb290
+
+/* Device specific reset for msft GFT and gdma devices */
+static int msft_pcie_flr(struct pci_dev *dev)
+{
+	if (!pci_wait_for_pending_transaction(dev))
+		pci_err(dev, "timed out waiting for pending transaction; "
+			"performing function level reset anyway\n");
+
+	pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR);
+
+	if (dev->imm_ready)
+		return 0;
+
+	/*
+	 * Per PCIe r4.0, sec 6.6.2, a device must complete an FLR within
+	 * 100ms, but may silently discard requests while the FLR is in
+	 * progress. However, 100ms is much longer than required for modern
+	 * devices, so we can afford to reduce the timeout to 10ms.
+	 */
+	usleep_range(10000, 10001);
+
+	return pci_dev_wait(dev, "FLR", MSFT_PCIE_RESET_READY_POLL_MS);
+}
+
+/*
+ * msft_pcie_reset_flr - initiate a PCIe function level reset
+ * @dev: device to reset
+ * @probe: if true, return 0 if device can be reset this way
+ *
+ * Initiate a function level reset on @dev.
+ */
+static int msft_pcie_reset_flr(struct pci_dev *dev, bool probe)
+{
+	if (dev->dev_flags & PCI_DEV_FLAGS_NO_FLR_RESET)
+		return -ENOTTY;
+
+	if (!(dev->devcap & PCI_EXP_DEVCAP_FLR))
+		return -ENOTTY;
+
+	if (probe)
+		return 0;
+
+	return msft_pcie_flr(dev);
+}
+
 static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
 	{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82599_SFP_VF,
 		 reset_intel_82599_sfp_virtfn },
@@ -4220,6 +4269,12 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
 		reset_chelsio_generic_dev },
 	{ PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HINIC_VF,
 		reset_hinic_vf_dev },
+	{ PCI_VENDOR_ID_MICROSOFT, MICROSOFT_2051_SVC,
+		msft_pcie_reset_flr},
+	{ PCI_VENDOR_ID_MICROSOFT, MICROSOFT_2051_MANA_MGMT,
+		msft_pcie_reset_flr},
+	{ PCI_VENDOR_ID_MICROSOFT, MICROSOFT_2051_MANA_MGMT_GFT,
+		msft_pcie_reset_flr},
 	{ 0 }
 };
 
-- 
2.25.1
Re: [PATCH] PCI: Reduce delay after FLR of Microsoft MANA devices
Posted by Bjorn Helgaas 6 months, 3 weeks ago
On Wed, May 21, 2025 at 11:15:39PM +0000, grwhyte@linux.microsoft.com wrote:
> From: Graham Whyte <grwhyte@linux.microsoft.com>
> 
> Add a device-specific reset for Microsoft MANA devices with the FLR
> delay reduced from 100ms to 10ms. While this is not compliant with the pci
> spec, these devices safely complete the FLR much quicker than 100ms and
> this can be reduced to optimize certain scenarios

It looks like this could be done generically if the device advertised
the Readiness Time Reporting Capability (PCIe r6.0, sec 7.9.16) and
Linux supported that Capability (which it currently does not)?

From 7.9.16.3:

  FLR Time - is the time that the Function requires to become
  Configuration-Ready after it was issued an FLR.

Does the device advertise that capability?  It would be much nicer if
we didn't have to add a device-specific quirk for this.

> Signed-off-by: Graham Whyte <grwhyte@linux.microsoft.com>
> ---
>  drivers/pci/pci.c    |  3 ++-
>  drivers/pci/pci.h    |  1 +
>  drivers/pci/quirks.c | 55 ++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 58 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index 9cb1de7658b5..ad2960117acd 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -1262,7 +1262,7 @@ void pci_resume_bus(struct pci_bus *bus)
>  		pci_walk_bus(bus, pci_resume_one, NULL);
>  }
>  
> -static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
> +int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
>  {
>  	int delay = 1;
>  	bool retrain = false;
> @@ -1344,6 +1344,7 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
>  
>  	return 0;
>  }
> +EXPORT_SYMBOL_GPL(pci_dev_wait);
>  
>  /**
>   * pci_power_up - Put the given device into D0
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index f2958318d259..3a98e00eb02a 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -109,6 +109,7 @@ void pci_init_reset_methods(struct pci_dev *dev);
>  int pci_bridge_secondary_bus_reset(struct pci_dev *dev);
>  int pci_bus_error_reset(struct pci_dev *dev);
>  int __pci_reset_bus(struct pci_bus *bus);
> +int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout);
>  
>  struct pci_cap_saved_data {
>  	u16		cap_nr;
> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
> index c354276d4bac..94bd2c82cbbd 100644
> --- a/drivers/pci/quirks.c
> +++ b/drivers/pci/quirks.c
> @@ -4205,6 +4205,55 @@ static int reset_hinic_vf_dev(struct pci_dev *pdev, bool probe)
>  	return 0;
>  }
>  
> +#define MSFT_PCIE_RESET_READY_POLL_MS 60000 /* msec */
> +#define MICROSOFT_2051_SVC 0xb210
> +#define MICROSOFT_2051_MANA_MGMT 0x00b8
> +#define MICROSOFT_2051_MANA_MGMT_GFT 0xb290
> +
> +/* Device specific reset for msft GFT and gdma devices */
> +static int msft_pcie_flr(struct pci_dev *dev)
> +{
> +	if (!pci_wait_for_pending_transaction(dev))
> +		pci_err(dev, "timed out waiting for pending transaction; "
> +			"performing function level reset anyway\n");
> +
> +	pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR);
> +
> +	if (dev->imm_ready)
> +		return 0;
> +
> +	/*
> +	 * Per PCIe r4.0, sec 6.6.2, a device must complete an FLR within
> +	 * 100ms, but may silently discard requests while the FLR is in
> +	 * progress. However, 100ms is much longer than required for modern
> +	 * devices, so we can afford to reduce the timeout to 10ms.
> +	 */
> +	usleep_range(10000, 10001);
> +
> +	return pci_dev_wait(dev, "FLR", MSFT_PCIE_RESET_READY_POLL_MS);
> +}
> +
> +/*
> + * msft_pcie_reset_flr - initiate a PCIe function level reset
> + * @dev: device to reset
> + * @probe: if true, return 0 if device can be reset this way
> + *
> + * Initiate a function level reset on @dev.
> + */
> +static int msft_pcie_reset_flr(struct pci_dev *dev, bool probe)
> +{
> +	if (dev->dev_flags & PCI_DEV_FLAGS_NO_FLR_RESET)
> +		return -ENOTTY;
> +
> +	if (!(dev->devcap & PCI_EXP_DEVCAP_FLR))
> +		return -ENOTTY;
> +
> +	if (probe)
> +		return 0;
> +
> +	return msft_pcie_flr(dev);
> +}
> +
>  static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
>  	{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82599_SFP_VF,
>  		 reset_intel_82599_sfp_virtfn },
> @@ -4220,6 +4269,12 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
>  		reset_chelsio_generic_dev },
>  	{ PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HINIC_VF,
>  		reset_hinic_vf_dev },
> +	{ PCI_VENDOR_ID_MICROSOFT, MICROSOFT_2051_SVC,
> +		msft_pcie_reset_flr},
> +	{ PCI_VENDOR_ID_MICROSOFT, MICROSOFT_2051_MANA_MGMT,
> +		msft_pcie_reset_flr},
> +	{ PCI_VENDOR_ID_MICROSOFT, MICROSOFT_2051_MANA_MGMT_GFT,
> +		msft_pcie_reset_flr},
>  	{ 0 }
>  };
>  
> -- 
> 2.25.1
>
Re: [PATCH] PCI: Reduce delay after FLR of Microsoft MANA devices
Posted by Graham Whyte 6 months, 2 weeks ago

On 5/23/2025 9:39 AM, Bjorn Helgaas wrote:
> On Wed, May 21, 2025 at 11:15:39PM +0000, grwhyte@linux.microsoft.com wrote:
>> From: Graham Whyte <grwhyte@linux.microsoft.com>
>>
>> Add a device-specific reset for Microsoft MANA devices with the FLR
>> delay reduced from 100ms to 10ms. While this is not compliant with the pci
>> spec, these devices safely complete the FLR much quicker than 100ms and
>> this can be reduced to optimize certain scenarios
> 
> It looks like this could be done generically if the device advertised
> the Readiness Time Reporting Capability (PCIe r6.0, sec 7.9.16) and
> Linux supported that Capability (which it currently does not)?
> 
>>From 7.9.16.3:
> 
>   FLR Time - is the time that the Function requires to become
>   Configuration-Ready after it was issued an FLR.
> 
> Does the device advertise that capability?  It would be much nicer if
> we didn't have to add a device-specific quirk for this.
> 

Unfortunately our device doesn't support the readiness time 
reporting capability.

>> Signed-off-by: Graham Whyte <grwhyte@linux.microsoft.com>
>> ---
>>  drivers/pci/pci.c    |  3 ++-
>>  drivers/pci/pci.h    |  1 +
>>  drivers/pci/quirks.c | 55 ++++++++++++++++++++++++++++++++++++++++++++
>>  3 files changed, 58 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>> index 9cb1de7658b5..ad2960117acd 100644
>> --- a/drivers/pci/pci.c
>> +++ b/drivers/pci/pci.c
>> @@ -1262,7 +1262,7 @@ void pci_resume_bus(struct pci_bus *bus)
>>  		pci_walk_bus(bus, pci_resume_one, NULL);
>>  }
>>  
>> -static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
>> +int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
>>  {
>>  	int delay = 1;
>>  	bool retrain = false;
>> @@ -1344,6 +1344,7 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
>>  
>>  	return 0;
>>  }
>> +EXPORT_SYMBOL_GPL(pci_dev_wait);
>>  
>>  /**
>>   * pci_power_up - Put the given device into D0
>> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
>> index f2958318d259..3a98e00eb02a 100644
>> --- a/drivers/pci/pci.h
>> +++ b/drivers/pci/pci.h
>> @@ -109,6 +109,7 @@ void pci_init_reset_methods(struct pci_dev *dev);
>>  int pci_bridge_secondary_bus_reset(struct pci_dev *dev);
>>  int pci_bus_error_reset(struct pci_dev *dev);
>>  int __pci_reset_bus(struct pci_bus *bus);
>> +int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout);
>>  
>>  struct pci_cap_saved_data {
>>  	u16		cap_nr;
>> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
>> index c354276d4bac..94bd2c82cbbd 100644
>> --- a/drivers/pci/quirks.c
>> +++ b/drivers/pci/quirks.c
>> @@ -4205,6 +4205,55 @@ static int reset_hinic_vf_dev(struct pci_dev *pdev, bool probe)
>>  	return 0;
>>  }
>>  
>> +#define MSFT_PCIE_RESET_READY_POLL_MS 60000 /* msec */
>> +#define MICROSOFT_2051_SVC 0xb210
>> +#define MICROSOFT_2051_MANA_MGMT 0x00b8
>> +#define MICROSOFT_2051_MANA_MGMT_GFT 0xb290
>> +
>> +/* Device specific reset for msft GFT and gdma devices */
>> +static int msft_pcie_flr(struct pci_dev *dev)
>> +{
>> +	if (!pci_wait_for_pending_transaction(dev))
>> +		pci_err(dev, "timed out waiting for pending transaction; "
>> +			"performing function level reset anyway\n");
>> +
>> +	pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR);
>> +
>> +	if (dev->imm_ready)
>> +		return 0;
>> +
>> +	/*
>> +	 * Per PCIe r4.0, sec 6.6.2, a device must complete an FLR within
>> +	 * 100ms, but may silently discard requests while the FLR is in
>> +	 * progress. However, 100ms is much longer than required for modern
>> +	 * devices, so we can afford to reduce the timeout to 10ms.
>> +	 */
>> +	usleep_range(10000, 10001);
>> +
>> +	return pci_dev_wait(dev, "FLR", MSFT_PCIE_RESET_READY_POLL_MS);
>> +}
>> +
>> +/*
>> + * msft_pcie_reset_flr - initiate a PCIe function level reset
>> + * @dev: device to reset
>> + * @probe: if true, return 0 if device can be reset this way
>> + *
>> + * Initiate a function level reset on @dev.
>> + */
>> +static int msft_pcie_reset_flr(struct pci_dev *dev, bool probe)
>> +{
>> +	if (dev->dev_flags & PCI_DEV_FLAGS_NO_FLR_RESET)
>> +		return -ENOTTY;
>> +
>> +	if (!(dev->devcap & PCI_EXP_DEVCAP_FLR))
>> +		return -ENOTTY;
>> +
>> +	if (probe)
>> +		return 0;
>> +
>> +	return msft_pcie_flr(dev);
>> +}
>> +
>>  static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
>>  	{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82599_SFP_VF,
>>  		 reset_intel_82599_sfp_virtfn },
>> @@ -4220,6 +4269,12 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
>>  		reset_chelsio_generic_dev },
>>  	{ PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HINIC_VF,
>>  		reset_hinic_vf_dev },
>> +	{ PCI_VENDOR_ID_MICROSOFT, MICROSOFT_2051_SVC,
>> +		msft_pcie_reset_flr},
>> +	{ PCI_VENDOR_ID_MICROSOFT, MICROSOFT_2051_MANA_MGMT,
>> +		msft_pcie_reset_flr},
>> +	{ PCI_VENDOR_ID_MICROSOFT, MICROSOFT_2051_MANA_MGMT_GFT,
>> +		msft_pcie_reset_flr},
>>  	{ 0 }
>>  };
>>  
>> -- 
>> 2.25.1
>>
Re: [PATCH] PCI: Reduce delay after FLR of Microsoft MANA devices
Posted by Leon Romanovsky 6 months, 3 weeks ago
On Wed, May 21, 2025 at 11:15:39PM +0000, grwhyte@linux.microsoft.com wrote:
> From: Graham Whyte <grwhyte@linux.microsoft.com>
> 
> Add a device-specific reset for Microsoft MANA devices with the FLR
> delay reduced from 100ms to 10ms. While this is not compliant with the pci
> spec, these devices safely complete the FLR much quicker than 100ms and
> this can be reduced to optimize certain scenarios
> 
> Signed-off-by: Graham Whyte <grwhyte@linux.microsoft.com>
> ---
>  drivers/pci/pci.c    |  3 ++-
>  drivers/pci/pci.h    |  1 +
>  drivers/pci/quirks.c | 55 ++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 58 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index 9cb1de7658b5..ad2960117acd 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -1262,7 +1262,7 @@ void pci_resume_bus(struct pci_bus *bus)
>  		pci_walk_bus(bus, pci_resume_one, NULL);
>  }
>  
> -static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
> +int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
>  {
>  	int delay = 1;
>  	bool retrain = false;
> @@ -1344,6 +1344,7 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
>  
>  	return 0;
>  }
> +EXPORT_SYMBOL_GPL(pci_dev_wait);

pci.c and quirks.c can't built as independent modules. There is no need
in EXPORT_SYMBOL_GPL here.

Thanks