[PATCH v3 01/10] PCI: Avoid saving error values for config space

Farhan Ali posted 10 patches 3 weeks ago
There is a newer version of this series
[PATCH v3 01/10] PCI: Avoid saving error values for config space
Posted by Farhan Ali 3 weeks ago
The current reset process saves the device's config space state before
reset and restores it afterward. However, when a device is in an error
state before reset, config space reads may return error values instead of
valid data. This results in saving corrupted values that get written back
to the device during state restoration.

Avoid saving the state of the config space when the device is in error.
While restoring, we only restore the state that can be restored from
kernel data (such as BARs) or that doesn't depend on the saved state.

Signed-off-by: Farhan Ali <alifm@linux.ibm.com>
---
 drivers/pci/pci.c      | 29 ++++++++++++++++++++++++++---
 drivers/pci/pcie/aer.c |  5 +++++
 drivers/pci/pcie/dpc.c |  5 +++++
 drivers/pci/pcie/ptm.c |  5 +++++
 drivers/pci/tph.c      |  5 +++++
 drivers/pci/vc.c       |  5 +++++
 6 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b0f4d98036cd..4b67d22faf0a 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1720,6 +1720,11 @@ static void pci_restore_pcie_state(struct pci_dev *dev)
 	struct pci_cap_saved_state *save_state;
 	u16 *cap;
 
+	if (!dev->state_saved) {
+		pci_warn(dev, "Not restoring pcie state, no saved state");
+		return;
+	}
+
 	/*
 	 * Restore max latencies (in the LTR capability) before enabling
 	 * LTR itself in PCI_EXP_DEVCTL2.
@@ -1775,6 +1780,11 @@ static void pci_restore_pcix_state(struct pci_dev *dev)
 	struct pci_cap_saved_state *save_state;
 	u16 *cap;
 
+	if (!dev->state_saved) {
+		pci_warn(dev, "Not restoring pcix state, no saved state");
+		return;
+	}
+
 	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_PCIX);
 	pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
 	if (!save_state || !pos)
@@ -1792,6 +1802,14 @@ static void pci_restore_pcix_state(struct pci_dev *dev)
 int pci_save_state(struct pci_dev *dev)
 {
 	int i;
+	u16 val;
+
+	pci_read_config_word(dev, PCI_DEVICE_ID, &val);
+	if (PCI_POSSIBLE_ERROR(val)) {
+		pci_warn(dev, "Device in error, not saving config space state\n");
+		return -EIO;
+	}
+
 	/* XXX: 100% dword access ok here? */
 	for (i = 0; i < 16; i++) {
 		pci_read_config_dword(dev, i * 4, &dev->saved_config_space[i]);
@@ -1854,6 +1872,14 @@ static void pci_restore_config_space_range(struct pci_dev *pdev,
 
 static void pci_restore_config_space(struct pci_dev *pdev)
 {
+	if (!pdev->state_saved) {
+		pci_warn(pdev, "No saved config space, restoring BARs\n");
+		pci_restore_bars(pdev);
+		pci_write_config_word(pdev, PCI_COMMAND,
+				PCI_COMMAND_MEMORY | PCI_COMMAND_IO);
+		return;
+	}
+
 	if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
 		pci_restore_config_space_range(pdev, 10, 15, 0, false);
 		/* Restore BARs before the command register. */
@@ -1906,9 +1932,6 @@ static void pci_restore_rebar_state(struct pci_dev *pdev)
  */
 void pci_restore_state(struct pci_dev *dev)
 {
-	if (!dev->state_saved)
-		return;
-
 	pci_restore_pcie_state(dev);
 	pci_restore_pasid_state(dev);
 	pci_restore_pri_state(dev);
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index e286c197d716..dca3502ef669 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -361,6 +361,11 @@ void pci_restore_aer_state(struct pci_dev *dev)
 	if (!aer)
 		return;
 
+	if (!dev->state_saved) {
+		pci_warn(dev, "Not restoring aer state, no saved state");
+		return;
+	}
+
 	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR);
 	if (!save_state)
 		return;
diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index fc18349614d7..62c520af71a7 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -67,6 +67,11 @@ void pci_restore_dpc_state(struct pci_dev *dev)
 	if (!pci_is_pcie(dev))
 		return;
 
+	if (!dev->state_saved) {
+		pci_warn(dev, "Not restoring dpc state, no saved state");
+		return;
+	}
+
 	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
 	if (!save_state)
 		return;
diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c
index 65e4b008be00..7b5bcc23000d 100644
--- a/drivers/pci/pcie/ptm.c
+++ b/drivers/pci/pcie/ptm.c
@@ -112,6 +112,11 @@ void pci_restore_ptm_state(struct pci_dev *dev)
 	if (!ptm)
 		return;
 
+	if (!dev->state_saved) {
+		pci_warn(dev, "Not restoring ptm state, no saved state");
+		return;
+	}
+
 	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_PTM);
 	if (!save_state)
 		return;
diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c
index cc64f93709a4..f0f1bae46736 100644
--- a/drivers/pci/tph.c
+++ b/drivers/pci/tph.c
@@ -435,6 +435,11 @@ void pci_restore_tph_state(struct pci_dev *pdev)
 	if (!pdev->tph_enabled)
 		return;
 
+	if (!pdev->state_saved) {
+		pci_warn(pdev, "Not restoring tph state, no saved state");
+		return;
+	}
+
 	save_state = pci_find_saved_ext_cap(pdev, PCI_EXT_CAP_ID_TPH);
 	if (!save_state)
 		return;
diff --git a/drivers/pci/vc.c b/drivers/pci/vc.c
index a4ff7f5f66dd..fda435cd49c1 100644
--- a/drivers/pci/vc.c
+++ b/drivers/pci/vc.c
@@ -391,6 +391,11 @@ void pci_restore_vc_state(struct pci_dev *dev)
 {
 	int i;
 
+	if (!dev->state_saved) {
+		pci_warn(dev, "Not restoring vc state, no saved state");
+		return;
+	}
+
 	for (i = 0; i < ARRAY_SIZE(vc_caps); i++) {
 		int pos;
 		struct pci_cap_saved_state *save_state;
-- 
2.43.0
Re: [PATCH v3 01/10] PCI: Avoid saving error values for config space
Posted by Bjorn Helgaas 2 weeks, 2 days ago
On Thu, Sep 11, 2025 at 11:32:58AM -0700, Farhan Ali wrote:
> The current reset process saves the device's config space state before
> reset and restores it afterward. However, when a device is in an error
> state before reset, config space reads may return error values instead of
> valid data. This results in saving corrupted values that get written back
> to the device during state restoration.
> 
> Avoid saving the state of the config space when the device is in error.
> While restoring we only restorei the state that can be restored through
> kernel data such as BARs or doesn't depend on the saved state.
> 
> Signed-off-by: Farhan Ali <alifm@linux.ibm.com>
> ---
>  drivers/pci/pci.c      | 29 ++++++++++++++++++++++++++---
>  drivers/pci/pcie/aer.c |  5 +++++
>  drivers/pci/pcie/dpc.c |  5 +++++
>  drivers/pci/pcie/ptm.c |  5 +++++
>  drivers/pci/tph.c      |  5 +++++
>  drivers/pci/vc.c       |  5 +++++
>  6 files changed, 51 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index b0f4d98036cd..4b67d22faf0a 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -1720,6 +1720,11 @@ static void pci_restore_pcie_state(struct pci_dev *dev)
>  	struct pci_cap_saved_state *save_state;
>  	u16 *cap;
>  
> +	if (!dev->state_saved) {
> +		pci_warn(dev, "Not restoring pcie state, no saved state");
> +		return;

Seems like a lot of messages.  If we want to warn about this, why
don't we do it once in pci_restore_state()?

I guess you're making some judgment about what things can be restored
even when !dev->state_saved.  That seems kind of hard to maintain in
the future as other capabilities are added.

Also seems sort of questionable if we restore partial state and keep
using the device as if all is well.  Won't the device be in some kind
of inconsistent, unpredictable state then?

Bjorn
Re: [PATCH v3 01/10] PCI: Avoid saving error values for config space
Posted by Farhan Ali 2 weeks, 1 day ago
On 9/16/2025 11:09 AM, Bjorn Helgaas wrote:
> On Thu, Sep 11, 2025 at 11:32:58AM -0700, Farhan Ali wrote:
>> The current reset process saves the device's config space state before
>> reset and restores it afterward. However, when a device is in an error
>> state before reset, config space reads may return error values instead of
>> valid data. This results in saving corrupted values that get written back
>> to the device during state restoration.
>>
>> Avoid saving the state of the config space when the device is in error.
>> While restoring we only restorei the state that can be restored through
>> kernel data such as BARs or doesn't depend on the saved state.
>>
>> Signed-off-by: Farhan Ali <alifm@linux.ibm.com>
>> ---
>>   drivers/pci/pci.c      | 29 ++++++++++++++++++++++++++---
>>   drivers/pci/pcie/aer.c |  5 +++++
>>   drivers/pci/pcie/dpc.c |  5 +++++
>>   drivers/pci/pcie/ptm.c |  5 +++++
>>   drivers/pci/tph.c      |  5 +++++
>>   drivers/pci/vc.c       |  5 +++++
>>   6 files changed, 51 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>> index b0f4d98036cd..4b67d22faf0a 100644
>> --- a/drivers/pci/pci.c
>> +++ b/drivers/pci/pci.c
>> @@ -1720,6 +1720,11 @@ static void pci_restore_pcie_state(struct pci_dev *dev)
>>   	struct pci_cap_saved_state *save_state;
>>   	u16 *cap;
>>   
>> +	if (!dev->state_saved) {
>> +		pci_warn(dev, "Not restoring pcie state, no saved state");
>> +		return;
Hi Bjorn

Thanks for taking a look.

> Seems like a lot of messages.  If we want to warn about this, why
> don't we do it once in pci_restore_state()?

I thought providing messages about which state is not restored would be 
more meaningful, since we still try to restore some of the state. But if 
the preference is to have just a single warning in pci_restore_state(), 
then I can update it. (I would also like to hear if Alex has any 
objections to that.)

>
> I guess you're making some judgment about what things can be restored
> even when !dev->state_saved.  That seems kind of hard to maintain in
> the future as other capabilities are added.
>
> Also seems sort of questionable if we restore partial state and keep
> using the device as if all is well.  Won't the device be in some kind
> of inconsistent, unpredictable state then?
>
> Bjorn

I tried to avoid restoring state that explicitly depends on previously 
saved state. For the other capabilities, which don't explicitly store 
their state, I tried to keep the existing behavior. This is based on the 
discussion with Alex 
(https://lore.kernel.org/all/20250826094845.517e0fa7.alex.williamson@redhat.com/). 
Also, AFAIU, dev->state_saved is currently set to true as long as we 
save the first 64 bytes of config space (pci_save_state()), so we could, 
for example, fail to save the PCIe state and still go on to restore 
other capabilities like PASID.

At the very least I would like to avoid corrupting the BAR registers and 
restore msix (arch_restore_msi_irqs) to get devices into a functional 
state after a reset. I am open to suggestions on how we can do this.

Would also like to get your feedback on patch 3 and the approach there 
of having a new flag in struct pci_slot.

Thanks
Farhan
Re: [PATCH v3 01/10] PCI: Avoid saving error values for config space
Posted by Alex Williamson 1 week, 6 days ago
On Tue, 16 Sep 2025 13:00:30 -0700
Farhan Ali <alifm@linux.ibm.com> wrote:

> On 9/16/2025 11:09 AM, Bjorn Helgaas wrote:
> > On Thu, Sep 11, 2025 at 11:32:58AM -0700, Farhan Ali wrote:  
> >> The current reset process saves the device's config space state before
> >> reset and restores it afterward. However, when a device is in an error
> >> state before reset, config space reads may return error values instead of
> >> valid data. This results in saving corrupted values that get written back
> >> to the device during state restoration.
> >>
> >> Avoid saving the state of the config space when the device is in error.
> >> While restoring we only restorei the state that can be restored through
> >> kernel data such as BARs or doesn't depend on the saved state.
> >>
> >> Signed-off-by: Farhan Ali <alifm@linux.ibm.com>
> >> ---
> >>   drivers/pci/pci.c      | 29 ++++++++++++++++++++++++++---
> >>   drivers/pci/pcie/aer.c |  5 +++++
> >>   drivers/pci/pcie/dpc.c |  5 +++++
> >>   drivers/pci/pcie/ptm.c |  5 +++++
> >>   drivers/pci/tph.c      |  5 +++++
> >>   drivers/pci/vc.c       |  5 +++++
> >>   6 files changed, 51 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> >> index b0f4d98036cd..4b67d22faf0a 100644
> >> --- a/drivers/pci/pci.c
> >> +++ b/drivers/pci/pci.c
> >> @@ -1720,6 +1720,11 @@ static void pci_restore_pcie_state(struct pci_dev *dev)
> >>   	struct pci_cap_saved_state *save_state;
> >>   	u16 *cap;
> >>   
> >> +	if (!dev->state_saved) {
> >> +		pci_warn(dev, "Not restoring pcie state, no saved state");
> >> +		return;  
> Hi Bjorn
> 
> Thanks for taking a look.
> 
> > Seems like a lot of messages.  If we want to warn about this, why
> > don't we do it once in pci_restore_state()?  
> 
> I thought providing messages about which state is not restored would be 
> better and meaningful as we try to restore some of the state. But if the 
> preference is to just have a single warn message in pci_restore_state 
> then I can update it. (would also like to hear if Alex has any 
> objections to that)

I thought it got a bit verbose as well.

> > I guess you're making some judgment about what things can be restored
> > even when !dev->state_saved.  That seems kind of hard to maintain in
> > the future as other capabilities are added.
> >
> > Also seems sort of questionable if we restore partial state and keep
> > using the device as if all is well.  Won't the device be in some kind
> > of inconsistent, unpredictable state then?

To an extent that's always true.  Reset is a lossy process, we're
intentionally throwing away the internal state of the device and
attempting to restore the architected config space as best as we can.
It's hard to guarantee it's complete though.

In this case we're largely just trying to determine whether the
pre-reset config space is already broken, which would mean that some
forms of reset are unavailable and our restore data is bogus.  In
addition to the s390x specific scenario resolved here, I hope this
might eliminate some of the "device stuck in D3" or "device stuck with
pending transaction" errors we currently see trying to do PM or FLR
resets on broken devices.  Failing to actually reset the device in any
way, then trying to write back -1 for restore data is what we'd see
today, which also isn't what we intend.

It probably doesn't make sense to note the specific capabilities that
aren't being restored.  A single pci_warn indicating that the device
config space is inaccessible prior to reset and will only be partially
restored is probably sufficient.  Thanks,

Alex
Re: [PATCH v3 01/10] PCI: Avoid saving error values for config space
Posted by Alex Williamson 2 weeks, 5 days ago
On Thu, 11 Sep 2025 11:32:58 -0700
Farhan Ali <alifm@linux.ibm.com> wrote:

> The current reset process saves the device's config space state before
> reset and restores it afterward. However, when a device is in an error
> state before reset, config space reads may return error values instead of
> valid data. This results in saving corrupted values that get written back
> to the device during state restoration.
> 
> Avoid saving the state of the config space when the device is in error.
> While restoring we only restorei the state that can be restored through

s/restorei/restore/

> kernel data such as BARs or doesn't depend on the saved state.
> 
> Signed-off-by: Farhan Ali <alifm@linux.ibm.com>
> ---
>  drivers/pci/pci.c      | 29 ++++++++++++++++++++++++++---
>  drivers/pci/pcie/aer.c |  5 +++++
>  drivers/pci/pcie/dpc.c |  5 +++++
>  drivers/pci/pcie/ptm.c |  5 +++++
>  drivers/pci/tph.c      |  5 +++++
>  drivers/pci/vc.c       |  5 +++++
>  6 files changed, 51 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index b0f4d98036cd..4b67d22faf0a 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -1720,6 +1720,11 @@ static void pci_restore_pcie_state(struct pci_dev *dev)
>  	struct pci_cap_saved_state *save_state;
>  	u16 *cap;
>  
> +	if (!dev->state_saved) {
> +		pci_warn(dev, "Not restoring pcie state, no saved state");
> +		return;
> +	}
> +
>  	/*
>  	 * Restore max latencies (in the LTR capability) before enabling
>  	 * LTR itself in PCI_EXP_DEVCTL2.
> @@ -1775,6 +1780,11 @@ static void pci_restore_pcix_state(struct pci_dev *dev)
>  	struct pci_cap_saved_state *save_state;
>  	u16 *cap;
>  
> +	if (!dev->state_saved) {
> +		pci_warn(dev, "Not restoring pcix state, no saved state");
> +		return;
> +	}
> +
>  	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_PCIX);
>  	pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
>  	if (!save_state || !pos)
> @@ -1792,6 +1802,14 @@ static void pci_restore_pcix_state(struct pci_dev *dev)
>  int pci_save_state(struct pci_dev *dev)
>  {
>  	int i;
> +	u16 val;
> +
> +	pci_read_config_word(dev, PCI_DEVICE_ID, &val);
> +	if (PCI_POSSIBLE_ERROR(val)) {
> +		pci_warn(dev, "Device in error, not saving config space state\n");
> +		return -EIO;
> +	}
> +

I don't think this works with standard VFs, per the spec the device ID
register returns 0xFFFF.  Likely need to look for a CRS or error status
across both vendor and device ID registers.

We could be a little more formal and specific describing the skipped
states too, ex. "PCIe capability", "PCI-X capability", "PCI AER
capability", etc.  Thanks,

Alex

>  	/* XXX: 100% dword access ok here? */
>  	for (i = 0; i < 16; i++) {
>  		pci_read_config_dword(dev, i * 4, &dev->saved_config_space[i]);
> @@ -1854,6 +1872,14 @@ static void pci_restore_config_space_range(struct pci_dev *pdev,
>  
>  static void pci_restore_config_space(struct pci_dev *pdev)
>  {
> +	if (!pdev->state_saved) {
> +		pci_warn(pdev, "No saved config space, restoring BARs\n");
> +		pci_restore_bars(pdev);
> +		pci_write_config_word(pdev, PCI_COMMAND,
> +				PCI_COMMAND_MEMORY | PCI_COMMAND_IO);
> +		return;
> +	}
> +
>  	if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
>  		pci_restore_config_space_range(pdev, 10, 15, 0, false);
>  		/* Restore BARs before the command register. */
> @@ -1906,9 +1932,6 @@ static void pci_restore_rebar_state(struct pci_dev *pdev)
>   */
>  void pci_restore_state(struct pci_dev *dev)
>  {
> -	if (!dev->state_saved)
> -		return;
> -
>  	pci_restore_pcie_state(dev);
>  	pci_restore_pasid_state(dev);
>  	pci_restore_pri_state(dev);
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index e286c197d716..dca3502ef669 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -361,6 +361,11 @@ void pci_restore_aer_state(struct pci_dev *dev)
>  	if (!aer)
>  		return;
>  
> +	if (!dev->state_saved) {
> +		pci_warn(dev, "Not restoring aer state, no saved state");
> +		return;
> +	}
> +
>  	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR);
>  	if (!save_state)
>  		return;
> diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
> index fc18349614d7..62c520af71a7 100644
> --- a/drivers/pci/pcie/dpc.c
> +++ b/drivers/pci/pcie/dpc.c
> @@ -67,6 +67,11 @@ void pci_restore_dpc_state(struct pci_dev *dev)
>  	if (!pci_is_pcie(dev))
>  		return;
>  
> +	if (!dev->state_saved) {
> +		pci_warn(dev, "Not restoring dpc state, no saved state");
> +		return;
> +	}
> +
>  	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
>  	if (!save_state)
>  		return;
> diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c
> index 65e4b008be00..7b5bcc23000d 100644
> --- a/drivers/pci/pcie/ptm.c
> +++ b/drivers/pci/pcie/ptm.c
> @@ -112,6 +112,11 @@ void pci_restore_ptm_state(struct pci_dev *dev)
>  	if (!ptm)
>  		return;
>  
> +	if (!dev->state_saved) {
> +		pci_warn(dev, "Not restoring ptm state, no saved state");
> +		return;
> +	}
> +
>  	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_PTM);
>  	if (!save_state)
>  		return;
> diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c
> index cc64f93709a4..f0f1bae46736 100644
> --- a/drivers/pci/tph.c
> +++ b/drivers/pci/tph.c
> @@ -435,6 +435,11 @@ void pci_restore_tph_state(struct pci_dev *pdev)
>  	if (!pdev->tph_enabled)
>  		return;
>  
> +	if (!pdev->state_saved) {
> +		pci_warn(pdev, "Not restoring tph state, no saved state");
> +		return;
> +	}
> +
>  	save_state = pci_find_saved_ext_cap(pdev, PCI_EXT_CAP_ID_TPH);
>  	if (!save_state)
>  		return;
> diff --git a/drivers/pci/vc.c b/drivers/pci/vc.c
> index a4ff7f5f66dd..fda435cd49c1 100644
> --- a/drivers/pci/vc.c
> +++ b/drivers/pci/vc.c
> @@ -391,6 +391,11 @@ void pci_restore_vc_state(struct pci_dev *dev)
>  {
>  	int i;
>  
> +	if (!dev->state_saved) {
> +		pci_warn(dev, "Not restoring vc state, no saved state");
> +		return;
> +	}
> +
>  	for (i = 0; i < ARRAY_SIZE(vc_caps); i++) {
>  		int pos;
>  		struct pci_cap_saved_state *save_state;
Re: [PATCH v3 01/10] PCI: Avoid saving error values for config space
Posted by Farhan Ali 2 weeks, 3 days ago
On 9/13/2025 1:27 AM, Alex Williamson wrote:
> On Thu, 11 Sep 2025 11:32:58 -0700
> Farhan Ali <alifm@linux.ibm.com> wrote:
>
>> The current reset process saves the device's config space state before
>> reset and restores it afterward. However, when a device is in an error
>> state before reset, config space reads may return error values instead of
>> valid data. This results in saving corrupted values that get written back
>> to the device during state restoration.
>>
>> Avoid saving the state of the config space when the device is in error.
>> While restoring we only restorei the state that can be restored through
> s/restorei/restore/

Thanks for catching that, will fix.

>> kernel data such as BARs or doesn't depend on the saved state.
>>
>> Signed-off-by: Farhan Ali <alifm@linux.ibm.com>
>> ---
>>   drivers/pci/pci.c      | 29 ++++++++++++++++++++++++++---
>>   drivers/pci/pcie/aer.c |  5 +++++
>>   drivers/pci/pcie/dpc.c |  5 +++++
>>   drivers/pci/pcie/ptm.c |  5 +++++
>>   drivers/pci/tph.c      |  5 +++++
>>   drivers/pci/vc.c       |  5 +++++
>>   6 files changed, 51 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>> index b0f4d98036cd..4b67d22faf0a 100644
>> --- a/drivers/pci/pci.c
>> +++ b/drivers/pci/pci.c
>> @@ -1720,6 +1720,11 @@ static void pci_restore_pcie_state(struct pci_dev *dev)
>>   	struct pci_cap_saved_state *save_state;
>>   	u16 *cap;
>>   
>> +	if (!dev->state_saved) {
>> +		pci_warn(dev, "Not restoring pcie state, no saved state");
>> +		return;
>> +	}
>> +
>>   	/*
>>   	 * Restore max latencies (in the LTR capability) before enabling
>>   	 * LTR itself in PCI_EXP_DEVCTL2.
>> @@ -1775,6 +1780,11 @@ static void pci_restore_pcix_state(struct pci_dev *dev)
>>   	struct pci_cap_saved_state *save_state;
>>   	u16 *cap;
>>   
>> +	if (!dev->state_saved) {
>> +		pci_warn(dev, "Not restoring pcix state, no saved state");
>> +		return;
>> +	}
>> +
>>   	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_PCIX);
>>   	pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
>>   	if (!save_state || !pos)
>> @@ -1792,6 +1802,14 @@ static void pci_restore_pcix_state(struct pci_dev *dev)
>>   int pci_save_state(struct pci_dev *dev)
>>   {
>>   	int i;
>> +	u16 val;
>> +
>> +	pci_read_config_word(dev, PCI_DEVICE_ID, &val);
>> +	if (PCI_POSSIBLE_ERROR(val)) {
>> +		pci_warn(dev, "Device in error, not saving config space state\n");
>> +		return -EIO;
>> +	}
>> +
> I don't think this works with standard VFs, per the spec the device ID
> register returns 0xFFFF.  Likely need to look for a CRS or error status
> across both vendor and device ID registers.

Yes, I missed that. However, the spec also says that both the vendor and 
device ID registers can be 0xFFFF for standard VFs. The implementation 
note in the spec says legacy software can ignore VFs if both the device 
ID and vendor ID are 0xFFFF. So I'm not sure checking both would work here.

Also, by CRS are you referring to Configuration Request Retry Status? (In 
the PCIe spec v6 I couldn't find a reference to CRS, but found RRS, so it 
has probably been renamed to Request Retry Status.) Based on my 
understanding of the spec, a function will return CRS after a reset, but 
in this case we are trying to read and save the state before a reset. 
Based on pci_bus_rrs_vendor_id(), the vendor ID returned on a CRS would be 
0x1, but that wouldn't work for s390, as reads on error currently return 
0xFFFF. Apologies if I misunderstood anything.

I see that pci_dev_wait() checks the command and status registers in case 
RRS is not available; would that be an appropriate check here?


>
> We could be a little more formal and specific describing the skipped
> states too, ex. "PCIe capability", "PCI-X capability", "PCI AER
> capability", etc.  Thanks,
>
> Alex

Makes sense, will update the warn messages.

Thanks
Farhan

>
>>   	/* XXX: 100% dword access ok here? */
>>   	for (i = 0; i < 16; i++) {
>>   		pci_read_config_dword(dev, i * 4, &dev->saved_config_space[i]);
>> @@ -1854,6 +1872,14 @@ static void pci_restore_config_space_range(struct pci_dev *pdev,
>>   
>>   static void pci_restore_config_space(struct pci_dev *pdev)
>>   {
>> +	if (!pdev->state_saved) {
>> +		pci_warn(pdev, "No saved config space, restoring BARs\n");
>> +		pci_restore_bars(pdev);
>> +		pci_write_config_word(pdev, PCI_COMMAND,
>> +				PCI_COMMAND_MEMORY | PCI_COMMAND_IO);
>> +		return;
>> +	}
>> +
>>   	if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
>>   		pci_restore_config_space_range(pdev, 10, 15, 0, false);
>>   		/* Restore BARs before the command register. */
>> @@ -1906,9 +1932,6 @@ static void pci_restore_rebar_state(struct pci_dev *pdev)
>>    */
>>   void pci_restore_state(struct pci_dev *dev)
>>   {
>> -	if (!dev->state_saved)
>> -		return;
>> -
>>   	pci_restore_pcie_state(dev);
>>   	pci_restore_pasid_state(dev);
>>   	pci_restore_pri_state(dev);
>> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
>> index e286c197d716..dca3502ef669 100644
>> --- a/drivers/pci/pcie/aer.c
>> +++ b/drivers/pci/pcie/aer.c
>> @@ -361,6 +361,11 @@ void pci_restore_aer_state(struct pci_dev *dev)
>>   	if (!aer)
>>   		return;
>>   
>> +	if (!dev->state_saved) {
>> +		pci_warn(dev, "Not restoring aer state, no saved state");
>> +		return;
>> +	}
>> +
>>   	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR);
>>   	if (!save_state)
>>   		return;
>> diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
>> index fc18349614d7..62c520af71a7 100644
>> --- a/drivers/pci/pcie/dpc.c
>> +++ b/drivers/pci/pcie/dpc.c
>> @@ -67,6 +67,11 @@ void pci_restore_dpc_state(struct pci_dev *dev)
>>   	if (!pci_is_pcie(dev))
>>   		return;
>>   
>> +	if (!dev->state_saved) {
>> +		pci_warn(dev, "Not restoring dpc state, no saved state");
>> +		return;
>> +	}
>> +
>>   	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
>>   	if (!save_state)
>>   		return;
>> diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c
>> index 65e4b008be00..7b5bcc23000d 100644
>> --- a/drivers/pci/pcie/ptm.c
>> +++ b/drivers/pci/pcie/ptm.c
>> @@ -112,6 +112,11 @@ void pci_restore_ptm_state(struct pci_dev *dev)
>>   	if (!ptm)
>>   		return;
>>   
>> +	if (!dev->state_saved) {
>> +		pci_warn(dev, "Not restoring ptm state, no saved state");
>> +		return;
>> +	}
>> +
>>   	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_PTM);
>>   	if (!save_state)
>>   		return;
>> diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c
>> index cc64f93709a4..f0f1bae46736 100644
>> --- a/drivers/pci/tph.c
>> +++ b/drivers/pci/tph.c
>> @@ -435,6 +435,11 @@ void pci_restore_tph_state(struct pci_dev *pdev)
>>   	if (!pdev->tph_enabled)
>>   		return;
>>   
>> +	if (!pdev->state_saved) {
>> +		pci_warn(pdev, "Not restoring tph state, no saved state");
>> +		return;
>> +	}
>> +
>>   	save_state = pci_find_saved_ext_cap(pdev, PCI_EXT_CAP_ID_TPH);
>>   	if (!save_state)
>>   		return;
>> diff --git a/drivers/pci/vc.c b/drivers/pci/vc.c
>> index a4ff7f5f66dd..fda435cd49c1 100644
>> --- a/drivers/pci/vc.c
>> +++ b/drivers/pci/vc.c
>> @@ -391,6 +391,11 @@ void pci_restore_vc_state(struct pci_dev *dev)
>>   {
>>   	int i;
>>   
>> +	if (!dev->state_saved) {
>> +		pci_warn(dev, "Not restoring vc state, no saved state");
>> +		return;
>> +	}
>> +
>>   	for (i = 0; i < ARRAY_SIZE(vc_caps); i++) {
>>   		int pos;
>>   		struct pci_cap_saved_state *save_state;