[PATCH v5 5/5] pci: Suspend iommu function prior to resetting a device

Nicolin Chen posted 5 patches 2 months, 4 weeks ago
There is a newer version of this series
[PATCH v5 5/5] pci: Suspend iommu function prior to resetting a device
Posted by Nicolin Chen 2 months, 4 weeks ago
PCIe permits a device to ignore ATS invalidation TLPs, while processing a
reset. This creates a problem visible to the OS where an ATS invalidation
command will time out: e.g. an SVA domain will have no coordination with a
reset event and can racily issue ATS invalidations to a resetting device.

The PCIe spec in sec 10.3.1 IMPLEMENTATION NOTE recommends to disable and
block ATS before initiating a Function Level Reset. It also mentions that
other reset methods could have the same vulnerability as well.

Now iommu_dev_reset_prepare/done() helpers are introduced for this matter.
Use them in all the existing reset functions, which will attach the device
to an IOMMU_DOMAIN_BLOCKED during a reset, so as to allow IOMMU driver to:
 - invoke pci_disable_ats() and pci_enable_ats(), if necessary
 - wait for all ATS invalidations to complete
 - stop issuing new ATS invalidations
 - fence any incoming ATS queries

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 drivers/pci/pci.h      |  2 ++
 drivers/pci/pci-acpi.c | 12 ++++++--
 drivers/pci/pci.c      | 68 ++++++++++++++++++++++++++++++++++++++----
 drivers/pci/quirks.c   | 18 ++++++++++-
 4 files changed, 92 insertions(+), 8 deletions(-)

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 4492b809094b5..a29286dfd870c 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -198,6 +198,8 @@ void pci_init_reset_methods(struct pci_dev *dev);
 int pci_bridge_secondary_bus_reset(struct pci_dev *dev);
 int pci_bus_error_reset(struct pci_dev *dev);
 int __pci_reset_bus(struct pci_bus *bus);
+int pci_reset_iommu_prepare(struct pci_dev *dev);
+void pci_reset_iommu_done(struct pci_dev *dev);
 
 struct pci_cap_saved_data {
 	u16		cap_nr;
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 9369377725fa0..60d29b183f2c2 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -971,6 +971,7 @@ void pci_set_acpi_fwnode(struct pci_dev *dev)
 int pci_dev_acpi_reset(struct pci_dev *dev, bool probe)
 {
 	acpi_handle handle = ACPI_HANDLE(&dev->dev);
+	int ret = 0;
 
 	if (!handle || !acpi_has_method(handle, "_RST"))
 		return -ENOTTY;
@@ -978,12 +979,19 @@ int pci_dev_acpi_reset(struct pci_dev *dev, bool probe)
 	if (probe)
 		return 0;
 
+	ret = pci_reset_iommu_prepare(dev);
+	if (ret) {
+		pci_err(dev, "failed to stop IOMMU\n");
+		return ret;
+	}
+
 	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_RST", NULL, NULL))) {
 		pci_warn(dev, "ACPI _RST failed\n");
-		return -ENOTTY;
+		ret = -ENOTTY;
 	}
 
-	return 0;
+	pci_reset_iommu_done(dev);
+	return ret;
 }
 
 bool acpi_pci_power_manageable(struct pci_dev *dev)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b14dd064006cc..52461d952cbf1 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -13,6 +13,7 @@
 #include <linux/delay.h>
 #include <linux/dmi.h>
 #include <linux/init.h>
+#include <linux/iommu.h>
 #include <linux/msi.h>
 #include <linux/of.h>
 #include <linux/pci.h>
@@ -25,6 +26,7 @@
 #include <linux/logic_pio.h>
 #include <linux/device.h>
 #include <linux/pm_runtime.h>
+#include <linux/pci-ats.h>
 #include <linux/pci_hotplug.h>
 #include <linux/vmalloc.h>
 #include <asm/dma.h>
@@ -95,6 +97,23 @@ bool pci_reset_supported(struct pci_dev *dev)
 	return dev->reset_methods[0] != 0;
 }
 
+/*
+ * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS before
+ * initiating a reset. Notify the iommu driver that enabled ATS.
+ */
+int pci_reset_iommu_prepare(struct pci_dev *dev)
+{
+	if (pci_ats_supported(dev))
+		return iommu_dev_reset_prepare(&dev->dev);
+	return 0;
+}
+
+void pci_reset_iommu_done(struct pci_dev *dev)
+{
+	if (pci_ats_supported(dev))
+		iommu_dev_reset_done(&dev->dev);
+}
+
 #ifdef CONFIG_PCI_DOMAINS
 int pci_domains_supported = 1;
 #endif
@@ -4478,13 +4497,22 @@ EXPORT_SYMBOL(pci_wait_for_pending_transaction);
  */
 int pcie_flr(struct pci_dev *dev)
 {
+	int ret = 0;
+
 	if (!pci_wait_for_pending_transaction(dev))
 		pci_err(dev, "timed out waiting for pending transaction; performing function level reset anyway\n");
 
+	/* Have to call it after waiting for pending DMA transaction */
+	ret = pci_reset_iommu_prepare(dev);
+	if (ret) {
+		pci_err(dev, "failed to stop IOMMU\n");
+		return ret;
+	}
+
 	pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR);
 
 	if (dev->imm_ready)
-		return 0;
+		goto done;
 
 	/*
 	 * Per PCIe r4.0, sec 6.6.2, a device must complete an FLR within
@@ -4493,7 +4521,10 @@ int pcie_flr(struct pci_dev *dev)
 	 */
 	msleep(100);
 
-	return pci_dev_wait(dev, "FLR", PCIE_RESET_READY_POLL_MS);
+	ret = pci_dev_wait(dev, "FLR", PCIE_RESET_READY_POLL_MS);
+done:
+	pci_reset_iommu_done(dev);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(pcie_flr);
 
@@ -4521,6 +4552,7 @@ EXPORT_SYMBOL_GPL(pcie_reset_flr);
 
 static int pci_af_flr(struct pci_dev *dev, bool probe)
 {
+	int ret = 0;
 	int pos;
 	u8 cap;
 
@@ -4547,10 +4579,17 @@ static int pci_af_flr(struct pci_dev *dev, bool probe)
 				 PCI_AF_STATUS_TP << 8))
 		pci_err(dev, "timed out waiting for pending transaction; performing AF function level reset anyway\n");
 
+	/* Have to call it after waiting for pending DMA transaction */
+	ret = pci_reset_iommu_prepare(dev);
+	if (ret) {
+		pci_err(dev, "failed to stop IOMMU\n");
+		return ret;
+	}
+
 	pci_write_config_byte(dev, pos + PCI_AF_CTRL, PCI_AF_CTRL_FLR);
 
 	if (dev->imm_ready)
-		return 0;
+		goto done;
 
 	/*
 	 * Per Advanced Capabilities for Conventional PCI ECN, 13 April 2006,
@@ -4560,7 +4599,10 @@ static int pci_af_flr(struct pci_dev *dev, bool probe)
 	 */
 	msleep(100);
 
-	return pci_dev_wait(dev, "AF_FLR", PCIE_RESET_READY_POLL_MS);
+	ret = pci_dev_wait(dev, "AF_FLR", PCIE_RESET_READY_POLL_MS);
+done:
+	pci_reset_iommu_done(dev);
+	return ret;
 }
 
 /**
@@ -4581,6 +4623,7 @@ static int pci_af_flr(struct pci_dev *dev, bool probe)
 static int pci_pm_reset(struct pci_dev *dev, bool probe)
 {
 	u16 csr;
+	int ret;
 
 	if (!dev->pm_cap || dev->dev_flags & PCI_DEV_FLAGS_NO_PM_RESET)
 		return -ENOTTY;
@@ -4595,6 +4638,12 @@ static int pci_pm_reset(struct pci_dev *dev, bool probe)
 	if (dev->current_state != PCI_D0)
 		return -EINVAL;
 
+	ret = pci_reset_iommu_prepare(dev);
+	if (ret) {
+		pci_err(dev, "failed to stop IOMMU\n");
+		return ret;
+	}
+
 	csr &= ~PCI_PM_CTRL_STATE_MASK;
 	csr |= PCI_D3hot;
 	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);
@@ -4605,7 +4654,9 @@ static int pci_pm_reset(struct pci_dev *dev, bool probe)
 	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);
 	pci_dev_d3_sleep(dev);
 
-	return pci_dev_wait(dev, "PM D3hot->D0", PCIE_RESET_READY_POLL_MS);
+	ret = pci_dev_wait(dev, "PM D3hot->D0", PCIE_RESET_READY_POLL_MS);
+	pci_reset_iommu_done(dev);
+	return ret;
 }
 
 /**
@@ -5060,6 +5111,12 @@ static int cxl_reset_bus_function(struct pci_dev *dev, bool probe)
 	if (rc)
 		return -ENOTTY;
 
+	rc = pci_reset_iommu_prepare(dev);
+	if (rc) {
+		pci_err(dev, "failed to stop IOMMU\n");
+		return rc;
+	}
+
 	if (reg & PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR) {
 		val = reg;
 	} else {
@@ -5074,6 +5131,7 @@ static int cxl_reset_bus_function(struct pci_dev *dev, bool probe)
 		pci_write_config_word(bridge, dvsec + PCI_DVSEC_CXL_PORT_CTL,
 				      reg);
 
+	pci_reset_iommu_done(dev);
 	return rc;
 }
 
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 214ed060ca1b3..891d9e5a97e93 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4226,6 +4226,22 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
 	{ 0 }
 };
 
+static int __pci_dev_specific_reset(struct pci_dev *dev, bool probe,
+				    const struct pci_dev_reset_methods *i)
+{
+	int ret;
+
+	ret = pci_reset_iommu_prepare(dev);
+	if (ret) {
+		pci_err(dev, "failed to stop IOMMU\n");
+		return ret;
+	}
+
+	ret = i->reset(dev, probe);
+	pci_reset_iommu_done(dev);
+	return ret;
+}
+
 /*
  * These device-specific reset methods are here rather than in a driver
  * because when a host assigns a device to a guest VM, the host may need
@@ -4240,7 +4256,7 @@ int pci_dev_specific_reset(struct pci_dev *dev, bool probe)
 		     i->vendor == (u16)PCI_ANY_ID) &&
 		    (i->device == dev->device ||
 		     i->device == (u16)PCI_ANY_ID))
-			return i->reset(dev, probe);
+			return __pci_dev_specific_reset(dev, probe, i);
 	}
 
 	return -ENOTTY;
-- 
2.43.0
Re: [PATCH v5 5/5] pci: Suspend iommu function prior to resetting a device
Posted by Bjorn Helgaas 2 months, 3 weeks ago
On Mon, Nov 10, 2025 at 09:12:55PM -0800, Nicolin Chen wrote:

Run "git log --oneline drivers/pci/pci.c" and match the subject line
style.

> PCIe permits a device to ignore ATS invalidation TLPs, while processing a
> reset. This creates a problem visible to the OS where an ATS invalidation
> command will time out: e.g. an SVA domain will have no coordination with a
> reset event and can racily issue ATS invalidations to a resetting device.

s/TLPs, while/TLPs while/

> The PCIe spec in sec 10.3.1 IMPLEMENTATION NOTE recommends to disable and
> block ATS before initiating a Function Level Reset. It also mentions that
> other reset methods could have the same vulnerability as well.

Include spec revision, e.g., "PCIe r7.0, sec 10.3.1".

> Now iommu_dev_reset_prepare/done() helpers are introduced for this matter.

s/Now ... are introduced for this matter/Add ...helpers/

> Use them in all the existing reset functions, which will attach the device
> to an IOMMU_DOMAIN_BLOCKED during a reset, so as to allow IOMMU driver to:
>  - invoke pci_disable_ats() and pci_enable_ats(), if necessary
>  - wait for all ATS invalidations to complete
>  - stop issuing new ATS invalidations
>  - fence any incoming ATS queries

Thanks for addressing this problem.

> +++ b/drivers/pci/pci-acpi.c
> @@ -971,6 +971,7 @@ void pci_set_acpi_fwnode(struct pci_dev *dev)
>  int pci_dev_acpi_reset(struct pci_dev *dev, bool probe)
>  {
>  	acpi_handle handle = ACPI_HANDLE(&dev->dev);
> +	int ret = 0;

Unnecessary initialization.

> +int pci_reset_iommu_prepare(struct pci_dev *dev)
> +{
> +	if (pci_ats_supported(dev))
> +		return iommu_dev_reset_prepare(&dev->dev);

Why bother checking pci_ats_supported() here?  That could be done
inside iommu_dev_reset_prepare(), since iommu.c already uses
dev_is_pci() and pci_ats_supported() is already exported outside
drivers/pci/.

> +void pci_reset_iommu_done(struct pci_dev *dev)
> +{
> +	if (pci_ats_supported(dev))
> +		iommu_dev_reset_done(&dev->dev);

And here.

>  int pcie_flr(struct pci_dev *dev)
>  {
> +	int ret = 0;

Unnecessary initialization.

>  static int pci_af_flr(struct pci_dev *dev, bool probe)
>  {
> +	int ret = 0;

Unnecessary initialization.
Re: [PATCH v5 5/5] pci: Suspend iommu function prior to resetting a device
Posted by Nicolin Chen 2 months, 2 weeks ago
On Mon, Nov 17, 2025 at 04:58:52PM -0600, Bjorn Helgaas wrote:
> On Mon, Nov 10, 2025 at 09:12:55PM -0800, Nicolin Chen wrote:
> > +int pci_reset_iommu_prepare(struct pci_dev *dev)
> > +{
> > +	if (pci_ats_supported(dev))
> > +		return iommu_dev_reset_prepare(&dev->dev);
> 
> Why bother checking pci_ats_supported() here?  That could be done
> inside iommu_dev_reset_prepare(), since iommu.c already uses
> dev_is_pci() and pci_ats_supported() is already exported outside
> drivers/pci/.

Ack. I will fix all of these.

Thanks for the review!
Nicolin
RE: [PATCH v5 5/5] pci: Suspend iommu function prior to resetting a device
Posted by Tian, Kevin 2 months, 3 weeks ago
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Tuesday, November 11, 2025 1:13 PM
> 
> PCIe permits a device to ignore ATS invalidation TLPs, while processing a
> reset. This creates a problem visible to the OS where an ATS invalidation
> command will time out: e.g. an SVA domain will have no coordination with a
> reset event and can racily issue ATS invalidations to a resetting device.
> 
> The PCIe spec in sec 10.3.1 IMPLEMENTATION NOTE recommends to disable
> and
> block ATS before initiating a Function Level Reset. It also mentions that
> other reset methods could have the same vulnerability as well.
> 
> Now iommu_dev_reset_prepare/done() helpers are introduced for this
> matter.
> Use them in all the existing reset functions, which will attach the device

It looks like pci_reset_bus_function() was missed?

> @@ -971,6 +971,7 @@ void pci_set_acpi_fwnode(struct pci_dev *dev)
>  int pci_dev_acpi_reset(struct pci_dev *dev, bool probe)
>  {
>  	acpi_handle handle = ACPI_HANDLE(&dev->dev);
> +	int ret = 0;

no need to initialize it. ditto for other reset functions.

> +/*
> + * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS
> before
> + * initiating a reset. Notify the iommu driver that enabled ATS.
> + */
> +int pci_reset_iommu_prepare(struct pci_dev *dev)
> +{
> +	if (pci_ats_supported(dev))
> +		return iommu_dev_reset_prepare(&dev->dev);
> +	return 0;
> +}

the comment says "driver that enabled ATS", but the code checks
whether ATS is supported.

which one is desired?

> 
> +	/* Have to call it after waiting for pending DMA transaction */
> +	ret = pci_reset_iommu_prepare(dev);
> +	if (ret) {
> +		pci_err(dev, "failed to stop IOMMU\n");

the error message could be more informative.
Re: [PATCH v5 5/5] pci: Suspend iommu function prior to resetting a device
Posted by Nicolin Chen 2 months, 3 weeks ago
On Fri, Nov 14, 2025 at 09:45:31AM +0000, Tian, Kevin wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: Tuesday, November 11, 2025 1:13 PM
> > 
> > PCIe permits a device to ignore ATS invalidation TLPs, while processing a
> > reset. This creates a problem visible to the OS where an ATS invalidation
> > command will time out: e.g. an SVA domain will have no coordination with a
> > reset event and can racily issue ATS invalidations to a resetting device.
> > 
> > The PCIe spec in sec 10.3.1 IMPLEMENTATION NOTE recommends to disable
> > and
> > block ATS before initiating a Function Level Reset. It also mentions that
> > other reset methods could have the same vulnerability as well.
> > 
> > Now iommu_dev_reset_prepare/done() helpers are introduced for this
> > matter.
> > Use them in all the existing reset functions, which will attach the device
> 
> looks pci_reset_bus_function() was missed?

Will add that.

> > @@ -971,6 +971,7 @@ void pci_set_acpi_fwnode(struct pci_dev *dev)
> >  int pci_dev_acpi_reset(struct pci_dev *dev, bool probe)
> >  {
> >  	acpi_handle handle = ACPI_HANDLE(&dev->dev);
> > +	int ret = 0;
> 
> no need to initialize it. ditto for other reset functions.

Ack.

> > +/*
> > + * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS
> > before
> > + * initiating a reset. Notify the iommu driver that enabled ATS.
> > + */
> > +int pci_reset_iommu_prepare(struct pci_dev *dev)
> > +{
> > +	if (pci_ats_supported(dev))
> > +		return iommu_dev_reset_prepare(&dev->dev);
> > +	return 0;
> > +}
> 
> the comment says "driver that enabled ATS", but the code checks
> whether ATS is supported.
> 
> which one is desired?

The comment says "the iommu driver that enabled ATS". That doesn't
conflict with what the PCI core checks here, does it?

> > +	/* Have to call it after waiting for pending DMA transaction */
> > +	ret = pci_reset_iommu_prepare(dev);
> > +	if (ret) {
> > +		pci_err(dev, "failed to stop IOMMU\n");
> 
> the error message could be more informative.

OK. Perhaps print the ret value.

Thanks!
Nicolin
RE: [PATCH v5 5/5] pci: Suspend iommu function prior to resetting a device
Posted by Tian, Kevin 2 months, 3 weeks ago
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Saturday, November 15, 2025 2:01 AM
> 
> On Fri, Nov 14, 2025 at 09:45:31AM +0000, Tian, Kevin wrote:
> > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > Sent: Tuesday, November 11, 2025 1:13 PM
> > >
> > > +/*
> > > + * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables
> ATS
> > > before
> > > + * initiating a reset. Notify the iommu driver that enabled ATS.
> > > + */
> > > +int pci_reset_iommu_prepare(struct pci_dev *dev)
> > > +{
> > > +	if (pci_ats_supported(dev))
> > > +		return iommu_dev_reset_prepare(&dev->dev);
> > > +	return 0;
> > > +}
> >
> > the comment says "driver that enabled ATS", but the code checks
> > whether ATS is supported.
> >
> > which one is desired?
> 
> The comments says "the iommu driver that enabled ATS". It doesn't
> conflict with what the PCI core checks here?

Actually, this is sent to all IOMMU drivers. There is no check in this
path on whether a specific driver has enabled ATS.

> 
> > > +	/* Have to call it after waiting for pending DMA transaction */
> > > +	ret = pci_reset_iommu_prepare(dev);
> > > +	if (ret) {
> > > +		pci_err(dev, "failed to stop IOMMU\n");
> >
> > the error message could be more informative.
> 
> OK. Perhaps print the ret value.
> 

and mention that it's for PCI reset.
Re: [PATCH v5 5/5] pci: Suspend iommu function prior to resetting a device
Posted by Nicolin Chen 2 months, 3 weeks ago
On Mon, Nov 17, 2025 at 04:52:05AM +0000, Tian, Kevin wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: Saturday, November 15, 2025 2:01 AM
> > 
> > On Fri, Nov 14, 2025 at 09:45:31AM +0000, Tian, Kevin wrote:
> > > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > > Sent: Tuesday, November 11, 2025 1:13 PM
> > > >
> > > > +/*
> > > > + * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables
> > ATS
> > > > before
> > > > + * initiating a reset. Notify the iommu driver that enabled ATS.
> > > > + */
> > > > +int pci_reset_iommu_prepare(struct pci_dev *dev)
> > > > +{
> > > > +	if (pci_ats_supported(dev))
> > > > +		return iommu_dev_reset_prepare(&dev->dev);
> > > > +	return 0;
> > > > +}
> > >
> > > the comment says "driver that enabled ATS", but the code checks
> > > whether ATS is supported.
> > >
> > > which one is desired?
> > 
> > The comments says "the iommu driver that enabled ATS". It doesn't
> > conflict with what the PCI core checks here?
> 
> actually this is sent to all IOMMU drivers. there is no check on whether
> a specific driver has enabled ATS in this path.

But the comment doesn't say "check"..

How about "Notify the iommu driver that enables/disables ATS"?

The point is that pci_enable_ats() is called in iommu drivers.

> > > > +	/* Have to call it after waiting for pending DMA transaction */
> > > > +	ret = pci_reset_iommu_prepare(dev);
> > > > +	if (ret) {
> > > > +		pci_err(dev, "failed to stop IOMMU\n");
> > >
> > > the error message could be more informative.
> > 
> > OK. Perhaps print the ret value.
> > 
> 
> and mention that it's for PCI reset.

OK.

Thanks
Nicolin
RE: [PATCH v5 5/5] pci: Suspend iommu function prior to resetting a device
Posted by Tian, Kevin 2 months, 3 weeks ago
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Tuesday, November 18, 2025 3:27 AM
> 
> On Mon, Nov 17, 2025 at 04:52:05AM +0000, Tian, Kevin wrote:
> > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > Sent: Saturday, November 15, 2025 2:01 AM
> > >
> > > On Fri, Nov 14, 2025 at 09:45:31AM +0000, Tian, Kevin wrote:
> > > > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > > > Sent: Tuesday, November 11, 2025 1:13 PM
> > > > >
> > > > > +/*
> > > > > + * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software
> disables
> > > ATS
> > > > > before
> > > > > + * initiating a reset. Notify the iommu driver that enabled ATS.
> > > > > + */
> > > > > +int pci_reset_iommu_prepare(struct pci_dev *dev)
> > > > > +{
> > > > > +	if (pci_ats_supported(dev))
> > > > > +		return iommu_dev_reset_prepare(&dev->dev);
> > > > > +	return 0;
> > > > > +}
> > > >
> > > > the comment says "driver that enabled ATS", but the code checks
> > > > whether ATS is supported.
> > > >
> > > > which one is desired?
> > >
> > > The comments says "the iommu driver that enabled ATS". It doesn't
> > > conflict with what the PCI core checks here?
> >
> > actually this is sent to all IOMMU drivers. there is no check on whether
> > a specific driver has enabled ATS in this path.
> 
> But the comment doesn't say "check"..
> 
> How about "Notify the iommu driver that enables/disables ATS"?
> 
> The point is that pci_enable_ats() is called in iommu drivers.
> 

But with the current approach, even an iommu driver which doesn't call
pci_enable_ats() will also be notified, so I don't see the
point of adding an attribute to "the iommu driver".
Re: [PATCH v5 5/5] pci: Suspend iommu function prior to resetting a device
Posted by Nicolin Chen 2 months, 3 weeks ago
On Tue, Nov 18, 2025 at 12:29:43AM +0000, Tian, Kevin wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: Tuesday, November 18, 2025 3:27 AM
> > 
> > On Mon, Nov 17, 2025 at 04:52:05AM +0000, Tian, Kevin wrote:
> > > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > > Sent: Saturday, November 15, 2025 2:01 AM
> > > >
> > > > On Fri, Nov 14, 2025 at 09:45:31AM +0000, Tian, Kevin wrote:
> > > > > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > > > > Sent: Tuesday, November 11, 2025 1:13 PM
> > > > > >
> > > > > > +/*
> > > > > > + * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software
> > disables
> > > > ATS
> > > > > > before
> > > > > > + * initiating a reset. Notify the iommu driver that enabled ATS.
> > > > > > + */
> > > > > > +int pci_reset_iommu_prepare(struct pci_dev *dev)
> > > > > > +{
> > > > > > +	if (pci_ats_supported(dev))
> > > > > > +		return iommu_dev_reset_prepare(&dev->dev);
> > > > > > +	return 0;
> > > > > > +}
> > > > >
> > > > > the comment says "driver that enabled ATS", but the code checks
> > > > > whether ATS is supported.
> > > > >
> > > > > which one is desired?
> > > >
> > > > The comments says "the iommu driver that enabled ATS". It doesn't
> > > > conflict with what the PCI core checks here?
> > >
> > > actually this is sent to all IOMMU drivers. there is no check on whether
> > > a specific driver has enabled ATS in this path.
> > 
> > But the comment doesn't say "check"..
> > 
> > How about "Notify the iommu driver that enables/disables ATS"?
> > 
> > The point is that pci_enable_ats() is called in iommu drivers.
> > 
> 
> but in current way even an iommu driver which doesn't call
> pci_enable_ats() will also be notified then I didn't see the
> point of adding an attribute to "the iommu driver".

Hmm, that's a fair point.

Having looked closely, I see only AMD and ARM call that to enable
ATS. How do others (e.g. Intel) enable it?

And what do you think of the following?

/*
 * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS before
 * initiating a reset. Though not all IOMMU drivers call pci_enable_ats(), it
 * only gets invoked in IOMMU drivers. And it is racy to check dev->ats_enabled
 * here, as a concurrent IOMMU attachment can enable ATS right after this line.
 *
 * Notify the IOMMU driver to stop IOMMU translations until the reset is done,
 * to ensure that the ATS function and its related invalidations are disabled.
 */

Thanks
Nicolin
RE: [PATCH v5 5/5] pci: Suspend iommu function prior to resetting a device
Posted by Tian, Kevin 2 months, 2 weeks ago
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Tuesday, November 18, 2025 9:42 AM
> 
> On Tue, Nov 18, 2025 at 12:29:43AM +0000, Tian, Kevin wrote:
> > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > Sent: Tuesday, November 18, 2025 3:27 AM
> > >
> > > On Mon, Nov 17, 2025 at 04:52:05AM +0000, Tian, Kevin wrote:
> > > > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > > > Sent: Saturday, November 15, 2025 2:01 AM
> > > > >
> > > > > On Fri, Nov 14, 2025 at 09:45:31AM +0000, Tian, Kevin wrote:
> > > > > > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > > > > > Sent: Tuesday, November 11, 2025 1:13 PM
> > > > > > >
> > > > > > > +/*
> > > > > > > + * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software
> > > disables
> > > > > ATS
> > > > > > > before
> > > > > > > + * initiating a reset. Notify the iommu driver that enabled ATS.
> > > > > > > + */
> > > > > > > +int pci_reset_iommu_prepare(struct pci_dev *dev)
> > > > > > > +{
> > > > > > > +	if (pci_ats_supported(dev))
> > > > > > > +		return iommu_dev_reset_prepare(&dev->dev);
> > > > > > > +	return 0;
> > > > > > > +}
> > > > > >
> > > > > > the comment says "driver that enabled ATS", but the code checks
> > > > > > whether ATS is supported.
> > > > > >
> > > > > > which one is desired?
> > > > >
> > > > > The comments says "the iommu driver that enabled ATS". It doesn't
> > > > > conflict with what the PCI core checks here?
> > > >
> > > > actually this is sent to all IOMMU drivers. there is no check on whether
> > > > a specific driver has enabled ATS in this path.
> > >
> > > But the comment doesn't say "check"..
> > >
> > > How about "Notify the iommu driver that enables/disables ATS"?
> > >
> > > The point is that pci_enable_ats() is called in iommu drivers.
> > >
> >
> > but in current way even an iommu driver which doesn't call
> > pci_enable_ats() will also be notified then I didn't see the
> > point of adding an attribute to "the iommu driver".
> 
> Hmm, that's a fair point.
> 
> Having looked closely, I see only AMD and ARM call that to enable
> ATs. How others (e.g. Intel) enable it?
> 
> And how do you think of the followings?
> 
> /*
>  * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS
> before
>  * initiating a reset. Though not all IOMMU drivers calls pci_enable_ats(), it
>  * only gets invoked in IOMMU driver. And it is racy to check dev-
> >ats_enabled
>  * here, as a concurrent IOMMU attachment can enable ATS right after this
> line.
>  *
>  * Notify the IOMMU driver to stop IOMMU translations until the reset is
> done,
>  * to ensure that the ATS function and its related invalidations are disabled.
>  */
> 

I'd remove the words between "Though not ..." and "after this line", which
could instead be explained on the iommu side, following Bjorn's suggestion
not to check pci_ats_supported() in the pci core.
Re: [PATCH v5 5/5] pci: Suspend iommu function prior to resetting a device
Posted by Nicolin Chen 2 months, 2 weeks ago
On Tue, Nov 18, 2025 at 07:53:27AM +0000, Tian, Kevin wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > And how do you think of the followings?
> > 
> > /*
> >  * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS
> > before
> >  * initiating a reset. Though not all IOMMU drivers calls pci_enable_ats(), it
> >  * only gets invoked in IOMMU driver. And it is racy to check dev-
> > >ats_enabled
> >  * here, as a concurrent IOMMU attachment can enable ATS right after this
> > line.
> >  *
> >  * Notify the IOMMU driver to stop IOMMU translations until the reset is
> > done,
> >  * to ensure that the ATS function and its related invalidations are disabled.
> >  */
> > 
> 
> I'd remove the words between "Though not ..." and "after this line", which
> could be explained in iommu side following Bjorn's suggestion to not check
> pci_ats_supported() in pci core.

OK. Thanks!

Nicolin
Re: [PATCH v5 5/5] pci: Suspend iommu function prior to resetting a device
Posted by Baolu Lu 2 months, 3 weeks ago
On 11/18/25 09:42, Nicolin Chen wrote:
> On Tue, Nov 18, 2025 at 12:29:43AM +0000, Tian, Kevin wrote:
>>> From: Nicolin Chen<nicolinc@nvidia.com>
>>> Sent: Tuesday, November 18, 2025 3:27 AM
>>>
>>> On Mon, Nov 17, 2025 at 04:52:05AM +0000, Tian, Kevin wrote:
>>>>> From: Nicolin Chen<nicolinc@nvidia.com>
>>>>> Sent: Saturday, November 15, 2025 2:01 AM
>>>>>
>>>>> On Fri, Nov 14, 2025 at 09:45:31AM +0000, Tian, Kevin wrote:
>>>>>>> From: Nicolin Chen<nicolinc@nvidia.com>
>>>>>>> Sent: Tuesday, November 11, 2025 1:13 PM
>>>>>>>
>>>>>>> +/*
>>>>>>> + * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software
>>> disables
>>>>> ATS
>>>>>>> before
>>>>>>> + * initiating a reset. Notify the iommu driver that enabled ATS.
>>>>>>> + */
>>>>>>> +int pci_reset_iommu_prepare(struct pci_dev *dev)
>>>>>>> +{
>>>>>>> +	if (pci_ats_supported(dev))
>>>>>>> +		return iommu_dev_reset_prepare(&dev->dev);
>>>>>>> +	return 0;
>>>>>>> +}
>>>>>> the comment says "driver that enabled ATS", but the code checks
>>>>>> whether ATS is supported.
>>>>>>
>>>>>> which one is desired?
>>>>> The comments says "the iommu driver that enabled ATS". It doesn't
>>>>> conflict with what the PCI core checks here?
>>>> actually this is sent to all IOMMU drivers. there is no check on whether
>>>> a specific driver has enabled ATS in this path.
>>> But the comment doesn't say "check"..
>>>
>>> How about "Notify the iommu driver that enables/disables ATS"?
>>>
>>> The point is that pci_enable_ats() is called in iommu drivers.
>>>
>> but in current way even an iommu driver which doesn't call
>> pci_enable_ats() will also be notified then I didn't see the
>> point of adding an attribute to "the iommu driver".
> Hmm, that's a fair point.
> 
> Having looked closely, I see only AMD and ARM call that to enable
> ATs. How others (e.g. Intel) enable it?

The VT-d driver enables ATS in the iommu probe_finalize() path (for
scalable mode).

static void intel_iommu_probe_finalize(struct device *dev)
{

[...]
         if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
                 iommu_enable_pci_ats(info);
                 /* Assign a DEVTLB cache tag to the default domain. */
                 if (info->ats_enabled && info->domain) {
                         u16 did = domain_id_iommu(info->domain, iommu);

                         if (cache_tag_assign(info->domain, did, dev,
                                              IOMMU_NO_PASID, 
CACHE_TAG_DEVTLB))
                                 iommu_disable_pci_ats(info);
                 }
         }

[...]
}

iommu_enable_pci_ats() will eventually call pci_enable_ats() after some
necessary checks.

Thanks,
baolu
Re: [PATCH v5 5/5] pci: Suspend iommu function prior to resetting a device
Posted by Nicolin Chen 2 months, 2 weeks ago
On Tue, Nov 18, 2025 at 01:38:40PM +0800, Baolu Lu wrote:
> The VT-d driver enables ATS in the iommu probe_finalize() path (for
> scalable mode).
.. 
> iommu_enable_pci_ats() will eventually call pci_enable_ats() after some
> necessary checks.

Oh, I missed that one.

Thanks!
Nicolin