PCIe permits a device to ignore ATS invalidation TLPs, while processing a
reset. This creates a problem visible to the OS where an ATS invalidation
command will time out: e.g. an SVA domain will have no coordination with a
reset event and can racily issue ATS invalidations to a resetting device.
The PCIe spec in sec 10.3.1 IMPLEMENTATION NOTE recommends to disable and
block ATS before initiating a Function Level Reset. It also mentions that
other reset methods could have the same vulnerability as well.
Now iommu_dev_reset_prepare/done() helpers are introduced for this matter.
Use them in all the existing reset functions, which will attach the device
to an IOMMU_DOMAIN_BLOCKED during a reset, so as to allow IOMMU driver to:
- invoke pci_disable_ats() and pci_enable_ats(), if necessary
- wait for all ATS invalidations to complete
- stop issuing new ATS invalidations
- fence any incoming ATS queries
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
drivers/pci/pci.h | 2 ++
drivers/pci/pci-acpi.c | 12 ++++++--
drivers/pci/pci.c | 68 ++++++++++++++++++++++++++++++++++++++----
drivers/pci/quirks.c | 18 ++++++++++-
4 files changed, 92 insertions(+), 8 deletions(-)
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 4492b809094b5..a29286dfd870c 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -198,6 +198,8 @@ void pci_init_reset_methods(struct pci_dev *dev);
int pci_bridge_secondary_bus_reset(struct pci_dev *dev);
int pci_bus_error_reset(struct pci_dev *dev);
int __pci_reset_bus(struct pci_bus *bus);
+int pci_reset_iommu_prepare(struct pci_dev *dev);
+void pci_reset_iommu_done(struct pci_dev *dev);
struct pci_cap_saved_data {
u16 cap_nr;
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 9369377725fa0..60d29b183f2c2 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -971,6 +971,7 @@ void pci_set_acpi_fwnode(struct pci_dev *dev)
int pci_dev_acpi_reset(struct pci_dev *dev, bool probe)
{
acpi_handle handle = ACPI_HANDLE(&dev->dev);
+ int ret = 0;
if (!handle || !acpi_has_method(handle, "_RST"))
return -ENOTTY;
@@ -978,12 +979,19 @@ int pci_dev_acpi_reset(struct pci_dev *dev, bool probe)
if (probe)
return 0;
+ ret = pci_reset_iommu_prepare(dev);
+ if (ret) {
+ pci_err(dev, "failed to stop IOMMU\n");
+ return ret;
+ }
+
if (ACPI_FAILURE(acpi_evaluate_object(handle, "_RST", NULL, NULL))) {
pci_warn(dev, "ACPI _RST failed\n");
- return -ENOTTY;
+ ret = -ENOTTY;
}
- return 0;
+ pci_reset_iommu_done(dev);
+ return ret;
}
bool acpi_pci_power_manageable(struct pci_dev *dev)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b14dd064006cc..52461d952cbf1 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -13,6 +13,7 @@
#include <linux/delay.h>
#include <linux/dmi.h>
#include <linux/init.h>
+#include <linux/iommu.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/pci.h>
@@ -25,6 +26,7 @@
#include <linux/logic_pio.h>
#include <linux/device.h>
#include <linux/pm_runtime.h>
+#include <linux/pci-ats.h>
#include <linux/pci_hotplug.h>
#include <linux/vmalloc.h>
#include <asm/dma.h>
@@ -95,6 +97,23 @@ bool pci_reset_supported(struct pci_dev *dev)
return dev->reset_methods[0] != 0;
}
+/*
+ * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS before
+ * initiating a reset. Notify the iommu driver that enabled ATS.
+ */
+int pci_reset_iommu_prepare(struct pci_dev *dev)
+{
+ if (pci_ats_supported(dev))
+ return iommu_dev_reset_prepare(&dev->dev);
+ return 0;
+}
+
+void pci_reset_iommu_done(struct pci_dev *dev)
+{
+ if (pci_ats_supported(dev))
+ iommu_dev_reset_done(&dev->dev);
+}
+
#ifdef CONFIG_PCI_DOMAINS
int pci_domains_supported = 1;
#endif
@@ -4478,13 +4497,22 @@ EXPORT_SYMBOL(pci_wait_for_pending_transaction);
*/
int pcie_flr(struct pci_dev *dev)
{
+ int ret = 0;
+
if (!pci_wait_for_pending_transaction(dev))
pci_err(dev, "timed out waiting for pending transaction; performing function level reset anyway\n");
+ /* Have to call it after waiting for pending DMA transaction */
+ ret = pci_reset_iommu_prepare(dev);
+ if (ret) {
+ pci_err(dev, "failed to stop IOMMU\n");
+ return ret;
+ }
+
pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR);
if (dev->imm_ready)
- return 0;
+ goto done;
/*
* Per PCIe r4.0, sec 6.6.2, a device must complete an FLR within
@@ -4493,7 +4521,10 @@ int pcie_flr(struct pci_dev *dev)
*/
msleep(100);
- return pci_dev_wait(dev, "FLR", PCIE_RESET_READY_POLL_MS);
+ ret = pci_dev_wait(dev, "FLR", PCIE_RESET_READY_POLL_MS);
+done:
+ pci_reset_iommu_done(dev);
+ return ret;
}
EXPORT_SYMBOL_GPL(pcie_flr);
@@ -4521,6 +4552,7 @@ EXPORT_SYMBOL_GPL(pcie_reset_flr);
static int pci_af_flr(struct pci_dev *dev, bool probe)
{
+ int ret = 0;
int pos;
u8 cap;
@@ -4547,10 +4579,17 @@ static int pci_af_flr(struct pci_dev *dev, bool probe)
PCI_AF_STATUS_TP << 8))
pci_err(dev, "timed out waiting for pending transaction; performing AF function level reset anyway\n");
+ /* Have to call it after waiting for pending DMA transaction */
+ ret = pci_reset_iommu_prepare(dev);
+ if (ret) {
+ pci_err(dev, "failed to stop IOMMU\n");
+ return ret;
+ }
+
pci_write_config_byte(dev, pos + PCI_AF_CTRL, PCI_AF_CTRL_FLR);
if (dev->imm_ready)
- return 0;
+ goto done;
/*
* Per Advanced Capabilities for Conventional PCI ECN, 13 April 2006,
@@ -4560,7 +4599,10 @@ static int pci_af_flr(struct pci_dev *dev, bool probe)
*/
msleep(100);
- return pci_dev_wait(dev, "AF_FLR", PCIE_RESET_READY_POLL_MS);
+ ret = pci_dev_wait(dev, "AF_FLR", PCIE_RESET_READY_POLL_MS);
+done:
+ pci_reset_iommu_done(dev);
+ return ret;
}
/**
@@ -4581,6 +4623,7 @@ static int pci_af_flr(struct pci_dev *dev, bool probe)
static int pci_pm_reset(struct pci_dev *dev, bool probe)
{
u16 csr;
+ int ret;
if (!dev->pm_cap || dev->dev_flags & PCI_DEV_FLAGS_NO_PM_RESET)
return -ENOTTY;
@@ -4595,6 +4638,12 @@ static int pci_pm_reset(struct pci_dev *dev, bool probe)
if (dev->current_state != PCI_D0)
return -EINVAL;
+ ret = pci_reset_iommu_prepare(dev);
+ if (ret) {
+ pci_err(dev, "failed to stop IOMMU\n");
+ return ret;
+ }
+
csr &= ~PCI_PM_CTRL_STATE_MASK;
csr |= PCI_D3hot;
pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);
@@ -4605,7 +4654,9 @@ static int pci_pm_reset(struct pci_dev *dev, bool probe)
pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);
pci_dev_d3_sleep(dev);
- return pci_dev_wait(dev, "PM D3hot->D0", PCIE_RESET_READY_POLL_MS);
+ ret = pci_dev_wait(dev, "PM D3hot->D0", PCIE_RESET_READY_POLL_MS);
+ pci_reset_iommu_done(dev);
+ return ret;
}
/**
@@ -5060,6 +5111,12 @@ static int cxl_reset_bus_function(struct pci_dev *dev, bool probe)
if (rc)
return -ENOTTY;
+ rc = pci_reset_iommu_prepare(dev);
+ if (rc) {
+ pci_err(dev, "failed to stop IOMMU\n");
+ return rc;
+ }
+
if (reg & PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR) {
val = reg;
} else {
@@ -5074,6 +5131,7 @@ static int cxl_reset_bus_function(struct pci_dev *dev, bool probe)
pci_write_config_word(bridge, dvsec + PCI_DVSEC_CXL_PORT_CTL,
reg);
+ pci_reset_iommu_done(dev);
return rc;
}
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 214ed060ca1b3..891d9e5a97e93 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4226,6 +4226,22 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
{ 0 }
};
+static int __pci_dev_specific_reset(struct pci_dev *dev, bool probe,
+ const struct pci_dev_reset_methods *i)
+{
+ int ret;
+
+ ret = pci_reset_iommu_prepare(dev);
+ if (ret) {
+ pci_err(dev, "failed to stop IOMMU\n");
+ return ret;
+ }
+
+ ret = i->reset(dev, probe);
+ pci_reset_iommu_done(dev);
+ return ret;
+}
+
/*
* These device-specific reset methods are here rather than in a driver
* because when a host assigns a device to a guest VM, the host may need
@@ -4240,7 +4256,7 @@ int pci_dev_specific_reset(struct pci_dev *dev, bool probe)
i->vendor == (u16)PCI_ANY_ID) &&
(i->device == dev->device ||
i->device == (u16)PCI_ANY_ID))
- return i->reset(dev, probe);
+ return __pci_dev_specific_reset(dev, probe, i);
}
return -ENOTTY;
--
2.43.0
On Mon, Nov 10, 2025 at 09:12:55PM -0800, Nicolin Chen wrote:
Run "git log --oneline drivers/pci/pci.c" and match the subject line
style.
> PCIe permits a device to ignore ATS invalidation TLPs, while processing a
> reset. This creates a problem visible to the OS where an ATS invalidation
> command will time out: e.g. an SVA domain will have no coordination with a
> reset event and can racily issue ATS invalidations to a resetting device.
s/TLPs, while/TLPs while/
> The PCIe spec in sec 10.3.1 IMPLEMENTATION NOTE recommends to disable and
> block ATS before initiating a Function Level Reset. It also mentions that
> other reset methods could have the same vulnerability as well.
Include spec revision, e.g., "PCIe r7.0, sec 10.3.1".
> Now iommu_dev_reset_prepare/done() helpers are introduced for this matter.
s/Now ... are introduced for this matter/Add ...helpers/
> Use them in all the existing reset functions, which will attach the device
> to an IOMMU_DOMAIN_BLOCKED during a reset, so as to allow IOMMU driver to:
> - invoke pci_disable_ats() and pci_enable_ats(), if necessary
> - wait for all ATS invalidations to complete
> - stop issuing new ATS invalidations
> - fence any incoming ATS queries
Thanks for addressing this problem.
> +++ b/drivers/pci/pci-acpi.c
> @@ -971,6 +971,7 @@ void pci_set_acpi_fwnode(struct pci_dev *dev)
> int pci_dev_acpi_reset(struct pci_dev *dev, bool probe)
> {
> acpi_handle handle = ACPI_HANDLE(&dev->dev);
> + int ret = 0;
Unnecessary initialization.
> +int pci_reset_iommu_prepare(struct pci_dev *dev)
> +{
> + if (pci_ats_supported(dev))
> + return iommu_dev_reset_prepare(&dev->dev);
Why bother checking pci_ats_supported() here? That could be done
inside iommu_dev_reset_prepare(), since iommu.c already uses
dev_is_pci() and pci_ats_supported() is already exported outside
drivers/pci/.
> +void pci_reset_iommu_done(struct pci_dev *dev)
> +{
> + if (pci_ats_supported(dev))
> + iommu_dev_reset_done(&dev->dev);
And here.
> int pcie_flr(struct pci_dev *dev)
> {
> + int ret = 0;
Unnecessary initialization.
> static int pci_af_flr(struct pci_dev *dev, bool probe)
> {
> + int ret = 0;
Unnecessary initialization.
On Mon, Nov 17, 2025 at 04:58:52PM -0600, Bjorn Helgaas wrote:
> On Mon, Nov 10, 2025 at 09:12:55PM -0800, Nicolin Chen wrote:
> > +int pci_reset_iommu_prepare(struct pci_dev *dev)
> > +{
> > + if (pci_ats_supported(dev))
> > + return iommu_dev_reset_prepare(&dev->dev);
>
> Why bother checking pci_ats_supported() here? That could be done
> inside iommu_dev_reset_prepare(), since iommu.c already uses
> dev_is_pci() and pci_ats_supported() is already exported outside
> drivers/pci/.
Ack. I will fix all of these.
Thanks for the review!
Nicolin
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Tuesday, November 11, 2025 1:13 PM
>
> PCIe permits a device to ignore ATS invalidation TLPs, while processing a
> reset. This creates a problem visible to the OS where an ATS invalidation
> command will time out: e.g. an SVA domain will have no coordination with a
> reset event and can racily issue ATS invalidations to a resetting device.
>
> The PCIe spec in sec 10.3.1 IMPLEMENTATION NOTE recommends to disable
> and
> block ATS before initiating a Function Level Reset. It also mentions that
> other reset methods could have the same vulnerability as well.
>
> Now iommu_dev_reset_prepare/done() helpers are introduced for this
> matter.
> Use them in all the existing reset functions, which will attach the device
It looks like pci_reset_bus_function() was missed?
> @@ -971,6 +971,7 @@ void pci_set_acpi_fwnode(struct pci_dev *dev)
> int pci_dev_acpi_reset(struct pci_dev *dev, bool probe)
> {
> acpi_handle handle = ACPI_HANDLE(&dev->dev);
> + int ret = 0;
no need to initialize it. ditto for other reset functions.
> +/*
> + * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS
> before
> + * initiating a reset. Notify the iommu driver that enabled ATS.
> + */
> +int pci_reset_iommu_prepare(struct pci_dev *dev)
> +{
> + if (pci_ats_supported(dev))
> + return iommu_dev_reset_prepare(&dev->dev);
> + return 0;
> +}
the comment says "driver that enabled ATS", but the code checks
whether ATS is supported.
which one is desired?
>
> + /* Have to call it after waiting for pending DMA transaction */
> + ret = pci_reset_iommu_prepare(dev);
> + if (ret) {
> + pci_err(dev, "failed to stop IOMMU\n");
the error message could be more informative.
On Fri, Nov 14, 2025 at 09:45:31AM +0000, Tian, Kevin wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: Tuesday, November 11, 2025 1:13 PM
> >
> > PCIe permits a device to ignore ATS invalidation TLPs, while processing a
> > reset. This creates a problem visible to the OS where an ATS invalidation
> > command will time out: e.g. an SVA domain will have no coordination with a
> > reset event and can racily issue ATS invalidations to a resetting device.
> >
> > The PCIe spec in sec 10.3.1 IMPLEMENTATION NOTE recommends to disable
> > and
> > block ATS before initiating a Function Level Reset. It also mentions that
> > other reset methods could have the same vulnerability as well.
> >
> > Now iommu_dev_reset_prepare/done() helpers are introduced for this
> > matter.
> > Use them in all the existing reset functions, which will attach the device
>
> looks pci_reset_bus_function() was missed?
Will add that.
> > @@ -971,6 +971,7 @@ void pci_set_acpi_fwnode(struct pci_dev *dev)
> > int pci_dev_acpi_reset(struct pci_dev *dev, bool probe)
> > {
> > acpi_handle handle = ACPI_HANDLE(&dev->dev);
> > + int ret = 0;
>
> no need to initialize it. ditto for other reset functions.
Ack.
> > +/*
> > + * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS
> > before
> > + * initiating a reset. Notify the iommu driver that enabled ATS.
> > + */
> > +int pci_reset_iommu_prepare(struct pci_dev *dev)
> > +{
> > + if (pci_ats_supported(dev))
> > + return iommu_dev_reset_prepare(&dev->dev);
> > + return 0;
> > +}
>
> the comment says "driver that enabled ATS", but the code checks
> whether ATS is supported.
>
> which one is desired?
The comment says "the iommu driver that enabled ATS". It doesn't
conflict with what the PCI core checks here?
> > + /* Have to call it after waiting for pending DMA transaction */
> > + ret = pci_reset_iommu_prepare(dev);
> > + if (ret) {
> > + pci_err(dev, "failed to stop IOMMU\n");
>
> the error message could be more informative.
OK. Perhaps print the ret value.
Thanks!
Nicolin
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Saturday, November 15, 2025 2:01 AM
>
> On Fri, Nov 14, 2025 at 09:45:31AM +0000, Tian, Kevin wrote:
> > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > Sent: Tuesday, November 11, 2025 1:13 PM
> > >
> > > +/*
> > > + * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables
> ATS
> > > before
> > > + * initiating a reset. Notify the iommu driver that enabled ATS.
> > > + */
> > > +int pci_reset_iommu_prepare(struct pci_dev *dev)
> > > +{
> > > + if (pci_ats_supported(dev))
> > > + return iommu_dev_reset_prepare(&dev->dev);
> > > + return 0;
> > > +}
> >
> > the comment says "driver that enabled ATS", but the code checks
> > whether ATS is supported.
> >
> > which one is desired?
>
> The comments says "the iommu driver that enabled ATS". It doesn't
> conflict with what the PCI core checks here?
actually this is sent to all IOMMU drivers. there is no check on whether
a specific driver has enabled ATS in this path.
>
> > > + /* Have to call it after waiting for pending DMA transaction */
> > > + ret = pci_reset_iommu_prepare(dev);
> > > + if (ret) {
> > > + pci_err(dev, "failed to stop IOMMU\n");
> >
> > the error message could be more informative.
>
> OK. Perhaps print the ret value.
>
and mention that it's for PCI reset.
On Mon, Nov 17, 2025 at 04:52:05AM +0000, Tian, Kevin wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: Saturday, November 15, 2025 2:01 AM
> >
> > On Fri, Nov 14, 2025 at 09:45:31AM +0000, Tian, Kevin wrote:
> > > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > > Sent: Tuesday, November 11, 2025 1:13 PM
> > > >
> > > > +/*
> > > > + * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables
> > ATS
> > > > before
> > > > + * initiating a reset. Notify the iommu driver that enabled ATS.
> > > > + */
> > > > +int pci_reset_iommu_prepare(struct pci_dev *dev)
> > > > +{
> > > > + if (pci_ats_supported(dev))
> > > > + return iommu_dev_reset_prepare(&dev->dev);
> > > > + return 0;
> > > > +}
> > >
> > > the comment says "driver that enabled ATS", but the code checks
> > > whether ATS is supported.
> > >
> > > which one is desired?
> >
> > The comments says "the iommu driver that enabled ATS". It doesn't
> > conflict with what the PCI core checks here?
>
> actually this is sent to all IOMMU drivers. there is no check on whether
> a specific driver has enabled ATS in this path.
But the comment doesn't say "check"..
How about "Notify the iommu driver that enables/disables ATS"?
The point is that pci_enable_ats() is called in iommu drivers.
> > > > + /* Have to call it after waiting for pending DMA transaction */
> > > > + ret = pci_reset_iommu_prepare(dev);
> > > > + if (ret) {
> > > > + pci_err(dev, "failed to stop IOMMU\n");
> > >
> > > the error message could be more informative.
> >
> > OK. Perhaps print the ret value.
> >
>
> and mention that it's for PCI reset.
OK.
Thanks
Nicolin
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Tuesday, November 18, 2025 3:27 AM
>
> On Mon, Nov 17, 2025 at 04:52:05AM +0000, Tian, Kevin wrote:
> > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > Sent: Saturday, November 15, 2025 2:01 AM
> > >
> > > On Fri, Nov 14, 2025 at 09:45:31AM +0000, Tian, Kevin wrote:
> > > > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > > > Sent: Tuesday, November 11, 2025 1:13 PM
> > > > >
> > > > > +/*
> > > > > + * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software
> disables
> > > ATS
> > > > > before
> > > > > + * initiating a reset. Notify the iommu driver that enabled ATS.
> > > > > + */
> > > > > +int pci_reset_iommu_prepare(struct pci_dev *dev)
> > > > > +{
> > > > > + if (pci_ats_supported(dev))
> > > > > + return iommu_dev_reset_prepare(&dev->dev);
> > > > > + return 0;
> > > > > +}
> > > >
> > > > the comment says "driver that enabled ATS", but the code checks
> > > > whether ATS is supported.
> > > >
> > > > which one is desired?
> > >
> > > The comments says "the iommu driver that enabled ATS". It doesn't
> > > conflict with what the PCI core checks here?
> >
> > actually this is sent to all IOMMU drivers. there is no check on whether
> > a specific driver has enabled ATS in this path.
>
> But the comment doesn't say "check"..
>
> How about "Notify the iommu driver that enables/disables ATS"?
>
> The point is that pci_enable_ats() is called in iommu drivers.
>
But with the current approach, even an iommu driver which doesn't call
pci_enable_ats() will also be notified, so I don't see the
point of adding an attribute to "the iommu driver".
On Tue, Nov 18, 2025 at 12:29:43AM +0000, Tian, Kevin wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: Tuesday, November 18, 2025 3:27 AM
> >
> > On Mon, Nov 17, 2025 at 04:52:05AM +0000, Tian, Kevin wrote:
> > > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > > Sent: Saturday, November 15, 2025 2:01 AM
> > > >
> > > > On Fri, Nov 14, 2025 at 09:45:31AM +0000, Tian, Kevin wrote:
> > > > > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > > > > Sent: Tuesday, November 11, 2025 1:13 PM
> > > > > >
> > > > > > +/*
> > > > > > + * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software
> > disables
> > > > ATS
> > > > > > before
> > > > > > + * initiating a reset. Notify the iommu driver that enabled ATS.
> > > > > > + */
> > > > > > +int pci_reset_iommu_prepare(struct pci_dev *dev)
> > > > > > +{
> > > > > > + if (pci_ats_supported(dev))
> > > > > > + return iommu_dev_reset_prepare(&dev->dev);
> > > > > > + return 0;
> > > > > > +}
> > > > >
> > > > > the comment says "driver that enabled ATS", but the code checks
> > > > > whether ATS is supported.
> > > > >
> > > > > which one is desired?
> > > >
> > > > The comments says "the iommu driver that enabled ATS". It doesn't
> > > > conflict with what the PCI core checks here?
> > >
> > > actually this is sent to all IOMMU drivers. there is no check on whether
> > > a specific driver has enabled ATS in this path.
> >
> > But the comment doesn't say "check"..
> >
> > How about "Notify the iommu driver that enables/disables ATS"?
> >
> > The point is that pci_enable_ats() is called in iommu drivers.
> >
>
> but in current way even an iommu driver which doesn't call
> pci_enable_ats() will also be notified then I didn't see the
> point of adding an attribute to "the iommu driver".
Hmm, that's a fair point.
Having looked closely, I see only AMD and ARM call that to enable
ATS. How do others (e.g. Intel) enable it?
And what do you think of the following?
/*
* Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS before
* initiating a reset. Though not all IOMMU drivers call pci_enable_ats(), it
* only gets invoked in IOMMU drivers. And it is racy to check dev->ats_enabled
* here, as a concurrent IOMMU attachment can enable ATS right after this line.
*
* Notify the IOMMU driver to stop IOMMU translations until the reset is done,
* to ensure that the ATS function and its related invalidations are disabled.
*/
Thanks
Nicolin
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Tuesday, November 18, 2025 9:42 AM
>
> On Tue, Nov 18, 2025 at 12:29:43AM +0000, Tian, Kevin wrote:
> > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > Sent: Tuesday, November 18, 2025 3:27 AM
> > >
> > > On Mon, Nov 17, 2025 at 04:52:05AM +0000, Tian, Kevin wrote:
> > > > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > > > Sent: Saturday, November 15, 2025 2:01 AM
> > > > >
> > > > > On Fri, Nov 14, 2025 at 09:45:31AM +0000, Tian, Kevin wrote:
> > > > > > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > > > > > Sent: Tuesday, November 11, 2025 1:13 PM
> > > > > > >
> > > > > > > +/*
> > > > > > > + * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software
> > > disables
> > > > > ATS
> > > > > > > before
> > > > > > > + * initiating a reset. Notify the iommu driver that enabled ATS.
> > > > > > > + */
> > > > > > > +int pci_reset_iommu_prepare(struct pci_dev *dev)
> > > > > > > +{
> > > > > > > + if (pci_ats_supported(dev))
> > > > > > > + return iommu_dev_reset_prepare(&dev->dev);
> > > > > > > + return 0;
> > > > > > > +}
> > > > > >
> > > > > > the comment says "driver that enabled ATS", but the code checks
> > > > > > whether ATS is supported.
> > > > > >
> > > > > > which one is desired?
> > > > >
> > > > > The comments says "the iommu driver that enabled ATS". It doesn't
> > > > > conflict with what the PCI core checks here?
> > > >
> > > > actually this is sent to all IOMMU drivers. there is no check on whether
> > > > a specific driver has enabled ATS in this path.
> > >
> > > But the comment doesn't say "check"..
> > >
> > > How about "Notify the iommu driver that enables/disables ATS"?
> > >
> > > The point is that pci_enable_ats() is called in iommu drivers.
> > >
> >
> > but in current way even an iommu driver which doesn't call
> > pci_enable_ats() will also be notified then I didn't see the
> > point of adding an attribute to "the iommu driver".
>
> Hmm, that's a fair point.
>
> Having looked closely, I see only AMD and ARM call that to enable
> ATs. How others (e.g. Intel) enable it?
>
> And how do you think of the followings?
>
> /*
> * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS
> before
> * initiating a reset. Though not all IOMMU drivers calls pci_enable_ats(), it
> * only gets invoked in IOMMU driver. And it is racy to check dev-
> >ats_enabled
> * here, as a concurrent IOMMU attachment can enable ATS right after this
> line.
> *
> * Notify the IOMMU driver to stop IOMMU translations until the reset is
> done,
> * to ensure that the ATS function and its related invalidations are disabled.
> */
>
I'd remove the words between "Though not ..." and "after this line", which
could be explained on the iommu side, following Bjorn's suggestion to not check
pci_ats_supported() in the pci core.
On Tue, Nov 18, 2025 at 07:53:27AM +0000, Tian, Kevin wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > And how do you think of the followings?
> >
> > /*
> > * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS
> > before
> > * initiating a reset. Though not all IOMMU drivers calls pci_enable_ats(), it
> > * only gets invoked in IOMMU driver. And it is racy to check dev-
> > >ats_enabled
> > * here, as a concurrent IOMMU attachment can enable ATS right after this
> > line.
> > *
> > * Notify the IOMMU driver to stop IOMMU translations until the reset is
> > done,
> > * to ensure that the ATS function and its related invalidations are disabled.
> > */
> >
>
> I'd remove the words between "Though not ..." and "after this line", which
> could be explained in iommu side following Bjorn's suggestion to not check
> pci_ats_supported() in pci core.
OK.
Thanks!
Nicolin
On 11/18/25 09:42, Nicolin Chen wrote:
> On Tue, Nov 18, 2025 at 12:29:43AM +0000, Tian, Kevin wrote:
>>> From: Nicolin Chen<nicolinc@nvidia.com>
>>> Sent: Tuesday, November 18, 2025 3:27 AM
>>>
>>> On Mon, Nov 17, 2025 at 04:52:05AM +0000, Tian, Kevin wrote:
>>>>> From: Nicolin Chen<nicolinc@nvidia.com>
>>>>> Sent: Saturday, November 15, 2025 2:01 AM
>>>>>
>>>>> On Fri, Nov 14, 2025 at 09:45:31AM +0000, Tian, Kevin wrote:
>>>>>>> From: Nicolin Chen<nicolinc@nvidia.com>
>>>>>>> Sent: Tuesday, November 11, 2025 1:13 PM
>>>>>>>
>>>>>>> +/*
>>>>>>> + * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software
>>> disables
>>>>> ATS
>>>>>>> before
>>>>>>> + * initiating a reset. Notify the iommu driver that enabled ATS.
>>>>>>> + */
>>>>>>> +int pci_reset_iommu_prepare(struct pci_dev *dev)
>>>>>>> +{
>>>>>>> + if (pci_ats_supported(dev))
>>>>>>> + return iommu_dev_reset_prepare(&dev->dev);
>>>>>>> + return 0;
>>>>>>> +}
>>>>>> the comment says "driver that enabled ATS", but the code checks
>>>>>> whether ATS is supported.
>>>>>>
>>>>>> which one is desired?
>>>>> The comments says "the iommu driver that enabled ATS". It doesn't
>>>>> conflict with what the PCI core checks here?
>>>> actually this is sent to all IOMMU drivers. there is no check on whether
>>>> a specific driver has enabled ATS in this path.
>>> But the comment doesn't say "check"..
>>>
>>> How about "Notify the iommu driver that enables/disables ATS"?
>>>
>>> The point is that pci_enable_ats() is called in iommu drivers.
>>>
>> but in current way even an iommu driver which doesn't call
>> pci_enable_ats() will also be notified then I didn't see the
>> point of adding an attribute to "the iommu driver".
> Hmm, that's a fair point.
>
> Having looked closely, I see only AMD and ARM call that to enable
> ATs. How others (e.g. Intel) enable it?
The VT-d driver enables ATS in the iommu probe_finalize() path (for
scalable mode).
static void intel_iommu_probe_finalize(struct device *dev)
{
[...]
if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
iommu_enable_pci_ats(info);
/* Assign a DEVTLB cache tag to the default domain. */
if (info->ats_enabled && info->domain) {
u16 did = domain_id_iommu(info->domain, iommu);
if (cache_tag_assign(info->domain, did, dev,
IOMMU_NO_PASID,
CACHE_TAG_DEVTLB))
iommu_disable_pci_ats(info);
}
}
[...]
}
iommu_enable_pci_ats() will eventually call pci_enable_ats() after some
necessary checks.
Thanks,
baolu
On Tue, Nov 18, 2025 at 01:38:40PM +0800, Baolu Lu wrote:
> The VT-d driver enables ATS in the iommu probe_finalize() path (for
> scalable mode).
..
> iommu_enable_pci_ats() will eventually call pci_enable_ats() after some
> necessary checks.
Oh, I missed that one.
Thanks!
Nicolin
© 2016 - 2026 Red Hat, Inc.