Controlled by the IOMMU driver, ATS is usually enabled "on demand", when a
device requests a translation service from its associated IOMMU HW running
on the channel of a given PASID. This works even when the device has no
translation on its RID, i.e. the RID is IOMMU-bypassed.

On the other hand, certain PCIe devices require non-PASID ATS when their
RID stream is IOMMU-bypassed. Call this "always on".

For instance, the CXL spec notes in "3.2.5.13 Memory Type on CXL.cache":
"To source requests on CXL.cache, devices need to get the Host Physical
Address (HPA) from the Host by means of an ATS request on CXL.io."
In other words, the CXL.cache capability relies on ATS; without it the
device has no access to host physical memory.

Introduce a new pci_ats_always_on() helper for the IOMMU driver to scan a
PCI device and shift the ATS policy between "on demand" and "always on".

Add support for CXL.cache devices first. Non-CXL devices can be added
later via quirks.c.

Suggested-by: Vikram Sethi <vsethi@nvidia.com>
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
include/linux/pci-ats.h | 3 +++
include/uapi/linux/pci_regs.h | 5 ++++
drivers/pci/ats.c | 44 +++++++++++++++++++++++++++++++++++
3 files changed, 52 insertions(+)
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index 75c6c86cf09d..d14ba727d38b 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -12,6 +12,7 @@ int pci_prepare_ats(struct pci_dev *dev, int ps);
void pci_disable_ats(struct pci_dev *dev);
int pci_ats_queue_depth(struct pci_dev *dev);
int pci_ats_page_aligned(struct pci_dev *dev);
+bool pci_ats_always_on(struct pci_dev *dev);
#else /* CONFIG_PCI_ATS */
static inline bool pci_ats_supported(struct pci_dev *d)
{ return false; }
@@ -24,6 +25,8 @@ static inline int pci_ats_queue_depth(struct pci_dev *d)
{ return -ENODEV; }
static inline int pci_ats_page_aligned(struct pci_dev *dev)
{ return 0; }
+static inline bool pci_ats_always_on(struct pci_dev *dev)
+{ return false; }
#endif /* CONFIG_PCI_ATS */
#ifdef CONFIG_PCI_PRI
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 3add74ae2594..84da6d7645a3 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -1258,6 +1258,11 @@
#define PCI_DVSEC_CXL_PORT_CTL 0x0c
#define PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR 0x00000001
+/* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */
+#define CXL_DVSEC_PCIE_DEVICE 0
+#define CXL_DVSEC_CAP_OFFSET 0xA
+#define CXL_DVSEC_CACHE_CAPABLE BIT(0)
+
/* Integrity and Data Encryption Extended Capability */
#define PCI_IDE_CAP 0x04
#define PCI_IDE_CAP_LINK 0x1 /* Link IDE Stream Supported */
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index ec6c8dbdc5e9..1795131f0697 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -205,6 +205,50 @@ int pci_ats_page_aligned(struct pci_dev *pdev)
return 0;
}
+/*
+ * CXL r4.0, sec 3.2.5.13 Memory Type on CXL.cache notes: to source requests on
+ * CXL.cache, devices need to get the Host Physical Address (HPA) from the Host
+ * by means of an ATS request on CXL.io.
+ *
+ * In other words, CXL.cache devices cannot access physical memory without ATS.
+ */
+static bool pci_cxl_ats_always_on(struct pci_dev *pdev)
+{
+ int offset;
+ u16 cap;
+
+ offset = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
+ CXL_DVSEC_PCIE_DEVICE);
+ if (!offset)
+ return false;
+
+ pci_read_config_word(pdev, offset + CXL_DVSEC_CAP_OFFSET, &cap);
+ if (cap & CXL_DVSEC_CACHE_CAPABLE)
+ return true;
+
+ return false;
+}
+
+/**
+ * pci_ats_always_on - Whether the PCI device requires ATS to be always enabled
+ * @pdev: the PCI device
+ *
+ * Returns true if the PCI device requires non-PASID ATS function on an IOMMU
+ * bypassed configuration.
+ */
+bool pci_ats_always_on(struct pci_dev *pdev)
+{
+ if (pci_ats_disabled() || !pci_ats_supported(pdev))
+ return false;
+
+ /* A VF inherits its PF's requirement for ATS function */
+ if (pdev->is_virtfn)
+ pdev = pci_physfn(pdev);
+
+ return pci_cxl_ats_always_on(pdev);
+}
+EXPORT_SYMBOL_GPL(pci_ats_always_on);
+
#ifdef CONFIG_PCI_PRI
void pci_pri_init(struct pci_dev *pdev)
{
--
2.43.0
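
(For illustration only, not part of the series: a minimal sketch of how an
IOMMU driver might consume the new helper when deciding the ATS policy for a
RID-bypassed device. Only pci_ats_always_on(), pci_enable_ats() and pci_err()
are existing interfaces here; the caller and its place in the attach path are
hypothetical.)
-----------------------------------------------------------------
#include <linux/pci.h>
#include <linux/pci-ats.h>

/*
 * Hypothetical caller: pick the ATS policy for a device whose RID
 * translation is IDENTITY (IOMMU-bypassed).
 */
static void example_set_rid_bypass_ats(struct pci_dev *pdev, int ps)
{
	/* "On demand" policy: leave ATS off for a plain 1:1 RID translation */
	if (!pci_ats_always_on(pdev))
		return;

	/*
	 * "Always on" policy: e.g. a CXL.cache device cannot reach host
	 * physical memory without ATS, so enable it up front.
	 */
	if (pci_enable_ats(pdev, ps))
		pci_err(pdev, "failed to enable always-on ATS\n");
}
-----------------------------------------------------------------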
+Dan. I recalled an offline discussion in which he raised a concern
about having the kernel blindly enable ATS for cxl.cache devices
instead of creating a knob for the admin to configure from userspace
(in case security is viewed as more important than functionality,
given that it allows DMA to read data out of CPU caches)...
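
(Purely as an illustration of the kind of knob being suggested, not proposed
code: a per-device sysfs attribute that defaults to off, which an IOMMU driver
could consult before honouring the always-on policy. All names here are made
up; only the sysfs/device-attribute helpers are real kernel APIs.)
-----------------------------------------------------------------
#include <linux/device.h>
#include <linux/kstrtox.h>
#include <linux/sysfs.h>

/* Made-up per-device flag; real code would keep this in per-device state */
static bool example_cxl_cache_ats_opt_in;

static ssize_t cxl_cache_ats_show(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", example_cxl_cache_ats_opt_in);
}

static ssize_t cxl_cache_ats_store(struct device *dev,
				   struct device_attribute *attr,
				   const char *buf, size_t count)
{
	bool val;

	if (kstrtobool(buf, &val))
		return -EINVAL;
	/* Admin opt-in: only then would the IOMMU driver enable ATS */
	example_cxl_cache_ats_opt_in = val;
	return count;
}
static DEVICE_ATTR_RW(cxl_cache_ats);
-----------------------------------------------------------------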
On Wed, 21 Jan 2026 08:01:36 +0000
"Tian, Kevin" <kevin.tian@intel.com> wrote:
> +Dan. I recalled an offline discussion in which he raised a concern
> about having the kernel blindly enable ATS for cxl.cache devices
> instead of creating a knob for the admin to configure from userspace
> (in case security is viewed as more important than functionality,
> given that it allows DMA to read data out of CPU caches)...
>
+CC Linux-cxl
Jonathan
On Wed, Jan 21, 2026 at 10:03:07AM +0000, Jonathan Cameron wrote:
> On Wed, 21 Jan 2026 08:01:36 +0000
> "Tian, Kevin" <kevin.tian@intel.com> wrote:
>
> > +Dan. I recalled an offline discussion in which he raised a concern
> > about having the kernel blindly enable ATS for cxl.cache devices
> > instead of creating a knob for the admin to configure from userspace
> > (in case security is viewed as more important than functionality,
> > given that it allows DMA to read data out of CPU caches)...
> >
> +CC Linux-cxl

A cxl.cache device supporting ATS will automatically enable ATS today
if the kernel option to enable translation is set.

Even if the device is marked untrusted by the PCI layer (e.g. an
external port).

Yes, this is effectively a security issue, but it is not really a
CXL-specific problem.

We might prefer to not enable ATS for untrusted devices and then fail
to load drivers for the "ats always on" cases.

Or maybe we can enable one of the ATS security features someday,
though I wonder if those work for CXL..

Jason
On 1/21/26 13:03, Jason Gunthorpe wrote:
> On Wed, Jan 21, 2026 at 10:03:07AM +0000, Jonathan Cameron wrote:
>> On Wed, 21 Jan 2026 08:01:36 +0000
>> "Tian, Kevin" <kevin.tian@intel.com> wrote:
>>
>>> +Dan. I recalled an offline discussion in which he raised a concern
>>> about having the kernel blindly enable ATS for cxl.cache devices
>>> instead of creating a knob for the admin to configure from userspace
>>> (in case security is viewed as more important than functionality,
>>> given that it allows DMA to read data out of CPU caches)...
>>>
>> +CC Linux-cxl
> A cxl.cache device supporting ATS will automatically enable ATS today
> if the kernel option to enable translation is set.
>
> Even if the device is marked untrusted by the PCI layer (e.g. an
> external port).
>
> Yes, this is effectively a security issue, but it is not really a
> CXL-specific problem.
>
> We might prefer to not enable ATS for untrusted devices and then fail
> to load drivers for the "ats always on" cases.
>
> Or maybe we can enable one of the ATS security features someday,
> though I wonder if those work for CXL..

I raised my concerns about CXL.cache and virtualization at LPC:

https://lpc.events/event/19/contributions/2173/attachments/1842/3940/LPC_2025_CXL_CACHE.pdf

I expose some concerns there, although I admit some could be due to my
twisted understanding of what the CXL specs state, but regarding
IOMMU/ATS, my view is that ATS is not safe enough ... which I guess is a
matter of opinion (a device trusted on the basis of the vendor confirming
it is an "official" device is not enough for paranoid mode, with vendors
subject to government "agencies" actions/pressures).

But this links to what I think Jason points out about ATS security
features, where the IOMMU hardware can be configured to check those
translated PCIe accesses as well, if the host owner/admin's paranoid mind
so decides. With CXL.cache that is not possible, since the route is
through a different link and, AFAIK, there is no support for something
like this in current implementations. I think it could be implemented
without impacting the gains from CXL.cache, but that is another story.

So, FWIW, I think it should not be enabled by default.

Thank you,
Alejandro

> Jason
>
Jason Gunthorpe wrote:
> On Wed, Jan 21, 2026 at 10:03:07AM +0000, Jonathan Cameron wrote:
> > On Wed, 21 Jan 2026 08:01:36 +0000
> > "Tian, Kevin" <kevin.tian@intel.com> wrote:
> >
> > > +Dan. I recalled an offline discussion in which he raised a concern
> > > about having the kernel blindly enable ATS for cxl.cache devices
> > > instead of creating a knob for the admin to configure from userspace
> > > (in case security is viewed as more important than functionality,
> > > given that it allows DMA to read data out of CPU caches)...
> > >
> > +CC Linux-cxl
>
> A cxl.cache device supporting ATS will automatically enable ATS today
> if the kernel option to enable translation is set.
>
> Even if the device is marked untrusted by the PCI layer (e.g. an
> external port).
>
> Yes, this is effectively a security issue, but it is not really a
> CXL-specific problem.

My contention is that it is a worse or at least different problem in the
CXL case because now you have a new toolkit in an attack that wants to
exfiltrate data from CPU caches.

> We might prefer to not enable ATS for untrusted devices and then fail
> to load drivers for the "ats always on" cases.

The current PCI untrusted flag is not fit for purpose in this new age of
PCI device authentication and CXL.cache capable devices.

> Or maybe we can enable one of the ATS security features someday,
> though I wonder if those work for CXL..

It should work, but before that I do not see the justification to say,
effectively:

"We have a less than perfect legacy way (PCI untrusted flag) to nod at
ATS security problems. Let us ignore even that for a new class of
devices that advertise they can trigger all the old security problems
plus new ones."

I do not immediately see what is wrong with requiring userspace policy
opt-in. That naturally gets replaced by installing the device's
certificate (for native PCI CMA), authenticating the device with the
TSM (for PCI IDE), or obviated by secure-ATS if that arrives.
On Wed, Jan 21, 2026 at 09:44:32PM -0800, dan.j.williams@intel.com wrote:
> Jason Gunthorpe wrote:
> > On Wed, Jan 21, 2026 at 10:03:07AM +0000, Jonathan Cameron wrote:
> > > On Wed, 21 Jan 2026 08:01:36 +0000
> > > "Tian, Kevin" <kevin.tian@intel.com> wrote:
> > >
> > > > +Dan. I recalled an offline discussion in which he raised a concern
> > > > about having the kernel blindly enable ATS for cxl.cache devices
> > > > instead of creating a knob for the admin to configure from userspace
> > > > (in case security is viewed as more important than functionality,
> > > > given that it allows DMA to read data out of CPU caches)...
> > > >
> > > +CC Linux-cxl
> >
> > A cxl.cache device supporting ATS will automatically enable ATS today
> > if the kernel option to enable translation is set.
> >
> > Even if the device is marked untrusted by the PCI layer (e.g. an
> > external port).
> >
> > Yes, this is effectively a security issue, but it is not really a
> > CXL-specific problem.
>
> My contention is that it is a worse or at least different problem in the
> CXL case because now you have a new toolkit in an attack that wants to
> exfiltrate data from CPU caches.

?? I don't see CXL as meaningfully different than PCI in terms of what
data can be accessed with Translated requests. If the IOMMU doesn't
block Translated requests the whole system is open. CXL doesn't make
it more open.

> "We have a less than perfect legacy way (PCI untrusted flag) to nod at
> ATS security problems. Let us ignore even that for a new class of
> devices that advertise they can trigger all the old security problems
> plus new ones."

Ah, I missed that we are already force disabling ATS in this untrusted
case, so we should ensure that continues to be the case here too.
Nicolin, does it need a change?

> I do not immediately see what is wrong with requiring userspace policy
> opt-in. That naturally gets replaced by installing the device's
> certificate (for native PCI CMA), authenticating the device with the
> TSM (for PCI IDE), or obviated by secure-ATS if that arrives.

I think that goes back to the discussion about not loading drivers
before validating the device.

It would also make a lot of sense to leave the IOMMU blocking until the
driver is loaded for these secure situations. The blocking translation
should block ATS too.

Then the flow you are describing will work well:

1) At pre-boot the IOMMU will block all DMA including Translated.
2) The OS activates the IOMMU driver and keeps blocking.
3) Instead of immediately binding a default domain the IOMMU core
leaves the translation blocking.
4) The OS defers loading the driver to userspace.
5) Userspace measures the device and "accepts" it by loading the
driver
6) IOMMU core attaches a non-blocking default domain and activates ATS

Jason
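
(Side note for readers following along: the "force disabling" referred to
above is the untrusted check inside pci_ats_supported(), which the new
pci_ats_always_on() inherits through its early return. Sketched from memory,
so treat the body as approximate rather than a verbatim quote of
drivers/pci/ats.c.)
-----------------------------------------------------------------
/* Approximate shape of the existing gate (from memory) */
bool pci_ats_supported(struct pci_dev *dev)
{
	if (!dev->ats_cap)
		return false;

	/* Devices marked untrusted (e.g. behind an external port) never get ATS */
	return dev->untrusted == 0;
}
-----------------------------------------------------------------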
Jason Gunthorpe wrote:
> On Wed, Jan 21, 2026 at 09:44:32PM -0800, dan.j.williams@intel.com wrote:
> > Jason Gunthorpe wrote:
> > > On Wed, Jan 21, 2026 at 10:03:07AM +0000, Jonathan Cameron wrote:
> > > > On Wed, 21 Jan 2026 08:01:36 +0000
> > > > "Tian, Kevin" <kevin.tian@intel.com> wrote:
> > > >
> > > > > +Dan. I recalled an offline discussion in which he raised a concern
> > > > > about having the kernel blindly enable ATS for cxl.cache devices
> > > > > instead of creating a knob for the admin to configure from userspace
> > > > > (in case security is viewed as more important than functionality,
> > > > > given that it allows DMA to read data out of CPU caches)...
> > > > >
> > > > +CC Linux-cxl
> > >
> > > A cxl.cache device supporting ATS will automatically enable ATS today
> > > if the kernel option to enable translation is set.
> > >
> > > Even if the device is marked untrusted by the PCI layer (e.g. an
> > > external port).
> > >
> > > Yes, this is effectively a security issue, but it is not really a
> > > CXL-specific problem.
> >
> > My contention is that it is a worse or at least different problem in the
> > CXL case because now you have a new toolkit in an attack that wants to
> > exfiltrate data from CPU caches.
>
> ?? I don't see CXL as meaningfully different than PCI in terms of what
> data can be accessed with Translated requests. If the IOMMU doesn't
> block Translated requests the whole system is open. CXL doesn't make
> it more open.

Right, the game is mostly over in the current case, but CXL.cache still
deserves to be treated carefully. Consider a world where we do have
limitations against requests to HPAs that were never translated for the
device. In that scenario the device can help side-channel the contents
of HPAs to which it does not otherwise have access by messing with
aliased lines to which it does have access.

At a minimum CXL.cache is not improving the security story, so no time
like the present to put a policy mechanism in place that improves upon
the PCI untrusted flag.

> > "We have a less than perfect legacy way (PCI untrusted flag) to nod at
> > ATS security problems. Let us ignore even that for a new class of
> > devices that advertise they can trigger all the old security problems
> > plus new ones."
>
> Ah, I missed that we are already force disabling ATS in this untrusted
> case, so we should ensure that continues to be the case here too.
> Nicolin, does it need a change?
>
> > I do not immediately see what is wrong with requiring userspace policy
> > opt-in. That naturally gets replaced by installing the device's
> > certificate (for native PCI CMA), authenticating the device with the
> > TSM (for PCI IDE), or obviated by secure-ATS if that arrives.
>
> I think that goes back to the discussion about not loading drivers
> before validating the device.
>
> It would also make a lot of sense to leave the IOMMU blocking until the
> driver is loaded for these secure situations. The blocking translation
> should block ATS too.
>
> Then the flow you are describing will work well:
>
> 1) At pre-boot the IOMMU will block all DMA including Translated.
> 2) The OS activates the IOMMU driver and keeps blocking.
> 3) Instead of immediately binding a default domain the IOMMU core
> leaves the translation blocking.
> 4) The OS defers loading the driver to userspace.
> 5) Userspace measures the device and "accepts" it by loading the
> driver
> 6) IOMMU core attaches a non-blocking default domain and activates ATS

That works for me. Give the paranoid the ability to have a point where
they can be assured that the shields were not lowered prematurely.
> From: Williams, Dan J <dan.j.williams@intel.com>
> Sent: Friday, January 23, 2026 3:46 AM
>
> Jason Gunthorpe wrote:
> > On Wed, Jan 21, 2026 at 09:44:32PM -0800, dan.j.williams@intel.com
> wrote:
> > > I do not immediately see what is wrong with requiring userspace policy
> > > opt-in. That naturally gets replaced by installing the device's
> > > certificate (for native PCI CMA), authenticating the device with the
> > > TSM (for PCI IDE), or obviated by secure-ATS if that arrives.
> >
> > I think that goes back to the discussion about not loading drivers
> > before validating the device.
> >
> > It would also make a lot of sense to leave the IOMMU blocking until the
> > driver is loaded for these secure situations. The blocking translation
> > should block ATS too.
> >
> > Then the flow you are describing will work well:
> >
> > 1) At pre-boot the IOMMU will block all DMA including Translated.
> > 2) The OS activates the IOMMU driver and keeps blocking.
> > 3) Instead of immediately binding a default domain the IOMMU core
> > leaves the translation blocking.
> > 4) The OS defers loading the driver to userspace.
> > 5) Userspace measures the device and "accepts" it by loading the
> > driver
> > 6) IOMMU core attaches a non-blocking default domain and activates ATS
>
> That works for me. Give the paranoid the ability to have a point where
> they can be assured that the shields were not lowered prematurely.

Jason described the flow as "for these secure situations", i.e. not a general
requirement for cxl.cache, but iiuc Dan may instead want userspace policy
opt-in to be default (and with CMA/TSM etc. it gets easier)?

Better to clarify the agreement here, as the outcome decides whether to
continue what this series tries to do...

At a glance cxl.cache devices have gained ATS enabled automatically in
most cases (same as for all other ats-capable PCI devices):

- ARM: ATS is enabled automatically when attaching the default domain
to the device in certain configurations, and this series tries to auto
enable it in a missing configuration
- AMD: ATS is enabled at domain attach time
- Intel: ATS is enabled when a device is probed by intel-iommu driver
(incompatible with the suggested flow)

Given above already shipped in distributions, probably we have to keep
them for compatibility (implying this series makes sense to fix a gap
in existing policy), then treat the suggested flow as an enhancement
for future?
On Tue, Jan 27, 2026 at 08:10:06AM +0000, Tian, Kevin wrote:
> > From: Williams, Dan J <dan.j.williams@intel.com>
> > Sent: Friday, January 23, 2026 3:46 AM
> >
> > Jason Gunthorpe wrote:
> > > On Wed, Jan 21, 2026 at 09:44:32PM -0800, dan.j.williams@intel.com
> > wrote:
> > > > I do not immediately see what is wrong with requiring userspace policy
> > > > opt-in. That naturally gets replaced by installing the device's
> > > > certificate (for native PCI CMA), authenticating the device with the
> > > > TSM (for PCI IDE), or obviated by secure-ATS if that arrives.
> > >
> > > I think that goes back to the discussion about not loading drivers
> > > before validating the device.
> > >
> > > It would also make a lot of sense to leave the IOMMU blocking until the
> > > driver is loaded for these secure situations. The blocking translation
> > > should block ATS too.
> > >
> > > Then the flow you are describing will work well:
> > >
> > > 1) At pre-boot the IOMMU will block all DMA including Translated.
> > > 2) The OS activates the IOMMU driver and keeps blocking.
> > > 3) Instead of immediately binding a default domain the IOMMU core
> > > leaves the translation blocking.
> > > 4) The OS defers loading the driver to userspace.
> > > 5) Userspace measures the device and "accepts" it by loading the
> > > driver
> > > 6) IOMMU core attaches a non-blocking default domain and activates ATS
> >
> > That works for me. Give the paranoid the ability to have a point where
> > they can be assured that the shields were not lowered prematurely.
>
> Jason described the flow as "for these secure situations", i.e. not a general
> requirement for cxl.cache, but iiuc Dan may instead want userspace policy
> opt-in to be default (and with CMA/TSM etc. it gets easier)?

I think the general strategy has been to push userspace to do security
decisions before binding drivers. So we have a plan for confidential
compute VMs, and if there is interest then we can probably re-use that
plan in all other cases.

> At a glance cxl.cache devices have gained ATS enabled automatically in
> most cases (same as for all other ats-capable PCI devices):

Yes.

> - ARM: ATS is enabled automatically when attaching the default domain
> to the device in certain configurations, and this series tries to auto
> enable it in a missing configuration

Yes, ARM took the position that ATS should be left disabled for
IDENTITY both because of SMMU constraints and also because it made
some sense that you wouldn't want ATS overhead just to get a 1:1
translation.

> - AMD: ATS is enabled at domain attach time

I'd argue this is an error and it should work like ARM

> - Intel: ATS is enabled when a device is probed by intel-iommu driver
> (incompatible with the suggested flow)

This is definitely not a good choice :)

IMHO it is security required that the IOMMU driver block Translated
requests while a BLOCKED domain is attached, and while the IOMMU is
refusing ATS then device's ATS enable should be disabled.

> Given above already shipped in distributions, probably we have to keep
> them for compatibility (implying this series makes sense to fix a gap
> in existing policy), then treat the suggested flow as an enhancement
> for future?

I don't think we have a compatibility issue here, just a security
one.

Drivers need to ensure that ATS is disabled at PCI and Translated
requests blocked in IOMMU HW while a BLOCKED domain is attached.

Drivers can choose if they want to enable ATS for IDENTITY or not,
(recommend not for performance and consistency).

Jason
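
(A rough sketch of that rule as it might look in an IOMMU driver's attach
path; everything except the PCI ATS helpers, the iommu_domain types and the
pci_dev fields is hypothetical.)
-----------------------------------------------------------------
#include <linux/iommu.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>

/* Hypothetical attach hook illustrating the BLOCKED/IDENTITY ATS rules */
static int example_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	struct pci_dev *pdev;

	if (!dev_is_pci(dev))
		return -EINVAL;
	pdev = to_pci_dev(dev);

	switch (domain->type) {
	case IOMMU_DOMAIN_BLOCKED:
		/* BLOCKED must refuse Translated requests too, so ATS goes off */
		if (pdev->ats_enabled)
			pci_disable_ats(pdev);
		break;
	case IOMMU_DOMAIN_IDENTITY:
		/* Optional for 1:1; only the "always on" devices really need it */
		if (pci_ats_always_on(pdev) && pci_enable_ats(pdev, PAGE_SHIFT))
			return -ENODEV;
		break;
	default:
		/* Paging domains: keep enabling ATS "on demand" as today */
		break;
	}

	/* ...program the actual HW translation for 'domain' here... */
	return 0;
}
-----------------------------------------------------------------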
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Tuesday, January 27, 2026 11:05 PM
>
> On Tue, Jan 27, 2026 at 08:10:06AM +0000, Tian, Kevin wrote:
> > > From: Williams, Dan J <dan.j.williams@intel.com>
> > > Sent: Friday, January 23, 2026 3:46 AM
> > >
> > > Jason Gunthorpe wrote:
> > > > On Wed, Jan 21, 2026 at 09:44:32PM -0800, dan.j.williams@intel.com
> > > wrote:
> > > > > I do not immediately see what is wrong with requiring userspace
> policy
> > > > > opt-in. That naturally gets replaced by installing the device's
> > > > > certificate (for native PCI CMA), authenticating the device with the
> > > > > TSM (for PCI IDE), or obviated by secure-ATS if that arrives.
> > > >
> > > > I think that goes back to the discussion about not loading drivers
> > > > before validating the device.
> > > >
> > > > It would also make alot of sense to leave the IOMMU blocking until the
> > > > driver is loaded for these secure situations. The blocking translation
> > > > should block ATS too.
> > > >
> > > > Then the flow you are describing will work well:
> > > >
> > > > 1) At pre-boot the IOMMU will block all DMA including Translated.
> > > > 2) The OS activates the IOMMU driver and keeps blocking.
> > > > 3) Instead of immediately binding a default domain the IOMMU core
> > > > leaves the translation blocking.
> > > > 4) The OS defers loading the driver to userspace.
> > > > 5) Userspace measures the device and "accepts" it by loading the
> > > > driver
> > > > 6) IOMMU core attaches a non-blocking default domain and activates
> ATS
> > >
> > > That works for me. Give the paranoid the ability to have a point where
> they
> > > can
> > > be assured that the shields were not lowered prematurely.
> >
> > Jason described the flow as "for these secure situations", i.e. not a general
> > requirement for cxl.cache, but iiuc Dan may instead want userspace policy
> > opt-in to be default (and with CMA/TSM etc. it gets easier)?
>
> I think the general strategy has been to push userspace to do security
> decisions before binding drivers. So we have a plan for confidential
> compute VMs, and if there is interest then we can probably re-use that
> plan in all other cases.
Makes sense.
>
> > At a glance cxl.cache devices have gained ATS enabled automatically in
> > most cases (same as for all other ats-capable PCI devices):
>
> Yes.
>
> > - ARM: ATS is enabled automatically when attaching the default domain
> > to the device in certain configurations, and this series tries to auto
> > enable it in a missing configuration
>
> Yes, ARM took the position that ATS should be left disabled for
> IDENTITY both because of SMMU constraints and also because it made
> some sense that you wouldn't want ATS overhead just to get a 1:1
> translation.
>
> > - AMD: ATS is enabled at domain attach time
>
> I'd argue this is an error and it should work like ARM
>
> > - Intel: ATS is enabled when a device is probed by intel-iommu driver
> > (incompatible with the suggested flow)
>
> This is definitely not a good choice :)
>
> IMHO it is security required that the IOMMU driver block Translated
> requests while a BLOCKED domain is attached, and while the IOMMU is
> refusing ATS then device's ATS enable should be disabled.
It was made that way by commit 5518f239aff1 ("iommu/vt-d: Move
scalable mode ATS enablement to probe path "). The old policy was
same as AMD side, and changed to current way so domain change
in RID won't affect the ATS requirement for PASIDs.
But I agree BLOCKED is special. Ideally there is no reason to have
RID attached to a BLOCKED domain while PASIDs are not
blocked... oh, wait
there is one scenario, e.g. VFIO allows domains attached to RID and
PASIDs being changed independently. It's a sane situation to have
userspace change RID domain via attach/detach/re-attach, while
PASID domains are still active. 'detach' will attach RID to a BLOCKED
domain, then disabling ATS in that window may break PASIDs.
How does ARM address this scenario? Is it more suitable to have
a new interface specific for driver bind/unbind to enable/disable
ATS instead of toggling it based on BLOCKED?
>
> > Given above already shipped in distributions, probably we have to keep
> > them for compatibility (implying this series makes sense to fix a gap
> > in existing policy), then treat the suggested flow as an enhancement
> > for future?
>
> I don't think we have a compatibility issue here, just a security
> one.
'compatibility issue' if auto-enabling is completely removed with
userspace opt-in as the only way. But since it's not the case, then
yes it's more a security one.
>
> Drivers need to ensure that ATS is disabled at PCI and Translated
> requests blocked in IOMMU HW while a BLOCKED domain is attached.
>
> Drivers can choose if they want to enable ATS for IDENTITY or not,
> (recommend not for performance and consistency).
>
> Jason
On Wed, Jan 28, 2026 at 12:57:59AM +0000, Tian, Kevin wrote:
> > > - Intel: ATS is enabled when a device is probed by intel-iommu driver
> > > (incompatible with the suggested flow)
> >
> > This is definitely not a good choice :)
> >
> > IMHO it is security required that the IOMMU driver block Translated
> > requests while a BLOCKED domain is attached, and while the IOMMU is
> > refusing ATS then device's ATS enable should be disabled.
>
> It was made that way by commit 5518f239aff1 ("iommu/vt-d: Move
> scalable mode ATS enablement to probe path "). The old policy was
> same as AMD side, and changed to current way so domain change
> in RID won't affect the ATS requirement for PASIDs.
That's a legitimate thing, but always-on is a heavy-handed solution.
The driver should track what is going on with the PASID and enable ATS
if required.
Which also solves this:
> there is one scenario, e.g. VFIO allows domains attached to RID and
> PASIDs being changed independently. It's a sane situation to have
> userspace change RID domain via attach/detach/re-attach, while
> PASID domains are still active. 'detach' will attach RID to a BLOCKED
> domain, then disabling ATS in that window may break PASIDs.
> How does ARM address this scenario? Is it more suitable to have
> a new interface specific for driver bind/unbind to enable/disable
> ATS instead of toggling it based on BLOCKED?
And that is what SMMUv3 is doing already. With an IDENTITY translation on
the RID it starts out with ATS disabled and switches to IDENTITY with
ATS enabled when the first PASID appears. Switches back when the PASID
goes away.
Jason
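
(Not the actual arm-smmu-v3 code, just a sketch of the idea: track how many
PASID domains are attached and toggle ATS on the IDENTITY RID translation on
the first/last one. The structure and helpers are invented; only the PCI ATS
calls are real.)
-----------------------------------------------------------------
#include <linux/pci.h>
#include <linux/pci-ats.h>

/* Invented per-device state, standing in for the driver's master struct */
struct example_master {
	struct pci_dev *pdev;
	unsigned int nr_attached_pasids;
	bool rid_is_identity;
};

/* First PASID domain: IDENTITY RID switches to IDENTITY + ATS enabled */
static int example_pasid_attach(struct example_master *m)
{
	if (m->rid_is_identity && m->nr_attached_pasids == 0) {
		int ret = pci_enable_ats(m->pdev, PAGE_SHIFT);

		if (ret)
			return ret;
	}
	m->nr_attached_pasids++;
	return 0;
}

/* Last PASID domain gone: switch back to IDENTITY without ATS */
static void example_pasid_detach(struct example_master *m)
{
	if (m->rid_is_identity && --m->nr_attached_pasids == 0)
		pci_disable_ats(m->pdev);
}
-----------------------------------------------------------------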
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Wednesday, January 28, 2026 9:12 PM
>
> On Wed, Jan 28, 2026 at 12:57:59AM +0000, Tian, Kevin wrote:
> > > > - Intel: ATS is enabled when a device is probed by intel-iommu driver
> > > > (incompatible with the suggested flow)
> > >
> > > This is definitely not a good choice :)
> > >
> > > IMHO it is security required that the IOMMU driver block Translated
> > > requests while a BLOCKED domain is attached, and while the IOMMU is
> > > refusing ATS then device's ATS enable should be disabled.
> >
> > It was made that way by commit 5518f239aff1 ("iommu/vt-d: Move
> > scalable mode ATS enablement to probe path "). The old policy was
> > same as AMD side, and changed to current way so domain change
> > in RID won't affect the ATS requirement for PASIDs.
>
> That's a legitimate thing, but always-on is a heavy-handed solution.
Yes. At that point the rationale was made purely based on functionality
instead of security.
>
> The driver should track what is going on with the PASID and enable ATS
> if required.
>
> Which also solves this:
>
> > there is one scenario, e.g. VFIO allows domains attached to RID and
> > PASIDs being changed independently. It's a sane situation to have
> > userspace change RID domain via attach/detach/re-attach, while
> > PASID domains are still active. 'detach' will attach RID to a BLOCKED
> > domain, then disabling ATS in that window may break PASIDs.
>
> > How does ARM address this scenario? Is it more suitable to have
> > a new interface specific for driver bind/unbind to enable/disable
> > ATS instead of toggling it based on BLOCKED?
>
> And that is what SMMUv3 is doing already. With an IDENTITY translation on
> the RID it starts out with ATS disabled and switches to IDENTITY with
> ATS enabled when the first PASID appears. Switches back when the PASID
> goes away.
>
Yes, that should work. In that way the driver binding flow is covered
automatically.
Jason Gunthorpe wrote:
[..]
> > Jason described the flow as "for these secure situations", i.e. not a general
> > requirement for cxl.cache, but iiuc Dan may instead want userspace policy
> > opt-in to be default (and with CMA/TSM etc. it gets easier)?
>
> I think the general strategy has been to push userspace to do security
> decisions before binding drivers. So we have a plan for confidential
> compute VMs, and if there is interest then we can probably re-use that
> plan in all other cases.

Right, if you want to configure a kernel to automatically enable ATS
that is a choice. But, as distros get more security conscious about
devices for confidential compute, it would be nice to be able to rely on
the same opt-in model for other security concerns like ATS security.

> > At a glance cxl.cache devices have gained ATS enabled automatically in
> > most cases (same as for all other ats-capable PCI devices):
>
> Yes.
>
> > - ARM: ATS is enabled automatically when attaching the default domain
> > to the device in certain configurations, and this series tries to auto
> > enable it in a missing configuration
>
> Yes, ARM took the position that ATS should be left disabled for
> IDENTITY both because of SMMU constraints and also because it made
> some sense that you wouldn't want ATS overhead just to get a 1:1
> translation.

Does this mean that ARM already today does not enable ATS until driver
attach, or is incremental work needed for that capability?

> > - AMD: ATS is enabled at domain attach time
>
> I'd argue this is an error and it should work like ARM
>
> > - Intel: ATS is enabled when a device is probed by intel-iommu driver
> > (incompatible with the suggested flow)
>
> This is definitely not a good choice :)
>
> IMHO it is security required that the IOMMU driver block Translated
> requests while a BLOCKED domain is attached, and while the IOMMU is
> refusing ATS then device's ATS enable should be disabled.
>
> > Given above already shipped in distributions, probably we have to keep
> > them for compatibility (implying this series makes sense to fix a gap
> > in existing policy), then treat the suggested flow as an enhancement
> > for future?
>
> I don't think we have a compatibility issue here, just a security
> one.
>
> Drivers need to ensure that ATS is disabled at PCI and Translated
> requests blocked in IOMMU HW while a BLOCKED domain is attached.

"Drivers" here meaning IOMMU drivers, right?

> Drivers can choose if they want to enable ATS for IDENTITY or not,
> (recommend not for performance and consistency).
>
> Jason
On Tue, Jan 27, 2026 at 04:49:07PM -0800, dan.j.williams@intel.com wrote:
> > Yes, ARM took the position that ATS should be left disabled for
> > IDENTITY both because of SMMU constraints and also because it made
> > some sense that you wouldn't want ATS overhead just to get a 1:1
> > translation.
>
> Does this mean that ARM already today does not enable ATS until driver
> attach, or is incremental work needed for that capability?

All of the iommu drivers setup an iommu translation and enable ATS
before any driver is bound.

We would need to do more work in the core to leave the translation
blocked when there is no driver. I don't think it is that difficult

> > Drivers need to ensure that ATS is disabled at PCI and Translated
> > requests blocked in IOMMU HW while a BLOCKED domain is attached.
>
> "Drivers" here meaning IOMMU drivers, right?

Yes

Jason
On Wed, Jan 28, 2026 at 09:05:20AM -0400, Jason Gunthorpe wrote:
> On Tue, Jan 27, 2026 at 04:49:07PM -0800, dan.j.williams@intel.com wrote:
> > > Yes, ARM took the position that ATS should be left disabled for
> > > IDENTITY both because of SMMU constraints and also because it made
> > > some sense that you wouldn't want ATS overhead just to get a 1:1
> > > translation.
> >
> > Does this mean that ARM already today does not enable ATS until driver
> > attach, or is incremental work needed for that capability?
>
> All of the iommu drivers setup an iommu translation and enable ATS
> before any driver is bound.
>
> We would need to do more work in the core to leave the translation
> blocked when there is no driver. I don't think it is that difficult
Hmm, not sure if we could use group->domain=NULL as "blocked"..
Otherwise, I made a draft:
-----------------------------------------------------------------
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 349f31bedfa17..8ed15d5ea1f51 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -437,8 +437,6 @@ static int driver_sysfs_add(struct device *dev)
{
int ret;
- bus_notify(dev, BUS_NOTIFY_BIND_DRIVER);
-
ret = sysfs_create_link(&dev->driver->p->kobj, &dev->kobj,
kobject_name(&dev->kobj));
if (ret)
@@ -638,10 +636,12 @@ static int really_probe(struct device *dev, const struct device_driver *drv)
if (ret)
goto pinctrl_bind_failed;
+ bus_notify(dev, BUS_NOTIFY_BIND_DRIVER);
+
if (dev->bus->dma_configure) {
ret = dev->bus->dma_configure(dev);
if (ret)
- goto pinctrl_bind_failed;
+ goto bus_notify_bind_failed;
}
ret = driver_sysfs_add(dev);
@@ -717,9 +717,10 @@ static int really_probe(struct device *dev, const struct device_driver *drv)
probe_failed:
driver_sysfs_remove(dev);
sysfs_failed:
- bus_notify(dev, BUS_NOTIFY_DRIVER_NOT_BOUND);
if (dev->bus && dev->bus->dma_cleanup)
dev->bus->dma_cleanup(dev);
+bus_notify_bind_failed:
+ bus_notify(dev, BUS_NOTIFY_DRIVER_NOT_BOUND);
pinctrl_bind_failed:
device_links_no_driver(dev);
device_unbind_cleanup(dev);
@@ -1275,8 +1276,6 @@ static void __device_release_driver(struct device *dev, struct device *parent)
driver_sysfs_remove(dev);
- bus_notify(dev, BUS_NOTIFY_UNBIND_DRIVER);
-
pm_runtime_put_sync(dev);
device_remove(dev);
@@ -1284,6 +1283,8 @@ static void __device_release_driver(struct device *dev, struct device *parent)
if (dev->bus && dev->bus->dma_cleanup)
dev->bus->dma_cleanup(dev);
+ bus_notify(dev, BUS_NOTIFY_UNBIND_DRIVER);
+
device_unbind_cleanup(dev);
device_links_driver_cleanup(dev);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 2ca990dfbb884..af53dce00e29b 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -106,6 +106,7 @@ static int __iommu_attach_group(struct iommu_domain *domain,
static struct iommu_domain *__iommu_paging_domain_alloc_flags(struct device *dev,
unsigned int type,
unsigned int flags);
+static int __iommu_group_alloc_blocking_domain(struct iommu_group *group);
enum {
IOMMU_SET_DOMAIN_MUST_SUCCEED = 1 << 0,
@@ -618,12 +619,6 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
ret = iommu_init_device(dev);
if (ret)
return ret;
- /*
- * And if we do now see any replay calls, they would indicate someone
- * misusing the dma_configure path outside bus code.
- */
- if (dev->driver)
- dev_WARN(dev, "late IOMMU probe at driver bind, something fishy here!\n");
group = dev->iommu_group;
gdev = iommu_group_alloc_device(group, dev);
@@ -641,6 +636,15 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
WARN_ON(group->default_domain && !group->domain);
if (group->default_domain)
iommu_create_device_direct_mappings(group->default_domain, dev);
+
+ /* Block translation requests from a device without driver */
+ if (!dev->driver) {
+ ret = __iommu_group_alloc_blocking_domain(group);
+ if (ret)
+ goto err_remove_gdev;
+ group->domain = group->blocking_domain;
+ }
+
if (group->domain) {
ret = __iommu_device_set_domain(group, dev, group->domain, NULL,
0);
@@ -1781,19 +1785,70 @@ static int probe_iommu_group(struct device *dev, void *data)
return ret;
}
+static int iommu_attach_default_domain(struct device *dev)
+{
+ struct iommu_group *group = iommu_group_get(dev);
+ int ret = 0;
+
+ if (!group)
+ return 0;
+
+ mutex_lock(&group->mutex);
+
+ if (group->blocking_domain) {
+ if (!group->default_domain) {
+ ret = iommu_setup_default_domain(group, 0);
+ if (!ret)
+ iommu_setup_dma_ops(dev);
+ } else if (group->domain == group->blocking_domain) {
+ ret = __iommu_group_set_domain(
+ group, group->default_domain);
+ }
+ }
+
+ mutex_unlock(&group->mutex);
+ iommu_group_put(group);
+ return ret;
+}
+
+static void iommu_detach_default_domain(struct device *dev)
+{
+ struct iommu_group *group = iommu_group_get(dev);
+
+ if (!group)
+ return;
+
+ mutex_lock(&group->mutex);
+
+ if (group->blocking_domain && group->domain != group->blocking_domain) {
+ __iommu_attach_device(group->blocking_domain, dev,
+ group->domain);
+ group->domain = group->blocking_domain;
+ }
+
+ mutex_unlock(&group->mutex);
+ iommu_group_put(group);
+}
+
static int iommu_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
struct device *dev = data;
+ int ret;
if (action == BUS_NOTIFY_ADD_DEVICE) {
- int ret;
-
ret = iommu_probe_device(dev);
return (ret) ? NOTIFY_DONE : NOTIFY_OK;
} else if (action == BUS_NOTIFY_REMOVED_DEVICE) {
iommu_release_device(dev);
return NOTIFY_OK;
+ } else if (action == BUS_NOTIFY_BIND_DRIVER) {
+ ret = iommu_attach_default_domain(dev);
+ return ret ? NOTIFY_DONE : NOTIFY_OK;
+ } else if (action == BUS_NOTIFY_UNBOUND_DRIVER ||
+ action == BUS_NOTIFY_DRIVER_NOT_BOUND) {
+ iommu_detach_default_domain(dev);
+ return NOTIFY_OK;
}
return 0;
-----------------------------------------------------------------
Thanks
Nicolin
On Mon, Feb 02, 2026 at 09:13:50PM -0800, Nicolin Chen wrote:
> On Wed, Jan 28, 2026 at 09:05:20AM -0400, Jason Gunthorpe wrote:
> > On Tue, Jan 27, 2026 at 04:49:07PM -0800, dan.j.williams@intel.com wrote:
> > > > Yes, ARM took the position that ATS should be left disabled for
> > > > IDENTITY both because of SMMU constraints and also because it made
> > > > some sense that you wouldn't want ATS overhead just to get a 1:1
> > > > translation.
> > >
> > > Does this mean that ARM already today does not enable ATS until driver
> > > attach, or is incremental work needed for that capability?
> >
> > All of the iommu drivers setup an iommu translation and enable ATS
> > before any driver is bound.
> >
> > We would need to do more work in the core to leave the translation
> > blocked when there is no driver. I don't think it is that difficult
>
> Hmm, not sure if we could use group->domain=NULL as "blocked"..
Definitely not, we need to use a proper blocked domain.
> @@ -437,8 +437,6 @@ static int driver_sysfs_add(struct device *dev)
> {
> int ret;
>
> - bus_notify(dev, BUS_NOTIFY_BIND_DRIVER);
> -
> ret = sysfs_create_link(&dev->driver->p->kobj, &dev->kobj,
> kobject_name(&dev->kobj));
> if (ret)
> @@ -638,10 +636,12 @@ static int really_probe(struct device *dev, const struct device_driver *drv)
> if (ret)
> goto pinctrl_bind_failed;
>
> + bus_notify(dev, BUS_NOTIFY_BIND_DRIVER);
> +
> if (dev->bus->dma_configure) {
> ret = dev->bus->dma_configure(dev);
> if (ret)
> - goto pinctrl_bind_failed;
> + goto bus_notify_bind_failed;
> }
We shouldn't need any of these? The dma_configure callback already
gets into the iommu code to validate the domain and restrict VFIO,
no further callbacks should be needed.
When the iommu driver is probed to the device we can assume no driver
is bound and immediately attach the blocked domain.
Jason
On Tue, Feb 03, 2026 at 10:33:48AM -0400, Jason Gunthorpe wrote:
> On Mon, Feb 02, 2026 at 09:13:50PM -0800, Nicolin Chen wrote:
> > On Wed, Jan 28, 2026 at 09:05:20AM -0400, Jason Gunthorpe wrote:
> > > On Tue, Jan 27, 2026 at 04:49:07PM -0800, dan.j.williams@intel.com wrote:
> > > > > Yes, ARM took the position that ATS should be left disabled for
> > > > > IDENTITY both because of SMMU constraints and also because it made
> > > > > some sense that you wouldn't want ATS overhead just to get a 1:1
> > > > > translation.
> > > >
> > > > Does this mean that ARM already today does not enable ATS until driver
> > > > attach, or is incremental work needed for that capability?
> > >
> > > All of the iommu drivers setup an iommu translation and enable ATS
> > > before any driver is bound.
> > >
> > > We would need to do more work in the core to leave the translation
> > > blocked when there is no driver. I don't think it is that difficult
> >
> > Hmm, not sure if we could use group->domain=NULL as "blocked"..
>
> Definitely not, we need to use a proper blocked domain.
Yea, I suspected so.
> > @@ -437,8 +437,6 @@ static int driver_sysfs_add(struct device *dev)
> > {
> > int ret;
> >
> > - bus_notify(dev, BUS_NOTIFY_BIND_DRIVER);
> > -
> > ret = sysfs_create_link(&dev->driver->p->kobj, &dev->kobj,
> > kobject_name(&dev->kobj));
> > if (ret)
> > @@ -638,10 +636,12 @@ static int really_probe(struct device *dev, const struct device_driver *drv)
> > if (ret)
> > goto pinctrl_bind_failed;
> >
> > + bus_notify(dev, BUS_NOTIFY_BIND_DRIVER);
> > +
> > if (dev->bus->dma_configure) {
> > ret = dev->bus->dma_configure(dev);
> > if (ret)
> > - goto pinctrl_bind_failed;
> > + goto bus_notify_bind_failed;
> > }
>
> We shouldn't need any of these? The dma_configure callback already
> gets into the iommu code to validate the domain and restrict VFIO,
> no further callbacks should be needed.
>
> When the iommu driver is probed to the device we can assume no driver
> is bound and immediately attach the blocked domain.
I was trying to use dev->driver that gets set before dma_configure()
and unset after dma_cleanup(). But it looks like we could just keep
track of group->owner_cnt in iommu_device_use/unuse_default_domain().
Btw, attaching to IOMMU_DOMAIN_BLOCKED/group->blocking_domain is not
allowed in general if require_direct=true. I assume this case can be
an exception since there's no point in allowing a device that has no
driver yet to access any reserved region?
Thanks
Nicolin
On Tue, Feb 03, 2026 at 09:45:17AM -0800, Nicolin Chen wrote:
> Btw, attaching to IOMMU_DOMAIN_BLOCKED/group->blocking_domain is not
> allowed in general if require_direct=true. I assume this case can be
> an exception since there's no point in allowing a device that has no
> driver yet to access any reserved region?

If require_direct is set then we have to disable this mechanism..

I'm not sure exactly what to do about this as the require_direct comes
from the hypervisor in a CC VM and we probably don't want to give the
hypervisor this kind of escape hatch.

Perhaps we need to lock off to failure on CC VMs if this ever
happens..

But baremetal should just keep working how it always worked in this
case..

Jason
On 2026-02-03 5:55 pm, Jason Gunthorpe wrote:
> On Tue, Feb 03, 2026 at 09:45:17AM -0800, Nicolin Chen wrote:
>> Btw, attaching to IOMMU_DOMAIN_BLOCKED/group->blocking_domain is not
>> allowed in general if require_direct=true. I assume this case can be
>> an exception since there's no point in allowing a device that has no
>> driver yet to access any reserved region?

No, the point of RMRs in general is that the device can be assumed to
already be accessing them, and that access must be preserved, regardless
of whether an OS driver may or may not take over the device later. In
fact RMRs with the "Remapping Permitted" flag are only strictly needed
*until* an OS driver has taken control over whatever it was that
firmware left them doing.

> If require_direct is set then we have to disable this mechanism..
>
> I'm not sure exactly what to do about this as the require_direct comes
> from the hypervisor in a CC VM and we probably don't want to give the
> hypervisor this kind of escape hatch.
>
> Perhaps we need to lock off to failure on CC VMs if this ever
> happens..
>
> But baremetal should just keep working how it always worked in this
> case..

Realistically this combination cannot exist bare-metal, since if the device
requires to send ATS TT's to access an RMR then the SMMU would have to be
enabled pre-boot, so then the RMR means we cannot ever disable it to
reconfigure, so we'd be stuffed from the start...

Even though it's potentially a little more plausible in a VM where the
underlying S2 would satisfy ATS, for much the same reason it's still
going to look highly suspect from the VM's point of view to be presented
with a device whose apparent ability to perform ATS traffic through a
supposedly-disabled S1 SMMU must not be disrupted.

However I think there would be no point exposing the ATS details to
the VM to begin with. It's the host's decision to trust the device
to play in the translated PA space and system cache coherency
protocol, and no guest would be allowed to mess with those aspects
either way, so there seems no obvious good reason for them to know
at all.

Thanks,
Robin.
On Tue, Feb 03, 2026 at 06:59:35PM +0000, Robin Murphy wrote:
> Realistically this combination cannot exist bare-metal, since if the device
> requires to send ATS TT's to access an RMR then the SMMU would have to be
> enabled pre-boot, so then the RMR means we cannot ever disable it to
> reconfigure, so we'd be stuffed from the start...

This thread has gotten mixed up..

First this series as it is has nothing to do with RMRs.

What the latter part is discussing is a future series to implement
what I think MS calls "boot DMA security". Meaning we don't get into a
position of allowing a device access to OS memory, even through ATS
translated requests, until after userspace has approved the device.

This is something that should combine with Dynamic Root of Trust for
Measurement, as DRTM is much less useful if DMA can mutate the OS code
after the DRTM returns.

It is also meaningful for systems with encrypted PCI where the OS can
measure the PCI device before permitting it access to anything.

So... When we do implement this new security mode, what should it do
if FW attempts to attack the kernel with these nonsensical RMR
configurations? With DRTM we explicitly don't trust the FW for
security anymore, so it is a problem.

I strongly suspect the answer is that RMR has to be ignored in this
more secure mode.

> However I think there would be no point exposing the ATS details to
> the VM to begin with. It's the host's decision to trust the device
> to play in the translated PA space and system cache coherency
> protocol, and no guest would be allowed to mess with those aspects
> either way, so there seems no obvious good reason for them to know
> at all.

If the vSMMU is presented then the guest must be aware of the ATS
because only the guest can generate the ATC invalidations for changes
in the S1.

Without a vSMMU the ATS can be hidden from the guest.

Jason
On 2026-02-03 11:16 pm, Jason Gunthorpe wrote:
> On Tue, Feb 03, 2026 at 06:59:35PM +0000, Robin Murphy wrote:
>> Realistically this combination cannot exist bare-metal, since if the device
>> requires to send ATS TT's to access an RMR then the SMMU would have to be
>> enabled pre-boot, so then the RMR means we cannot ever disable it to
>> reconfigure, so we'd be stuffed from the start...
>
> This thread has gotten mixed up..
>
> First this series as it is has nothing to do with RMRs.

I know, but you brought up require_direct so I figured it was worth
clarifying that it should not in fact impact ATS decisions, since the
combination of a device requiring ATS while *also* requiring an RMR would
be essentially impossible to support given the SMMU architecture, thus we
can reasonably assume nobody will do such a thing (or at least do it with
any expectation of it ever working).

> What the latter part is discussing is a future series to implement
> what I think MS calls "boot DMA security". Meaning we don't get into a
> position of allowing a device access to OS memory, even through ATS
> translated requests, until after userspace has approved the device.

Pre-boot protection is in the same boat, as things currently stand - an
OS could either preserve security (by using GBPA to keep traffic blocked
while reconfiguring the rest of the SMMU), *or* have ongoing DMA for a
splash screen or whatever; it cannot realistically have both.

> This is something that should combine with Dynamic Root of Trust for
> Measurement, as DRTM is much less useful if DMA can mutate the OS code
> after the DRTM returns.
>
> It is also meaningful for systems with encrypted PCI where the OS can
> measure the PCI device before permitting it access to anything.
>
> So... When we do implement this new security mode, what should it do
> if FW attempts to attack the kernel with these nonsensical RMR
> configurations? With DRTM we explicitly don't trust the FW for
> security anymore, so it is a problem.
>
> I strongly suspect the answer is that RMR has to be ignored in this
> more secure mode.

Yes, I think the only valid case for having an RMR and expecting it to work
in combination with these other things is if the device has some firmware or
preloaded configuration in memory which it will still need to access at that
address once an OS driver starts using it, but does not need to access
*during* the boot-time handover. Thus it seems fair to still honour the
reserved regions upon attaching to a default domain, but not worry too much
about being in a transient blocking state in the interim if it's unavoidable
for other reasons (at worst maybe just log a warning that we're
doing so).

>> However I think there would be no point exposing the ATS details to
>> the VM to begin with. It's the host's decision to trust the device
>> to play in the translated PA space and system cache coherency
>> protocol, and no guest would be allowed to mess with those aspects
>> either way, so there seems no obvious good reason for them to know
>> at all.
>
> If the vSMMU is presented then the guest must be aware of the ATS
> because only the guest can generate the ATC invalidations for changes
> in the S1.

Only if you assume DVM or some other mechanism for the guest to issue S1
invalidations directly to the hardware - with an emulated CMDQ we can do
whatever we like.

And in fact, I think we actually *have* to if the host has enabled ATS
itself, since we cannot assume that a guest is going to choose to use it,
thus we cannot rely on the guest issuing ATCIs in order to get the correct
behaviour it expects unless and until we've seen it set EATS appropriately
in all the corresponding vSTEs. So we would have to forbid DVM, and only
allow other direct mechanisms that can be dynamically enabled for as long
as the guest configuration matches... Fun.

Thanks,
Robin.
On Wed, Feb 04, 2026 at 12:18:15PM +0000, Robin Murphy wrote:
> > I strongly suspect the answer is that RMR has to be ignored in this
> > more secure mode.
>
> Yes, I think the only valid case for having an RMR and expecting it to work
> in combination with these other things is if the device has some firmware or
> preloaded configuration in memory which it will still need to access at that
> address once an OS driver starts using it, but does not need to access
> *during* the boot-time handover.

Splash screens are the most obvious case here where the framebuffer may
be in DMA'able memory and must go through the iommu.. At least we are
already shipping products where the GPU has a DRAM-based framebuffer,
the GPU requires ATS for a lot of functions, but the framebuffer scan
out does not use ATS. Sigh. So that will be exciting to make work at
some point.

> Thus it seems fair to still honour the
> reserved regions upon attaching to a default domain, but not worry too much
> about being in a transient blocking state in the interim if it's unavoidable
> for other reasons (at worst maybe just log a warning that we're
> doing so).

The interest in the blocking state was to disable ATS.

Maybe another approach would be to have a "RMR blocking" domain which is
a paging domain that tells the driver explicitly not to enable ATS for
it. Then we could validate the RMR range is OK and install this special
domain and still have security against translated TLPs..

> > > However I think there would be no point exposing the ATS details to
> > > the VM to begin with. It's the host's decision to trust the device
> > > to play in the translated PA space and system cache coherency
> > > protocol, and no guest would be allowed to mess with those aspects
> > > either way, so there seems no obvious good reason for them to know
> > > at all.
> >
> > If the vSMMU is presented then the guest must be aware of the ATS
> > because only the guest can generate the ATC invalidations for changes
> > in the S1.
>
> Only if you assume DVM or some other mechanism for the guest to issue S1
> invalidations directly to the hardware - with an emulated CMDQ we can do
> whatever we like.

With a lot of work yes, but that is not the model that is implemented
today.

If the hypervisor has to generate an ATC invalidation from an IOTLB
invalidation then it also needs a map of ASID to RID&PASID, which it can
only build by inspecting all the CD tables. The VMMs in nesting mode
don't read the CD tables at all today, so they don't implement this
option.

> And in fact, I think we actually *have* to if the host has enabled ATS
> itself, since we cannot assume that a guest is going to choose to use it,
> thus we cannot rely on the guest issuing ATCIs in order to get the correct
> behaviour it expects unless and until we've seen it set EATS appropriately
> in all the corresponding vSTEs.

Due to the above we've done the reverse, the host does not get to
unilaterally decide ATS policy, it follows the guest's vEATS setting so
that we never have a situation where the hypervisor has to generate ATC
invalidations.

The kernel offers the VMM the freedom to do it either way, but today all
the VMMs I'm aware of choose the above path.

Jason
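To make the "RMR blocking" idea above a bit more concrete, here is a rough
sketch under heavy assumptions: the helper name is invented, the error
handling is minimal, and the crucial "do not enable ATS" hint does not exist
anywhere yet - adding that hint is the whole point of the proposal.

static struct iommu_domain *alloc_rmr_blocking_domain(struct device *dev)
{
	struct iommu_resv_region *region;
	struct iommu_domain *domain;
	LIST_HEAD(resv_regions);
	int ret = 0;

	domain = iommu_paging_domain_alloc(dev);
	if (IS_ERR(domain))
		return domain;

	/* Identity-map only the firmware-described direct-mapped windows */
	iommu_get_resv_regions(dev, &resv_regions);
	list_for_each_entry(region, &resv_regions, list) {
		if (region->type != IOMMU_RESV_DIRECT &&
		    region->type != IOMMU_RESV_DIRECT_RELAXABLE)
			continue;
		ret = iommu_map(domain, region->start, region->start,
				region->length, region->prot, GFP_KERNEL);
		if (ret)
			break;
	}
	iommu_put_resv_regions(dev, &resv_regions);

	if (ret) {
		iommu_domain_free(domain);
		return ERR_PTR(ret);
	}

	/*
	 * Still missing: a way to tell the IOMMU driver "do not enable ATS
	 * for devices attached to this domain" -- that hint is exactly what
	 * the proposal above would have to add.
	 */
	return domain;
}

Whether such a hint would live on the domain itself or be a property of the
attach path is the first design question such a series would have to settle.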
On Tue, Feb 03, 2026 at 06:59:35PM +0000, Robin Murphy wrote:
> On 2026-02-03 5:55 pm, Jason Gunthorpe wrote:
> > On Tue, Feb 03, 2026 at 09:45:17AM -0800, Nicolin Chen wrote:
> > > Btw, attaching to IOMMU_DOMAIN_BLOCKED/group->blocking_domain is not
> > > allowed in general if require_direct=true. I assume this case can be
> > > an exception since there's no point in allowing a device that has no
> > > driver yet to access any reserved region?
>
> No, the point of RMRs in general is that the device can be assumed to
> already be accessing them, and that access must be preserved, regardless of
> whether an OS driver may or may not take over the device later.

I see. Thanks for the input.

> In fact RMRs
> with the "Remapping Permitted" flag are only strictly needed *until* an OS
> driver has taken control over whatever it was that firmware left them doing.

Yes. I see that doesn't set require_direct.

Nicolin
On Tue, Feb 03, 2026 at 01:55:40PM -0400, Jason Gunthorpe wrote:
> On Tue, Feb 03, 2026 at 09:45:17AM -0800, Nicolin Chen wrote:
> > Btw, attaching to IOMMU_DOMAIN_BLOCKED/group->blocking_domain is not
> > allowed in general if require_direct=true. I assume this case can be
> > an exception since there's no point in allowing a device that has no
> > driver yet to access any reserved region?
>
> If require_direct is set then we have to disable this mechanism..
>
> I'm not sure exactly what to do about this as the require_direct comes
> from the hypervisor in a CC VM and we probably don't want to give the
> hypervisor this kind of escape hatch.
>
> Perhaps we need to lock off to failure on CC VMs if this ever
> happens..
>
> But baremetal should just keep working how it always worked in this
> case..
OK. I will put a note in the patch, since it would literally skip
any VM case at this moment.
I just realized a corner case, as iommu_probe_device() may attach
the device to group->domain if it's set:
https://lore.kernel.org/all/9-v5-1b99ae392328+44574-iommu_err_unwind_jgg@nvidia.com/
I am not sure about the use case, but I assume we should skip the
blocking_domain as well in this case?
Then, this makes the condition be:
+ if (!dev->driver && !group->domain && !dev->iommu->require_direct) {
+ ret = __iommu_group_alloc_blocking_domain(group);
+ if (ret)
+ goto err_remove_gdev;
+ group->domain = group->blocking_domain;
+ }
Thanks
Nicolin
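Spelled out with comments, the proposed condition amounts to the following
(annotations added here for clarity; the helper and the error label come
from the work-in-progress tree referenced above, not necessarily mainline):

	/* Park a driverless device in a blocking domain only when that is safe */
	if (!dev->driver &&			/* no driver bound yet */
	    !group->domain &&			/* group not already attached elsewhere */
	    !dev->iommu->require_direct) {	/* no RMR-style direct mappings to preserve */
		ret = __iommu_group_alloc_blocking_domain(group);
		if (ret)
			goto err_remove_gdev;
		group->domain = group->blocking_domain;
	}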
On Tue, Feb 03, 2026 at 10:50:39AM -0800, Nicolin Chen wrote:
> On Tue, Feb 03, 2026 at 01:55:40PM -0400, Jason Gunthorpe wrote:
> > On Tue, Feb 03, 2026 at 09:45:17AM -0800, Nicolin Chen wrote:
> > > Btw, attaching to IOMMU_DOMAIN_BLOCKED/group->blocking_domain is not
> > > allowed in general if require_direct=true. I assume this case can be
> > > an exception since there's no point in allowing a device that has no
> > > driver yet to access any reserved region?
> >
> > If require_direct is set then we have to disable this mechanism..
> >
> > I'm not sure exactly what to do about this as the require_direct comes
> > from the hypervisor in a CC VM and we probably don't want to give the
> > hypervisor this kind of escape hatch.
> >
> > Perhaps we need to lock off to failure on CC VMs if this ever
> > happens..
> >
> > But baremetal should just keep working how it always worked in this
> > case..
>
> OK. I will put a note in the patch, since it would literally skip
> any VM case at this moment.
>
> I just realized a corner case, as iommu_probe_device() may attach
> the device to group->domain if it's set:
> https://lore.kernel.org/all/9-v5-1b99ae392328+44574-iommu_err_unwind_jgg@nvidia.com/
>
> I am not sure about the use case, but I assume we should skip the
> blocking_domain as well in this case?
>
> Then, this makes the condition be:
> + if (!dev->driver && !group->domain && !dev->iommu->require_direct) {
> + ret = __iommu_group_alloc_blocking_domain(group);
> + if (ret)
> + goto err_remove_gdev;
> + group->domain = group->blocking_domain;
> + }
Just to be clear this is some other project "DMA boot security" and
IDK if we need to do it until the CC patches land for user space
device binding policy, or someone seriously implements DRTM..
Jason
On Thu, Jan 22, 2026 at 09:14:32AM -0400, Jason Gunthorpe wrote:
> On Wed, Jan 21, 2026 at 09:44:32PM -0800, dan.j.williams@intel.com wrote:
> > "We have a less than perfect legacy way (PCI untrusted flag) to nod at
> > ATS security problems. Let us ignore even that for a new class of
> > devices that advertise they can trigger all the old security problems
> > plus new ones."
>
> Ah, I missed that we are already force disabling ATS in this untrusted
> case, so we should ensure that continues to be the case here
> too. Nicolin does it need a change?

pci_ats_always_on() validates against !pci_ats_supported(pdev), so
we ensured that untrusted devices would not be always on.

Perhaps we should highlight in the commit message, as it's a topic?

Nicolin
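In code, the ordering being described here is roughly the following (a
sketch of the patch's intent rather than a verbatim copy of it):

bool pci_ats_always_on(struct pci_dev *pdev)
{
	/* Covers both a missing ATS capability and dev->untrusted devices */
	if (!pci_ats_supported(pdev))
		return false;

	/* Only then consult the CXL.cache capability bit in the DVSEC */
	return pci_cxl_ats_always_on(pdev);
}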
On Thu, Jan 22, 2026 at 08:29:10AM -0800, Nicolin Chen wrote:
> On Thu, Jan 22, 2026 at 09:14:32AM -0400, Jason Gunthorpe wrote:
> > On Wed, Jan 21, 2026 at 09:44:32PM -0800, dan.j.williams@intel.com wrote:
> > > "We have a less than perfect legacy way (PCI untrusted flag) to nod at
> > > ATS security problems. Let us ignore even that for a new class of
> > > devices that advertise they can trigger all the old security problems
> > > plus new ones."
> >
> > Ah, I missed that we are already force disabling ATS in this untrusted
> > case, so we should ensure that continues to be the case here
> > too. Nicolin does it need a change?
>
> pci_ats_always_on() validates against !pci_ats_supported(pdev), so
> we ensured that untrusted devices would not be always on.
>
> Perhaps we should highlight in the commit message, as it's a topic?

Yes

Jason
On 1/21/26 21:03, Jason Gunthorpe wrote:
> On Wed, Jan 21, 2026 at 10:03:07AM +0000, Jonathan Cameron wrote:
>> On Wed, 21 Jan 2026 08:01:36 +0000
>> "Tian, Kevin"<kevin.tian@intel.com> wrote:
>>
>>> +Dan. I recalled an offline discussion in which he raised concern on
>>> having the kernel blindly enable ATS for cxl.cache device instead of
>>> creating a knob for admin to configure from userspace (in case
>>> security is viewed more important than functionality, upon allowing
>>> DMA to read data out of CPU caches)...
>>>
>> +CC Linux-cxl
> A cxl.cache device supporting ATS will automatically enable ATS today
> if the kernel option to enable translation is set.
>
> Even if the device is marked untrusted by the PCI layer (eg an
> external port).
I don't follow here. The untrusted check is now in pci_ats_supported():
/**
* pci_ats_supported - check if the device can use ATS
* @dev: the PCI device
*
* Returns true if the device supports ATS and is allowed to use it, false
* otherwise.
*/
bool pci_ats_supported(struct pci_dev *dev)
{
if (!dev->ats_cap)
return false;
return (dev->untrusted == 0);
}
EXPORT_SYMBOL_GPL(pci_ats_supported);
The iommu drivers (intel/amd/arm-smmuv3) all call pci_ats_supported()
before enabling ATS on a device. Anything I missed?
>
> Yes this is effectively a security issue, but it is not really a CXL
> specific problem.
>
> We might prefer to not enable ATS for untrusted devices and then fail to
> load drivers for "ats always on" cases.
>
> Or maybe we can enable one of the ATS security features someday,
> though I wonder if those work for CXL..
Thanks,
baolu
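For reference, the call pattern those drivers share looks roughly like the
sketch below (simplified; the function name is invented and no single
driver is copied here):

static int driver_enable_ats(struct device *dev)
{
	struct pci_dev *pdev;

	if (!dev_is_pci(dev))
		return -ENODEV;
	pdev = to_pci_dev(dev);

	/*
	 * pci_ats_supported() refuses both a missing ATS capability and
	 * dev->untrusted devices (e.g. behind an external-facing port),
	 * so untrusted devices never reach the enable below.
	 */
	if (!pci_ats_supported(pdev))
		return -EINVAL;

	/* Smallest Translation Unit expressed as a page shift */
	return pci_enable_ats(pdev, PAGE_SHIFT);
}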
On Thu, Jan 22, 2026 at 09:17:27AM +0800, Baolu Lu wrote:
> On 1/21/26 21:03, Jason Gunthorpe wrote:
> > On Wed, Jan 21, 2026 at 10:03:07AM +0000, Jonathan Cameron wrote:
> > > On Wed, 21 Jan 2026 08:01:36 +0000
> > > "Tian, Kevin"<kevin.tian@intel.com> wrote:
> > >
> > > > +Dan. I recalled an offline discussion in which he raised concern on
> > > > having the kernel blindly enable ATS for cxl.cache device instead of
> > > > creating a knob for admin to configure from userspace (in case
> > > > security is viewed more important than functionality, upon allowing
> > > > DMA to read data out of CPU caches)...
> > > >
> > > +CC Linux-cxl
> > A cxl.cache device supporting ATS will automatically enable ATS today
> > if the kernel option to enable translation is set.
> >
> > Even if the device is marked untrusted by the PCI layer (eg an
> > external port).
>
> I don't follow here. The untrusted check is now in pci_ats_supported():
>
> /**
> * pci_ats_supported - check if the device can use ATS
> * @dev: the PCI device
> *
> * Returns true if the device supports ATS and is allowed to use it, false
> * otherwise.
> */
> bool pci_ats_supported(struct pci_dev *dev)
> {
> if (!dev->ats_cap)
> return false;
>
> return (dev->untrusted == 0);
> }
> EXPORT_SYMBOL_GPL(pci_ats_supported);
>
> The iommu drivers (intel/amd/arm-smmuv3) all call pci_ats_supported()
> before enabling ATS on a device. Anything I missed?
No, not at all, I forgot about this!
Jason
On Fri, Jan 16, 2026 at 08:56:40PM -0800, Nicolin Chen wrote:
> Controlled by the IOMMU driver, ATS is usually enabled "on demand", when a
> device requests a translation service from its associated IOMMU HW running
> on the channel of a given PASID. This is working even when a device has no
> translation on its RID, i.e. RID is IOMMU bypassed.

I would add here that this is done to allow optimizing devices running in
IDENTITY translation, as there is no point in using ATS to return the same
value the device already has.

> For instance, the CXL spec notes in "3.2.5.13 Memory Type on CXL.cache":
> "To source requests on CXL.cache, devices need to get the Host Physical
> Address (HPA) from the Host by means of an ATS request on CXL.io."
> In other word, the CXL.cache capability relies on ATS. Otherwise, it won't
> have access to the host physical memory.
>
> Introduce a new pci_ats_always_on() for IOMMU driver to scan a PCI device,
> to shift ATS policies between "on demand" and "always on".
>
> Add the support for CXL.cache devices first. Non-CXL devices will be added
> in quirks.c file.
>
> Suggested-by: Vikram Sethi <vsethi@nvidia.com>
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> ---
>  include/linux/pci-ats.h       |  3 +++
>  include/uapi/linux/pci_regs.h |  5 ++++
>  drivers/pci/ats.c             | 44 +++++++++++++++++++++++++++++++++++
>  3 files changed, 52 insertions(+)

This implementation looks OK to me

Thanks,
Jason