[PATCH v2 1/2] iommu/amd: Add HATDis feature support

Ankit Soni posted 2 patches 7 months, 1 week ago
There is a newer version of this series
[PATCH v2 1/2] iommu/amd: Add HATDis feature support
Posted by Ankit Soni 7 months, 1 week ago
The current AMD IOMMU driver assumes Host Address Translation (HAT) is always
supported, and the Linux kernel enables this capability by default. However,
in the case of emulated and virtualized IOMMUs, this might not be the case.
For example, the current QEMU-emulated AMD vIOMMU does not support host
translation for VFIO pass-through devices, but interrupt remapping
support is required for x2APIC (i.e. kvm-msi-ext-dest-id is also not
supported by the guest OS). This would require the guest kernel to boot
with the guest kernel option iommu=pt to bypass the initialization of the
host (v1) table.

The AMD I/O Virtualization Technology (IOMMU) Specification Rev 3.10 [1]
introduces a new flag 'HATDis' in the IVHD 11h IOMMU attributes to indicate
that HAT is not supported on a particular IOMMU instance.

Therefore, modify the AMD IOMMU driver to detect the new HATDis
attribute, disable host translation, and switch to guest
translation if it is available. Otherwise, the driver will disable DMA
translation.

[1] https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/specifications/48882_IOMMU.pdf

Reviewed-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Ankit Soni <Ankit.Soni@amd.com>
---
 drivers/iommu/amd/amd_iommu.h       |  1 +
 drivers/iommu/amd/amd_iommu_types.h |  6 +++++-
 drivers/iommu/amd/init.c            | 21 ++++++++++++++++++++-
 drivers/iommu/amd/iommu.c           | 13 +++++++++++++
 4 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 220c598b7e14..bb14c4800dd0 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -43,6 +43,7 @@ extern int amd_iommu_guest_ir;
 extern enum protection_domain_mode amd_iommu_pgtable;
 extern int amd_iommu_gpt_level;
 extern unsigned long amd_iommu_pgsize_bitmap;
+extern bool amd_iommu_hatdis;
 
 /* Protection domain ops */
 void amd_iommu_init_identity_domain(void);
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 5089b58e528a..284ff4309660 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -460,6 +460,9 @@
 /* IOMMU Feature Reporting Field (for IVHD type 10h */
 #define IOMMU_FEAT_GASUP_SHIFT	6
 
+/* IOMMU HATDIS for IVHD type 11h and 40h */
+#define IOMMU_IVHD_ATTR_HATDIS_SHIFT	0
+
 /* IOMMU Extended Feature Register (EFR) */
 #define IOMMU_EFR_XTSUP_SHIFT	2
 #define IOMMU_EFR_GASUP_SHIFT	7
@@ -558,7 +561,8 @@ struct amd_io_pgtable {
 };
 
 enum protection_domain_mode {
-	PD_MODE_V1 = 1,
+	PD_MODE_NONE,
+	PD_MODE_V1,
 	PD_MODE_V2,
 };
 
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 14aa0d77df26..cfda9f89fe1d 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -168,6 +168,9 @@ static int amd_iommu_target_ivhd_type;
 u64 amd_iommu_efr;
 u64 amd_iommu_efr2;
 
+/* Host (v1) page table is not supported */
+bool amd_iommu_hatdis;
+
 /* SNP is enabled on the system? */
 bool amd_iommu_snp_en;
 EXPORT_SYMBOL(amd_iommu_snp_en);
@@ -1798,6 +1801,11 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
 		if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
 			amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
 
+		if (h->efr_attr & BIT(IOMMU_IVHD_ATTR_HATDIS_SHIFT)) {
+			pr_warn_once("Host Address Translation is not supported.\n");
+			amd_iommu_hatdis = true;
+		}
+
 		early_iommu_features_init(iommu, h);
 
 		break;
@@ -2582,7 +2590,7 @@ static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
 	u32 devid;
 	struct dev_table_entry *dev_table = pci_seg->dev_table;
 
-	if (dev_table == NULL)
+	if (!dev_table || amd_iommu_pgtable == PD_MODE_NONE)
 		return;
 
 	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
@@ -3095,6 +3103,17 @@ static int __init early_amd_iommu_init(void)
 		}
 	}
 
+	if (amd_iommu_hatdis) {
+		/*
+		 * Host (v1) page table is not available. Attempt to use
+		 * Guest (v2) page table.
+		 */
+		if (amd_iommu_v2_pgtbl_supported())
+			amd_iommu_pgtable = PD_MODE_V2;
+		else
+			amd_iommu_pgtable = PD_MODE_NONE;
+	}
+
 	/* Disable any previously enabled IOMMUs */
 	if (!is_kdump_kernel() || amd_iommu_disabled)
 		disable_iommus();
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index f34209b08b4c..4e9a57377b8c 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2393,6 +2393,13 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
 					     pci_max_pasids(to_pci_dev(dev)));
 	}
 
+	if (amd_iommu_pgtable == PD_MODE_NONE) {
+		pr_warn_once("%s: DMA translation not supported by iommu.\n",
+			     __func__);
+		iommu_dev = ERR_PTR(-ENODEV);
+		goto out_err;
+	}
+
 out_err:
 
 	iommu_completion_wait(iommu);
@@ -2480,6 +2487,9 @@ static int pdom_setup_pgtable(struct protection_domain *domain,
 	case PD_MODE_V2:
 		fmt = AMD_IOMMU_V2;
 		break;
+	case PD_MODE_NONE:
+		WARN_ON_ONCE(1);
+		return -EPERM;
 	}
 
 	domain->iop.pgtbl.cfg.amd.nid = dev_to_node(dev);
@@ -2501,6 +2511,9 @@ static inline u64 dma_max_address(enum protection_domain_mode pgtable)
 
 static bool amd_iommu_hd_support(struct amd_iommu *iommu)
 {
+	if (amd_iommu_hatdis)
+		return false;
+
 	return iommu && (iommu->features & FEATURE_HDSUP);
 }
 
-- 
2.43.0
Re: [PATCH v2 1/2] iommu/amd: Add HATDis feature support
Posted by Joao Martins 7 months, 1 week ago
On 07/05/2025 11:51, Ankit Soni wrote:
> Current AMD IOMMU assumes Host Address Translation (HAT) is always
> supported, and Linux kernel enables this capability by default. However,
> in case of emulated and virtualized IOMMU, this might not be the case.
> For example,current QEMU-emulated AMD vIOMMU does not support host
> translation for VFIO pass-through device, but the interrupt remapping
> support is required for x2APIC (i.e. kvm-msi-ext-dest-id is also not
> supported by the guest OS). This would require the guest kernel to boot
> with guest kernel option iommu=pt to by-pass the initialization of
> host (v1) table.
> 
> The AMD I/O Virtualization Technology (IOMMU) Specification Rev 3.10 [1]
> introduces a new flag 'HATDis' in the IVHD 11h IOMMU attributes to indicate
> that HAT is not supported on a particular IOMMU instance.
> 
> Therefore, modifies the AMD IOMMU driver to detect the new HATDis
> attributes, and disable host translation and switch to use guest
> translation if it is available. Otherwise, the driver will disable DMA
> translation.
> 
> [1] https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/specifications/48882_IOMMU.pdf
> 
> Reviewed-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> Signed-off-by: Ankit Soni <Ankit.Soni@amd.com>
> ---
>  drivers/iommu/amd/amd_iommu.h       |  1 +
>  drivers/iommu/amd/amd_iommu_types.h |  6 +++++-
>  drivers/iommu/amd/init.c            | 21 ++++++++++++++++++++-
>  drivers/iommu/amd/iommu.c           | 13 +++++++++++++
>  4 files changed, 39 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
> index 220c598b7e14..bb14c4800dd0 100644
> --- a/drivers/iommu/amd/amd_iommu.h
> +++ b/drivers/iommu/amd/amd_iommu.h
> @@ -43,6 +43,7 @@ extern int amd_iommu_guest_ir;
>  extern enum protection_domain_mode amd_iommu_pgtable;
>  extern int amd_iommu_gpt_level;
>  extern unsigned long amd_iommu_pgsize_bitmap;
> +extern bool amd_iommu_hatdis;
>  
>  /* Protection domain ops */
>  void amd_iommu_init_identity_domain(void);
> diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
> index 5089b58e528a..284ff4309660 100644
> --- a/drivers/iommu/amd/amd_iommu_types.h
> +++ b/drivers/iommu/amd/amd_iommu_types.h
> @@ -460,6 +460,9 @@
>  /* IOMMU Feature Reporting Field (for IVHD type 10h */
>  #define IOMMU_FEAT_GASUP_SHIFT	6
>  
> +/* IOMMU HATDIS for IVHD type 11h and 40h */
> +#define IOMMU_IVHD_ATTR_HATDIS_SHIFT	0
> +
>  /* IOMMU Extended Feature Register (EFR) */
>  #define IOMMU_EFR_XTSUP_SHIFT	2
>  #define IOMMU_EFR_GASUP_SHIFT	7
> @@ -558,7 +561,8 @@ struct amd_io_pgtable {
>  };
>  
>  enum protection_domain_mode {
> -	PD_MODE_V1 = 1,
> +	PD_MODE_NONE,
> +	PD_MODE_V1,
>  	PD_MODE_V2,
>  };
>  
> diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
> index 14aa0d77df26..cfda9f89fe1d 100644
> --- a/drivers/iommu/amd/init.c
> +++ b/drivers/iommu/amd/init.c
> @@ -168,6 +168,9 @@ static int amd_iommu_target_ivhd_type;
>  u64 amd_iommu_efr;
>  u64 amd_iommu_efr2;
>  
> +/* Host (v1) page table is not supported*/
> +bool amd_iommu_hatdis;
> +
>  /* SNP is enabled on the system? */
>  bool amd_iommu_snp_en;
>  EXPORT_SYMBOL(amd_iommu_snp_en);
> @@ -1798,6 +1801,11 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
>  		if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
>  			amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
>  
> +		if (h->efr_attr & BIT(IOMMU_IVHD_ATTR_HATDIS_SHIFT)) {
> +			pr_warn_once("Host Address Translation is not supported.\n");
> +			amd_iommu_hatdis = true;
> +		}
> +
>  		early_iommu_features_init(iommu, h);
>  
>  		break;
> @@ -2582,7 +2590,7 @@ static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
>  	u32 devid;
>  	struct dev_table_entry *dev_table = pci_seg->dev_table;
>  
> -	if (dev_table == NULL)
> +	if (!dev_table || amd_iommu_pgtable == PD_MODE_NONE)
>  		return;
>  

My suggestion in the past has been to do:

if (!amd_iommu_hatdis || amd_iommu_pgtable != PD_MODE_NONE) {
	ret = iommu_device_sysfs_add(...)
	if (ret)
		return ret;
}

if (!amd_iommu_hatdis || amd_iommu_pgtable != PD_MODE_NONE)
	iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL);

Also, as a way to simplify the attach/probe path, i.e. remove the chunks in
amd_iommu_probe_device/init_device_table_dma.

Or is it that the generation of an ILLEGAL_DEVICE_TABLE_ENTRY event means we
still need to be careful here?

>  	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
> @@ -3095,6 +3103,17 @@ static int __init early_amd_iommu_init(void)
>  		}
>  	}
>  
> +	if (amd_iommu_hatdis) {
> +		/*
> +		 * Host (v1) page table is not available. Attempt to use
> +		 * Guest (v2) page table.
> +		 */
> +		if (amd_iommu_v2_pgtbl_supported())
> +			amd_iommu_pgtable = PD_MODE_V2;
> +		else
> +			amd_iommu_pgtable = PD_MODE_NONE;
> +	}
> +
>  	/* Disable any previously enabled IOMMUs */
>  	if (!is_kdump_kernel() || amd_iommu_disabled)
>  		disable_iommus();
> diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
> index f34209b08b4c..4e9a57377b8c 100644
> --- a/drivers/iommu/amd/iommu.c
> +++ b/drivers/iommu/amd/iommu.c
> @@ -2393,6 +2393,13 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
>  					     pci_max_pasids(to_pci_dev(dev)));
>  	}
>  
> +	if (amd_iommu_pgtable == PD_MODE_NONE) {
> +		pr_warn_once("%s: DMA translation not supported by iommu.\n",
> +			     __func__);
> +		iommu_dev = ERR_PTR(-ENODEV);
> +		goto out_err;
> +	}
> +

Strange place to put it after all the initialization has been done. Shouldn't it
be at the top of the function?

>  out_err:
>  
>  	iommu_completion_wait(iommu);
> @@ -2480,6 +2487,9 @@ static int pdom_setup_pgtable(struct protection_domain *domain,
>  	case PD_MODE_V2:
>  		fmt = AMD_IOMMU_V2;
>  		break;
> +	case PD_MODE_NONE:
> +		WARN_ON_ONCE(1);
> +		return -EPERM;
>  	}
>  
>  	domain->iop.pgtbl.cfg.amd.nid = dev_to_node(dev);
> @@ -2501,6 +2511,9 @@ static inline u64 dma_max_address(enum protection_domain_mode pgtable)
>  
>  static bool amd_iommu_hd_support(struct amd_iommu *iommu)
>  {
> +	if (amd_iommu_hatdis)
> +		return false;
> +
>  	return iommu && (iommu->features & FEATURE_HDSUP);
>  }
>  

If there's no IOMMU group, how can we allocate a paging domain with dirty
support for host/v1 pgtable? I don't think you need this part?
Re: [PATCH v2 1/2] iommu/amd: Add HATDis feature support
Posted by Ankit Soni 7 months, 1 week ago
Hi Joao,

On Thu, May 08, 2025 at 06:04:12PM +0100, Joao Martins wrote:
> >  
> >  		break;
> > @@ -2582,7 +2590,7 @@ static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
> >  	u32 devid;
> >  	struct dev_table_entry *dev_table = pci_seg->dev_table;
> >  
> > -	if (dev_table == NULL)
> > +	if (!dev_table || amd_iommu_pgtable == PD_MODE_NONE)
> >  		return;
> >  
> 
> My suggestion in the past in the past has been to do:
> 
> if (!amd_iommu_hatdis || amd_iommu_pgtable != PD_MODE_NONE) {
> 	ret = iommu_device_sysfs_add(...)
> 	if (ret)
> 		return ret;
> }
> 
> if (!amd_iommu_hatdis || amd_iommu_pgtable != PD_MODE_NONE)
> 	iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL);
> 
> Also as a to simplify the attach/probe path i.e. remove the chunks in
> amd_iommu_probe_device/init_device_table_dma.
> 
> Or is it that the generation of an ILLEGAL_DEVICE_TABLE_ENTRY event means we
> still need to be careful here?
> 

Correction to my last statement: with the above check around
iommu_device_register(), the machine does not boot and INVALID_DEVICE_REQUEST
events are generated continuously, so iommu_device_register() needs to be called.

> > diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
> > index f34209b08b4c..4e9a57377b8c 100644
> > --- a/drivers/iommu/amd/iommu.c
> > +++ b/drivers/iommu/amd/iommu.c
> > @@ -2393,6 +2393,13 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
> >  					     pci_max_pasids(to_pci_dev(dev)));
> >  	}
> >  
> > +	if (amd_iommu_pgtable == PD_MODE_NONE) {
> > +		pr_warn_once("%s: DMA translation not supported by iommu.\n",
> > +			     __func__);
> > +		iommu_dev = ERR_PTR(-ENODEV);
> > +		goto out_err;
> > +	}
> > +
> 
> Strange place to put it after all the initialization has been done. Shouldn't it
> be at the top of the function?
> 

In amd_iommu_probe_device() the driver sets the IR domain with
amd_iommu_set_pci_msi_domain(), hence I chose this place for the return.

> >  out_err:
> >  
> >  	iommu_completion_wait(iommu);
> > @@ -2480,6 +2487,9 @@ static int pdom_setup_pgtable(struct protection_domain *domain,
> >  	case PD_MODE_V2:
> >  		fmt = AMD_IOMMU_V2;
> >  		break;
> > +	case PD_MODE_NONE:
> > +		WARN_ON_ONCE(1);
> > +		return -EPERM;
> >  	}
> >  
> >  	domain->iop.pgtbl.cfg.amd.nid = dev_to_node(dev);
> > @@ -2501,6 +2511,9 @@ static inline u64 dma_max_address(enum protection_domain_mode pgtable)
> >  
> >  static bool amd_iommu_hd_support(struct amd_iommu *iommu)
> >  {
> > +	if (amd_iommu_hatdis)
> > +		return false;
> > +
> >  	return iommu && (iommu->features & FEATURE_HDSUP);
> >  }
> >  
> 
> If there's no IOMMU group, how can we allocate a paging domain with dirty
> support for host/v1 pgtable? I don't think you need this part?

This check is for the case where amd_iommu_hatdis is set and the driver has
selected the v2 page table; the driver then needs to take care of dirty support.