[v1] iommu/amd: Add support for hw_info for iommu capability query

[PATCH] iommu/amd: Add support for hw_info for iommu capability query

Posted by Suravee Suthikulpanit 1 month, 2 weeks ago

AMD IOMMU Extended Feature (EFR) and Extended Feature 2 (EFR2) registers
specify features supported by each IOMMU hardware instance.
The IOMMU driver checks each feature-specific bits before enabling
each feature at run time.

For IOMMUFD, the hypervisor determines which IOMMU features to support
in the guest, and communicates this information to user-space (e.g. QEMU)
via iommufd IOMMU_DEVICE_GET_HW_INFO ioctl.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
 drivers/iommu/amd/amd_iommu_types.h |  3 +++
 drivers/iommu/amd/iommu.c           | 40 +++++++++++++++++++++++++++++
 include/uapi/linux/iommufd.h        | 19 ++++++++++++++
 3 files changed, 62 insertions(+)

diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 5219d7ddfdaa..efdd0cbda1df 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -95,9 +95,12 @@
 #define FEATURE_HE		BIT_ULL(8)
 #define FEATURE_PC		BIT_ULL(9)
 #define FEATURE_HATS		GENMASK_ULL(11, 10)
+#define FEATURE_GATS_SHIFT	12
 #define FEATURE_GATS		GENMASK_ULL(13, 12)
+#define FEATURE_GLX_SHIFT	14
 #define FEATURE_GLX		GENMASK_ULL(15, 14)
 #define FEATURE_GAM_VAPIC	BIT_ULL(21)
+#define FEATURE_PASMAX_SHIFT	32
 #define FEATURE_PASMAX		GENMASK_ULL(36, 32)
 #define FEATURE_GIOSUP		BIT_ULL(48)
 #define FEATURE_HASUP		BIT_ULL(49)
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index eb348c63a8d0..ebe1cb9b0807 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -3038,8 +3038,48 @@ static const struct iommu_dirty_ops amd_dirty_ops = {
 	.read_and_clear_dirty = amd_iommu_read_and_clear_dirty,
 };
 
+#define AMD_VIOMMU_EFR_GUEST_TRANSLATION_FLAGS \
+	(FEATURE_GT | FEATURE_GA | FEATURE_GIOSUP | \
+	 FEATURE_PPR | FEATURE_EPHSUP)
+
+static void _build_efr_guest_translation(struct amd_iommu *iommu, u64 *efr, u64 *efr2)
+{
+	/*
+	 * Build the EFR against the current hardware capabilities
+	 *
+	 * Also, not all IOMMU features are emulated by KVM.
+	 * Therefore, only advertise what KVM can support
+	 * or virtualzied by the hardware.
+	 */
+	if (!efr)
+		return;
+
+	*efr |= (amd_iommu_efr & AMD_VIOMMU_EFR_GUEST_TRANSLATION_FLAGS);
+	*efr |= (FIELD_GET(FEATURE_GATS, amd_iommu_efr) << FEATURE_GATS_SHIFT);
+	*efr |= (FIELD_GET(FEATURE_GLX, amd_iommu_efr) << FEATURE_GLX_SHIFT);
+	*efr |= (FIELD_GET(FEATURE_PASMAX, amd_iommu_efr) << FEATURE_PASMAX_SHIFT);
+	pr_debug("%s: efr=%#llx\n", __func__, *efr);
+}
+
+static void *amd_iommu_hw_info(struct device *dev, u32 *length, u32 *type)
+{
+	struct iommu_hw_info_amd *hwinfo;
+	struct amd_iommu *iommu = rlookup_amd_iommu(dev);
+
+	hwinfo = kzalloc(sizeof(*hwinfo), GFP_KERNEL);
+	if (!hwinfo)
+		return ERR_PTR(-ENOMEM);
+
+	*length = sizeof(*hwinfo);
+	*type = IOMMU_HW_INFO_TYPE_AMD;
+
+	_build_efr_guest_translation(iommu, &hwinfo->efr, &hwinfo->efr2);
+	return hwinfo;
+}
+
 const struct iommu_ops amd_iommu_ops = {
 	.capable = amd_iommu_capable,
+	.hw_info = amd_iommu_hw_info,
 	.blocked_domain = &blocked_domain,
 	.release_domain = &release_domain,
 	.identity_domain = &identity_domain.domain,
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index c218c89e0e2e..0f7212f9e0ce 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -613,6 +613,24 @@ struct iommu_hw_info_tegra241_cmdqv {
 	__u8 __reserved;
 };
 
+/**
+ * struct iommu_hw_info_amd - AMD IOMMU device info
+ *
+ * @efr : Value of AMD IOMMU Extended Feature Register (EFR)
+ * @efr2: Value of AMD IOMMU Extended Feature 2 Register (EFR2)
+ *
+ * Please See description of these registers in the following sections of
+ * the AMD I/O Virtualization Technology (IOMMU) Specification.
+ * (https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/specifications/48882_IOMMU.pdf)
+ *
+ * - MMIO Offset 0030h IOMMU Extended Feature Register
+ * - MMIO Offset 01A0h IOMMU Extended Feature 2 Register
+ */
+struct iommu_hw_info_amd {
+	__aligned_u64 efr;
+	__aligned_u64 efr2;
+};
+
 /**
  * enum iommu_hw_info_type - IOMMU Hardware Info Types
  * @IOMMU_HW_INFO_TYPE_NONE: Output by the drivers that do not report hardware
@@ -629,6 +647,7 @@ enum iommu_hw_info_type {
 	IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,
 	IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2,
 	IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV = 3,
+	IOMMU_HW_INFO_TYPE_AMD = 4,
 };
 
 /**
-- 
2.34.1

Re: [PATCH] iommu/amd: Add support for hw_info for iommu capability query

Posted by Jason Gunthorpe 1 month, 1 week ago

On Wed, Aug 20, 2025 at 04:25:33AM +0000, Suravee Suthikulpanit wrote:
> AMD IOMMU Extended Feature (EFR) and Extended Feature 2 (EFR2) registers
> specify features supported by each IOMMU hardware instance.
> The IOMMU driver checks each feature-specific bits before enabling
> each feature at run time.
> 
> For IOMMUFD, the hypervisor determines which IOMMU features to support
> in the guest, and communicates this information to user-space (e.g. QEMU)
> via iommufd IOMMU_DEVICE_GET_HW_INFO ioctl.
> 
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> ---
>  drivers/iommu/amd/amd_iommu_types.h |  3 +++
>  drivers/iommu/amd/iommu.c           | 40 +++++++++++++++++++++++++++++
>  include/uapi/linux/iommufd.h        | 19 ++++++++++++++
>  3 files changed, 62 insertions(+)

Can you follow what ARM did and put the iommufd functions in a
amd/iommufd.c file? I think that worked pretty good.

Jason

> diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
> index 5219d7ddfdaa..efdd0cbda1df 100644
> --- a/drivers/iommu/amd/amd_iommu_types.h
> +++ b/drivers/iommu/amd/amd_iommu_types.h
> @@ -95,9 +95,12 @@
>  #define FEATURE_HE		BIT_ULL(8)
>  #define FEATURE_PC		BIT_ULL(9)
>  #define FEATURE_HATS		GENMASK_ULL(11, 10)
> +#define FEATURE_GATS_SHIFT	12
>  #define FEATURE_GATS		GENMASK_ULL(13, 12)
> +#define FEATURE_GLX_SHIFT	14
>  #define FEATURE_GLX		GENMASK_ULL(15, 14)
>  #define FEATURE_GAM_VAPIC	BIT_ULL(21)
> +#define FEATURE_PASMAX_SHIFT	32
>  #define FEATURE_PASMAX		GENMASK_ULL(36, 32)

Please no.. FIELD_PREP is how to get the shift, don't add constants.

> diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
> index eb348c63a8d0..ebe1cb9b0807 100644
> --- a/drivers/iommu/amd/iommu.c
> +++ b/drivers/iommu/amd/iommu.c
> @@ -3038,8 +3038,48 @@ static const struct iommu_dirty_ops amd_dirty_ops = {
>  	.read_and_clear_dirty = amd_iommu_read_and_clear_dirty,
>  };
>  
> +#define AMD_VIOMMU_EFR_GUEST_TRANSLATION_FLAGS \
> +	(FEATURE_GT | FEATURE_GA | FEATURE_GIOSUP | \
> +	 FEATURE_PPR | FEATURE_EPHSUP)
> +
> +static void _build_efr_guest_translation(struct amd_iommu *iommu, u64 *efr, u64 *efr2)
> +{
> +	/*
> +	 * Build the EFR against the current hardware capabilities
> +	 *
> +	 * Also, not all IOMMU features are emulated by KVM.
> +	 * Therefore, only advertise what KVM can support
> +	 * or virtualzied by the hardware.
> +	 */

What does KVM have to do with iommu features on AMD architecture??

> +	if (!efr)
> +		return;
> +
> +	*efr |= (amd_iommu_efr & AMD_VIOMMU_EFR_GUEST_TRANSLATION_FLAGS);
> +	*efr |= (FIELD_GET(FEATURE_GATS, amd_iommu_efr) << FEATURE_GATS_SHIFT);
> +	*efr |= (FIELD_GET(FEATURE_GLX, amd_iommu_efr) << FEATURE_GLX_SHIFT);
> +	*efr |= (FIELD_GET(FEATURE_PASMAX, amd_iommu_efr) << FEATURE_PASMAX_SHIFT);
> +	pr_debug("%s: efr=%#llx\n", __func__, *efr);
> +}

I'm not sure all this masking is a good idea, how do you intend to
handshake upgrades when more features are supported if masking is
present?

Nothing sets efr2?

> +/**
> + * struct iommu_hw_info_amd - AMD IOMMU device info
> + *
> + * @efr : Value of AMD IOMMU Extended Feature Register (EFR)
> + * @efr2: Value of AMD IOMMU Extended Feature 2 Register (EFR2)
> + *
> + * Please See description of these registers in the following sections of
> + * the AMD I/O Virtualization Technology (IOMMU) Specification.
> + * (https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/specifications/48882_IOMMU.pdf)
> + *
> + * - MMIO Offset 0030h IOMMU Extended Feature Register
> + * - MMIO Offset 01A0h IOMMU Extended Feature 2 Register
> + */

Need to document the masking and explain what the forwards/backwards
compatible strategy is here.

I think you should probably just pass the raw HW value through and
require the VMM to figure out what bits it needs based on feature
flags elsewhere.

Jason

Re: [PATCH] iommu/amd: Add support for hw_info for iommu capability query

Posted by Suthikulpanit, Suravee 1 month, 1 week ago


On 8/25/2025 8:44 AM, Jason Gunthorpe wrote:
> On Wed, Aug 20, 2025 at 04:25:33AM +0000, Suravee Suthikulpanit wrote:
>> AMD IOMMU Extended Feature (EFR) and Extended Feature 2 (EFR2) registers
>> specify features supported by each IOMMU hardware instance.
>> The IOMMU driver checks each feature-specific bits before enabling
>> each feature at run time.
>>
>> For IOMMUFD, the hypervisor determines which IOMMU features to support
>> in the guest, and communicates this information to user-space (e.g. QEMU)
>> via iommufd IOMMU_DEVICE_GET_HW_INFO ioctl.
>>
>> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>> ---
>>   drivers/iommu/amd/amd_iommu_types.h |  3 +++
>>   drivers/iommu/amd/iommu.c           | 40 +++++++++++++++++++++++++++++
>>   include/uapi/linux/iommufd.h        | 19 ++++++++++++++
>>   3 files changed, 62 insertions(+)
> 
> Can you follow what ARM did and put the iommufd functions in a
> amd/iommufd.c file? I think that worked pretty good.

Sure, I can put it in the amd/iommufd.c.

> 
>> diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
>> index 5219d7ddfdaa..efdd0cbda1df 100644
>> --- a/drivers/iommu/amd/amd_iommu_types.h
>> +++ b/drivers/iommu/amd/amd_iommu_types.h
>> @@ -95,9 +95,12 @@
>>   #define FEATURE_HE		BIT_ULL(8)
>>   #define FEATURE_PC		BIT_ULL(9)
>>   #define FEATURE_HATS		GENMASK_ULL(11, 10)
>> +#define FEATURE_GATS_SHIFT	12
>>   #define FEATURE_GATS		GENMASK_ULL(13, 12)
>> +#define FEATURE_GLX_SHIFT	14
>>   #define FEATURE_GLX		GENMASK_ULL(15, 14)
>>   #define FEATURE_GAM_VAPIC	BIT_ULL(21)
>> +#define FEATURE_PASMAX_SHIFT	32
>>   #define FEATURE_PASMAX		GENMASK_ULL(36, 32)
> 
> Please no.. FIELD_PREP is how to get the shift, don't add constants.

Good point. Ok

>> diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
>> index eb348c63a8d0..ebe1cb9b0807 100644
>> --- a/drivers/iommu/amd/iommu.c
>> +++ b/drivers/iommu/amd/iommu.c
>> @@ -3038,8 +3038,48 @@ static const struct iommu_dirty_ops amd_dirty_ops = {
>>   	.read_and_clear_dirty = amd_iommu_read_and_clear_dirty,
>>   };
>>   
>> +#define AMD_VIOMMU_EFR_GUEST_TRANSLATION_FLAGS \
>> +	(FEATURE_GT | FEATURE_GA | FEATURE_GIOSUP | \
>> +	 FEATURE_PPR | FEATURE_EPHSUP)
>> +
>> +static void _build_efr_guest_translation(struct amd_iommu *iommu, u64 *efr, u64 *efr2)
>> +{
>> +	/*
>> +	 * Build the EFR against the current hardware capabilities
>> +	 *
>> +	 * Also, not all IOMMU features are emulated by KVM.
>> +	 * Therefore, only advertise what KVM can support
>> +	 * or virtualzied by the hardware.
>> +	 */
> 
> What does KVM have to do with iommu features on AMD architecture??

Sorry, I meant to say hypervisor.

>> +	if (!efr)
>> +		return;
>> +
>> +	*efr |= (amd_iommu_efr & AMD_VIOMMU_EFR_GUEST_TRANSLATION_FLAGS);
>> +	*efr |= (FIELD_GET(FEATURE_GATS, amd_iommu_efr) << FEATURE_GATS_SHIFT);
>> +	*efr |= (FIELD_GET(FEATURE_GLX, amd_iommu_efr) << FEATURE_GLX_SHIFT);
>> +	*efr |= (FIELD_GET(FEATURE_PASMAX, amd_iommu_efr) << FEATURE_PASMAX_SHIFT);
>> +	pr_debug("%s: efr=%#llx\n", __func__, *efr);
>> +}
> 
> I'm not sure all this masking is a good idea, how do you intend to
> handshake upgrades when more features are supported if masking is
> present?
> 
> Nothing sets efr2?

We are planning to extend this function to advertise additional flags in 
EFR/EFR2 once we add guest support for the corresponded features in the 
hypervisor in the future.

>> +/**
>> + * struct iommu_hw_info_amd - AMD IOMMU device info
>> + *
>> + * @efr : Value of AMD IOMMU Extended Feature Register (EFR)
>> + * @efr2: Value of AMD IOMMU Extended Feature 2 Register (EFR2)
>> + *
>> + * Please See description of these registers in the following sections of
>> + * the AMD I/O Virtualization Technology (IOMMU) Specification.
>> + * (https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/specifications/48882_IOMMU.pdf)
>> + *
>> + * - MMIO Offset 0030h IOMMU Extended Feature Register
>> + * - MMIO Offset 01A0h IOMMU Extended Feature 2 Register
>> + */
> 
> Need to document the masking and explain what the forwards/backwards
> compatible strategy is here.

Sure, I can add a note that certain EFR/EFR2 are masked based on the 
available support in the hypervisor.

> I think you should probably just pass the raw HW value through and
> require the VMM to figure out what bits it needs based on feature
> flags elsewhere.

The problem is some of the features are virtualized by hardware, which 
needs enabling from the Linux AMD IOMMU driver. We cannot just provide 
all flags since VMM would not know if the kernel has the support 
enabled. Some of these bits are also propagated to guest AMD IOMMU 
driver. If we don't mask them, the guest would detect and try to enable 
unsupported features, which would cause failure.

Thanks,
Suravee

Re: [PATCH] iommu/amd: Add support for hw_info for iommu capability query

Posted by Jason Gunthorpe 1 month, 1 week ago

On Tue, Aug 26, 2025 at 12:36:23PM -0500, Suthikulpanit, Suravee wrote:
> > I think you should probably just pass the raw HW value through and
> > require the VMM to figure out what bits it needs based on feature
> > flags elsewhere.
> 
> The problem is some of the features are virtualized by hardware, which needs
> enabling from the Linux AMD IOMMU driver. We cannot just provide all flags
> since VMM would not know if the kernel has the support enabled.

The VMM is not supposed to forward these flags as-is! It is sort of
some kind of maximum what the underlying HW can support.

If you forward as-is then the VMM will forward broken flags it doesn't
support when the kernel gets updated, that isn't OK.

Each and every feature the VMM wants to show in the EFR has to figured
out on its own if it can be supported based on other kernel features.

The utility of the get_info return is for HW features that don't
require any special kernel enablement.

This is all the same as ARM which is working this way, I don't think
there is a reason to deviate here, it just gets confusing and opens up
paths for bugs.

Pass the real values from HW, write a comment similar to ARM that says
these are raw HW values and the VMM must generate its own EFR not copy
blindly from here.

Jason

Re: [PATCH] iommu/amd: Add support for hw_info for iommu capability query

Posted by Suthikulpanit, Suravee 1 month, 1 week ago


On 8/26/2025 12:58 PM, Jason Gunthorpe wrote:
> On Tue, Aug 26, 2025 at 12:36:23PM -0500, Suthikulpanit, Suravee wrote:
>>> I think you should probably just pass the raw HW value through and
>>> require the VMM to figure out what bits it needs based on feature
>>> flags elsewhere.
>>
>> The problem is some of the features are virtualized by hardware, which needs
>> enabling from the Linux AMD IOMMU driver. We cannot just provide all flags
>> since VMM would not know if the kernel has the support enabled.
> 
> The VMM is not supposed to forward these flags as-is! It is sort of
> some kind of maximum what the underlying HW can support.
> 
> If you forward as-is then the VMM will forward broken flags it doesn't
> support when the kernel gets updated, that isn't OK.

I got this part. That's why we mask out unsupported feature flags before 
returning the EFR/EFR2 to the VMM.

> Each and every feature the VMM wants to show in the EFR has to figured
> out on its own if it can be supported based on other kernel features.
> 
> The utility of the get_info return is for HW features that don't
> require any special kernel enablement.

Not sure if I got this part. Are you referring to the struct 
vfio_iommu_type1_info and vfio_iommu_type1_get_info()?

Suravee

Re: [PATCH] iommu/amd: Add support for hw_info for iommu capability query

Posted by Jason Gunthorpe 1 month, 1 week ago

On Tue, Aug 26, 2025 at 01:43:59PM -0500, Suthikulpanit, Suravee wrote:
> 
> 
> On 8/26/2025 12:58 PM, Jason Gunthorpe wrote:
> > On Tue, Aug 26, 2025 at 12:36:23PM -0500, Suthikulpanit, Suravee wrote:
> > > > I think you should probably just pass the raw HW value through and
> > > > require the VMM to figure out what bits it needs based on feature
> > > > flags elsewhere.
> > > 
> > > The problem is some of the features are virtualized by hardware, which needs
> > > enabling from the Linux AMD IOMMU driver. We cannot just provide all flags
> > > since VMM would not know if the kernel has the support enabled.
> > 
> > The VMM is not supposed to forward these flags as-is! It is sort of
> > some kind of maximum what the underlying HW can support.
> > 
> > If you forward as-is then the VMM will forward broken flags it doesn't
> > support when the kernel gets updated, that isn't OK.
> 
> I got this part. That's why we mask out unsupported feature flags before
> returning the EFR/EFR2 to the VMM.

The kernel can't do anything on behalf of the VMM, it doesn't know
what the VMM even supports emulating.

The VMM alone is responsible to build the efr/efr2 values. The VMM may
choose to copy only some bits from the kernel, but only if it knows it
can support whatever it is copying.

> > Each and every feature the VMM wants to show in the EFR has to figured
> > out on its own if it can be supported based on other kernel features.
> > 
> > The utility of the get_info return is for HW features that don't
> > require any special kernel enablement.
> 
> Not sure if I got this part. Are you referring to the struct
> vfio_iommu_type1_info and vfio_iommu_type1_get_info()?

Sorry hw_info.

Jason